meter.c 14 KB


  1. /*
  2. * Copyright (c) 2017 Nicira, Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of version 2 of the GNU General Public
  6. * License as published by the Free Software Foundation.
  7. */
  8. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  9. #include <linux/if.h>
  10. #include <linux/skbuff.h>
  11. #include <linux/ip.h>
  12. #include <linux/kernel.h>
  13. #include <linux/openvswitch.h>
  14. #include <linux/netlink.h>
  15. #include <linux/rculist.h>
  16. #include <net/netlink.h>
  17. #include <net/genetlink.h>
  18. #include "datapath.h"
  19. #include "meter.h"
  20. #define METER_HASH_BUCKETS 1024
/* Netlink attribute policy for the meter commands, indexed by
 * OVS_METER_ATTR_*.  Every entry of dp_meter_genl_ops points at this table.
 */
static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
	/* Length-only entry: payload is sized as a struct ovs_flow_stats. */
	[OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
	[OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
	[OVS_METER_ATTR_USED] = { .type = NLA_U64 },
	[OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
	[OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
};
/* Netlink attribute policy for a single band, indexed by OVS_BAND_ATTR_*.
 * dp_meter_create() passes it to nla_parse() for each nested band.
 */
static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
	[OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
	[OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
	[OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
	/* Length-only entry: payload is sized as a struct ovs_flow_stats. */
	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
};
  37. static void ovs_meter_free(struct dp_meter *meter)
  38. {
  39. if (!meter)
  40. return;
  41. kfree_rcu(meter, rcu);
  42. }
  43. static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
  44. u32 meter_id)
  45. {
  46. return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
  47. }
  48. /* Call with ovs_mutex or RCU read lock. */
  49. static struct dp_meter *lookup_meter(const struct datapath *dp,
  50. u32 meter_id)
  51. {
  52. struct dp_meter *meter;
  53. struct hlist_head *head;
  54. head = meter_hash_bucket(dp, meter_id);
  55. hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
  56. if (meter->id == meter_id)
  57. return meter;
  58. }
  59. return NULL;
  60. }
  61. static void attach_meter(struct datapath *dp, struct dp_meter *meter)
  62. {
  63. struct hlist_head *head = meter_hash_bucket(dp, meter->id);
  64. hlist_add_head_rcu(&meter->dp_hash_node, head);
  65. }
  66. static void detach_meter(struct dp_meter *meter)
  67. {
  68. ASSERT_OVSL();
  69. if (meter)
  70. hlist_del_rcu(&meter->dp_hash_node);
  71. }
  72. static struct sk_buff *
  73. ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
  74. struct ovs_header **ovs_reply_header)
  75. {
  76. struct sk_buff *skb;
  77. struct ovs_header *ovs_header = info->userhdr;
  78. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  79. if (!skb)
  80. return ERR_PTR(-ENOMEM);
  81. *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
  82. info->snd_seq,
  83. &dp_meter_genl_family, 0, cmd);
  84. if (!*ovs_reply_header) {
  85. nlmsg_free(skb);
  86. return ERR_PTR(-EMSGSIZE);
  87. }
  88. (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
  89. return skb;
  90. }
  91. static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
  92. struct dp_meter *meter)
  93. {
  94. struct nlattr *nla;
  95. struct dp_meter_band *band;
  96. u16 i;
  97. if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
  98. goto error;
  99. if (!meter)
  100. return 0;
  101. if (nla_put(reply, OVS_METER_ATTR_STATS,
  102. sizeof(struct ovs_flow_stats), &meter->stats) ||
  103. nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
  104. OVS_METER_ATTR_PAD))
  105. goto error;
  106. nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
  107. if (!nla)
  108. goto error;
  109. band = meter->bands;
  110. for (i = 0; i < meter->n_bands; ++i, ++band) {
  111. struct nlattr *band_nla;
  112. band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
  113. if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
  114. sizeof(struct ovs_flow_stats),
  115. &band->stats))
  116. goto error;
  117. nla_nest_end(reply, band_nla);
  118. }
  119. nla_nest_end(reply, nla);
  120. return 0;
  121. error:
  122. return -EMSGSIZE;
  123. }
  124. static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
  125. {
  126. struct sk_buff *reply;
  127. struct ovs_header *ovs_reply_header;
  128. struct nlattr *nla, *band_nla;
  129. int err;
  130. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
  131. &ovs_reply_header);
  132. if (IS_ERR(reply))
  133. return PTR_ERR(reply);
  134. if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
  135. nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
  136. goto nla_put_failure;
  137. nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
  138. if (!nla)
  139. goto nla_put_failure;
  140. band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
  141. if (!band_nla)
  142. goto nla_put_failure;
  143. /* Currently only DROP band type is supported. */
  144. if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
  145. goto nla_put_failure;
  146. nla_nest_end(reply, band_nla);
  147. nla_nest_end(reply, nla);
  148. genlmsg_end(reply, ovs_reply_header);
  149. return genlmsg_reply(reply, info);
  150. nla_put_failure:
  151. nlmsg_free(reply);
  152. err = -EMSGSIZE;
  153. return err;
  154. }
  155. static struct dp_meter *dp_meter_create(struct nlattr **a)
  156. {
  157. struct nlattr *nla;
  158. int rem;
  159. u16 n_bands = 0;
  160. struct dp_meter *meter;
  161. struct dp_meter_band *band;
  162. int err;
  163. /* Validate attributes, count the bands. */
  164. if (!a[OVS_METER_ATTR_BANDS])
  165. return ERR_PTR(-EINVAL);
  166. nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
  167. if (++n_bands > DP_MAX_BANDS)
  168. return ERR_PTR(-EINVAL);
  169. /* Allocate and set up the meter before locking anything. */
  170. meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
  171. sizeof(*meter), GFP_KERNEL);
  172. if (!meter)
  173. return ERR_PTR(-ENOMEM);
  174. meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  175. meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
  176. meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
  177. meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
  178. spin_lock_init(&meter->lock);
  179. if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
  180. meter->stats = *(struct ovs_flow_stats *)
  181. nla_data(a[OVS_METER_ATTR_STATS]);
  182. }
  183. meter->n_bands = n_bands;
  184. /* Set up meter bands. */
  185. band = meter->bands;
  186. nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
  187. struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
  188. u32 band_max_delta_t;
  189. err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
  190. nla_data(nla), nla_len(nla), band_policy,
  191. NULL);
  192. if (err)
  193. goto exit_free_meter;
  194. if (!attr[OVS_BAND_ATTR_TYPE] ||
  195. !attr[OVS_BAND_ATTR_RATE] ||
  196. !attr[OVS_BAND_ATTR_BURST]) {
  197. err = -EINVAL;
  198. goto exit_free_meter;
  199. }
  200. band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
  201. band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
  202. if (band->rate == 0) {
  203. err = -EINVAL;
  204. goto exit_free_meter;
  205. }
  206. band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
  207. /* Figure out max delta_t that is enough to fill any bucket.
  208. * Keep max_delta_t size to the bucket units:
  209. * pkts => 1/1000 packets, kilobits => bits.
  210. *
  211. * Start with a full bucket.
  212. */
  213. band->bucket = (band->burst_size + band->rate) * 1000ULL;
  214. band_max_delta_t = div_u64(band->bucket, band->rate);
  215. if (band_max_delta_t > meter->max_delta_t)
  216. meter->max_delta_t = band_max_delta_t;
  217. band++;
  218. }
  219. return meter;
  220. exit_free_meter:
  221. kfree(meter);
  222. return ERR_PTR(err);
  223. }
  224. static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
  225. {
  226. struct nlattr **a = info->attrs;
  227. struct dp_meter *meter, *old_meter;
  228. struct sk_buff *reply;
  229. struct ovs_header *ovs_reply_header;
  230. struct ovs_header *ovs_header = info->userhdr;
  231. struct datapath *dp;
  232. int err;
  233. u32 meter_id;
  234. bool failed;
  235. if (!a[OVS_METER_ATTR_ID]) {
  236. return -ENODEV;
  237. }
  238. meter = dp_meter_create(a);
  239. if (IS_ERR_OR_NULL(meter))
  240. return PTR_ERR(meter);
  241. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
  242. &ovs_reply_header);
  243. if (IS_ERR(reply)) {
  244. err = PTR_ERR(reply);
  245. goto exit_free_meter;
  246. }
  247. ovs_lock();
  248. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  249. if (!dp) {
  250. err = -ENODEV;
  251. goto exit_unlock;
  252. }
  253. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  254. /* Cannot fail after this. */
  255. old_meter = lookup_meter(dp, meter_id);
  256. detach_meter(old_meter);
  257. attach_meter(dp, meter);
  258. ovs_unlock();
  259. /* Build response with the meter_id and stats from
  260. * the old meter, if any.
  261. */
  262. failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
  263. WARN_ON(failed);
  264. if (old_meter) {
  265. spin_lock_bh(&old_meter->lock);
  266. if (old_meter->keep_stats) {
  267. err = ovs_meter_cmd_reply_stats(reply, meter_id,
  268. old_meter);
  269. WARN_ON(err);
  270. }
  271. spin_unlock_bh(&old_meter->lock);
  272. ovs_meter_free(old_meter);
  273. }
  274. genlmsg_end(reply, ovs_reply_header);
  275. return genlmsg_reply(reply, info);
  276. exit_unlock:
  277. ovs_unlock();
  278. nlmsg_free(reply);
  279. exit_free_meter:
  280. kfree(meter);
  281. return err;
  282. }
  283. static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
  284. {
  285. struct nlattr **a = info->attrs;
  286. u32 meter_id;
  287. struct ovs_header *ovs_header = info->userhdr;
  288. struct ovs_header *ovs_reply_header;
  289. struct datapath *dp;
  290. int err;
  291. struct sk_buff *reply;
  292. struct dp_meter *meter;
  293. if (!a[OVS_METER_ATTR_ID])
  294. return -EINVAL;
  295. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  296. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
  297. &ovs_reply_header);
  298. if (IS_ERR(reply))
  299. return PTR_ERR(reply);
  300. ovs_lock();
  301. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  302. if (!dp) {
  303. err = -ENODEV;
  304. goto exit_unlock;
  305. }
  306. /* Locate meter, copy stats. */
  307. meter = lookup_meter(dp, meter_id);
  308. if (!meter) {
  309. err = -ENOENT;
  310. goto exit_unlock;
  311. }
  312. spin_lock_bh(&meter->lock);
  313. err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
  314. spin_unlock_bh(&meter->lock);
  315. if (err)
  316. goto exit_unlock;
  317. ovs_unlock();
  318. genlmsg_end(reply, ovs_reply_header);
  319. return genlmsg_reply(reply, info);
  320. exit_unlock:
  321. ovs_unlock();
  322. nlmsg_free(reply);
  323. return err;
  324. }
  325. static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
  326. {
  327. struct nlattr **a = info->attrs;
  328. u32 meter_id;
  329. struct ovs_header *ovs_header = info->userhdr;
  330. struct ovs_header *ovs_reply_header;
  331. struct datapath *dp;
  332. int err;
  333. struct sk_buff *reply;
  334. struct dp_meter *old_meter;
  335. if (!a[OVS_METER_ATTR_ID])
  336. return -EINVAL;
  337. meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
  338. reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
  339. &ovs_reply_header);
  340. if (IS_ERR(reply))
  341. return PTR_ERR(reply);
  342. ovs_lock();
  343. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  344. if (!dp) {
  345. err = -ENODEV;
  346. goto exit_unlock;
  347. }
  348. old_meter = lookup_meter(dp, meter_id);
  349. if (old_meter) {
  350. spin_lock_bh(&old_meter->lock);
  351. err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
  352. WARN_ON(err);
  353. spin_unlock_bh(&old_meter->lock);
  354. detach_meter(old_meter);
  355. }
  356. ovs_unlock();
  357. ovs_meter_free(old_meter);
  358. genlmsg_end(reply, ovs_reply_header);
  359. return genlmsg_reply(reply, info);
  360. exit_unlock:
  361. ovs_unlock();
  362. nlmsg_free(reply);
  363. return err;
  364. }
  365. /* Meter action execution.
  366. *
  367. * Return true 'meter_id' drop band is triggered. The 'skb' should be
  368. * dropped by the caller'.
  369. */
  370. bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
  371. struct sw_flow_key *key, u32 meter_id)
  372. {
  373. struct dp_meter *meter;
  374. struct dp_meter_band *band;
  375. long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
  376. long long int long_delta_ms;
  377. u32 delta_ms;
  378. u32 cost;
  379. int i, band_exceeded_max = -1;
  380. u32 band_exceeded_rate = 0;
  381. meter = lookup_meter(dp, meter_id);
  382. /* Do not drop the packet when there is no meter. */
  383. if (!meter)
  384. return false;
  385. /* Lock the meter while using it. */
  386. spin_lock(&meter->lock);
  387. long_delta_ms = (now_ms - meter->used); /* ms */
  388. /* Make sure delta_ms will not be too large, so that bucket will not
  389. * wrap around below.
  390. */
  391. delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
  392. ? meter->max_delta_t : (u32)long_delta_ms;
  393. /* Update meter statistics.
  394. */
  395. meter->used = now_ms;
  396. meter->stats.n_packets += 1;
  397. meter->stats.n_bytes += skb->len;
  398. /* Bucket rate is either in kilobits per second, or in packets per
  399. * second. We maintain the bucket in the units of either bits or
  400. * 1/1000th of a packet, correspondingly.
  401. * Then, when rate is multiplied with milliseconds, we get the
  402. * bucket units:
  403. * msec * kbps = bits, and
  404. * msec * packets/sec = 1/1000 packets.
  405. *
  406. * 'cost' is the number of bucket units in this packet.
  407. */
  408. cost = (meter->kbps) ? skb->len * 8 : 1000;
  409. /* Update all bands and find the one hit with the highest rate. */
  410. for (i = 0; i < meter->n_bands; ++i) {
  411. long long int max_bucket_size;
  412. band = &meter->bands[i];
  413. max_bucket_size = (band->burst_size + band->rate) * 1000LL;
  414. band->bucket += delta_ms * band->rate;
  415. if (band->bucket > max_bucket_size)
  416. band->bucket = max_bucket_size;
  417. if (band->bucket >= cost) {
  418. band->bucket -= cost;
  419. } else if (band->rate > band_exceeded_rate) {
  420. band_exceeded_rate = band->rate;
  421. band_exceeded_max = i;
  422. }
  423. }
  424. if (band_exceeded_max >= 0) {
  425. /* Update band statistics. */
  426. band = &meter->bands[band_exceeded_max];
  427. band->stats.n_packets += 1;
  428. band->stats.n_bytes += skb->len;
  429. /* Drop band triggered, let the caller drop the 'skb'. */
  430. if (band->type == OVS_METER_BAND_TYPE_DROP) {
  431. spin_unlock(&meter->lock);
  432. return true;
  433. }
  434. }
  435. spin_unlock(&meter->lock);
  436. return false;
  437. }
/* Generic netlink operations of the meter family: one entry per
 * OVS_METER_CMD_* command, each validated against meter_policy and
 * dispatched to the matching ovs_meter_cmd_*() handler.
 */
static struct genl_ops dp_meter_genl_ops[] = {
	{ .cmd = OVS_METER_CMD_FEATURES,
	  .flags = 0,		  /* OK for unprivileged users. */
	  .policy = meter_policy,
	  .doit = ovs_meter_cmd_features
	},
	{ .cmd = OVS_METER_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
				     * privilege.
				     */
	  .policy = meter_policy,
	  .doit = ovs_meter_cmd_set,
	},
	{ .cmd = OVS_METER_CMD_GET,
	  .flags = 0,		  /* OK for unprivileged users. */
	  .policy = meter_policy,
	  .doit = ovs_meter_cmd_get,
	},
	{ .cmd = OVS_METER_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
				     * privilege.
				     */
	  .policy = meter_policy,
	  .doit = ovs_meter_cmd_del
	},
};
/* The single multicast group registered with dp_meter_genl_family. */
static const struct genl_multicast_group ovs_meter_multicast_group = {
	.name = OVS_METER_MCGROUP,
};
/* Generic netlink family for meter commands.  Messages carry a
 * struct ovs_header as the family-specific header; the family exposes the
 * operations in dp_meter_genl_ops and one multicast group.
 */
struct genl_family dp_meter_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_METER_FAMILY,
	.version = OVS_METER_VERSION,
	.maxattr = OVS_METER_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_meter_genl_ops,
	.n_ops = ARRAY_SIZE(dp_meter_genl_ops),
	.mcgrps = &ovs_meter_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
  480. int ovs_meters_init(struct datapath *dp)
  481. {
  482. int i;
  483. dp->meters = kmalloc_array(METER_HASH_BUCKETS,
  484. sizeof(struct hlist_head), GFP_KERNEL);
  485. if (!dp->meters)
  486. return -ENOMEM;
  487. for (i = 0; i < METER_HASH_BUCKETS; i++)
  488. INIT_HLIST_HEAD(&dp->meters[i]);
  489. return 0;
  490. }
  491. void ovs_meters_exit(struct datapath *dp)
  492. {
  493. int i;
  494. for (i = 0; i < METER_HASH_BUCKETS; i++) {
  495. struct hlist_head *head = &dp->meters[i];
  496. struct dp_meter *meter;
  497. struct hlist_node *n;
  498. hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
  499. kfree(meter);
  500. }
  501. kfree(dp->meters);
  502. }