Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ovs-meter-tables'

Tonghao Zhang says:

====================
openvswitch: expand meter tables and fix bug

The patch set expands or shrinks the meter table when necessary,
and the other patches fix bugs or improve the code.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+249 -80
+1 -1
net/openvswitch/datapath.h
··· 82 82 u32 max_headroom; 83 83 84 84 /* Switch meters. */ 85 - struct hlist_head *meters; 85 + struct dp_meter_table meter_tbl; 86 86 }; 87 87 88 88 /**
+232 -75
net/openvswitch/meter.c
··· 12 12 #include <linux/openvswitch.h> 13 13 #include <linux/netlink.h> 14 14 #include <linux/rculist.h> 15 + #include <linux/swap.h> 15 16 16 17 #include <net/netlink.h> 17 18 #include <net/genetlink.h> 18 19 19 20 #include "datapath.h" 20 21 #include "meter.h" 21 - 22 - #define METER_HASH_BUCKETS 1024 23 22 24 23 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { 25 24 [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, ··· 38 39 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, 39 40 }; 40 41 42 + static u32 meter_hash(struct dp_meter_instance *ti, u32 id) 43 + { 44 + return id % ti->n_meters; 45 + } 46 + 41 47 static void ovs_meter_free(struct dp_meter *meter) 42 48 { 43 49 if (!meter) ··· 51 47 kfree_rcu(meter, rcu); 52 48 } 53 49 54 - static struct hlist_head *meter_hash_bucket(const struct datapath *dp, 55 - u32 meter_id) 56 - { 57 - return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; 58 - } 59 - 60 50 /* Call with ovs_mutex or RCU read lock. */ 61 - static struct dp_meter *lookup_meter(const struct datapath *dp, 51 + static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl, 62 52 u32 meter_id) 63 53 { 54 + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); 55 + u32 hash = meter_hash(ti, meter_id); 64 56 struct dp_meter *meter; 65 - struct hlist_head *head; 66 57 67 - head = meter_hash_bucket(dp, meter_id); 68 - hlist_for_each_entry_rcu(meter, head, dp_hash_node, 69 - lockdep_ovsl_is_held()) { 70 - if (meter->id == meter_id) 71 - return meter; 72 - } 58 + meter = rcu_dereference_ovsl(ti->dp_meters[hash]); 59 + if (meter && likely(meter->id == meter_id)) 60 + return meter; 61 + 73 62 return NULL; 74 63 } 75 64 76 - static void attach_meter(struct datapath *dp, struct dp_meter *meter) 65 + static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) 77 66 { 78 - struct hlist_head *head = meter_hash_bucket(dp, meter->id); 67 + struct dp_meter_instance *ti; 79 68 80 - 
hlist_add_head_rcu(&meter->dp_hash_node, head); 69 + ti = kvzalloc(sizeof(*ti) + 70 + sizeof(struct dp_meter *) * size, 71 + GFP_KERNEL); 72 + if (!ti) 73 + return NULL; 74 + 75 + ti->n_meters = size; 76 + 77 + return ti; 81 78 } 82 79 83 - static void detach_meter(struct dp_meter *meter) 80 + static void dp_meter_instance_free(struct dp_meter_instance *ti) 84 81 { 82 + kvfree(ti); 83 + } 84 + 85 + static void dp_meter_instance_free_rcu(struct rcu_head *rcu) 86 + { 87 + struct dp_meter_instance *ti; 88 + 89 + ti = container_of(rcu, struct dp_meter_instance, rcu); 90 + kvfree(ti); 91 + } 92 + 93 + static int 94 + dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size) 95 + { 96 + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); 97 + int n_meters = min(size, ti->n_meters); 98 + struct dp_meter_instance *new_ti; 99 + int i; 100 + 101 + new_ti = dp_meter_instance_alloc(size); 102 + if (!new_ti) 103 + return -ENOMEM; 104 + 105 + for (i = 0; i < n_meters; i++) 106 + new_ti->dp_meters[i] = 107 + rcu_dereference_ovsl(ti->dp_meters[i]); 108 + 109 + rcu_assign_pointer(tbl->ti, new_ti); 110 + call_rcu(&ti->rcu, dp_meter_instance_free_rcu); 111 + 112 + return 0; 113 + } 114 + 115 + static void dp_meter_instance_insert(struct dp_meter_instance *ti, 116 + struct dp_meter *meter) 117 + { 118 + u32 hash; 119 + 120 + hash = meter_hash(ti, meter->id); 121 + rcu_assign_pointer(ti->dp_meters[hash], meter); 122 + } 123 + 124 + static void dp_meter_instance_remove(struct dp_meter_instance *ti, 125 + struct dp_meter *meter) 126 + { 127 + u32 hash; 128 + 129 + hash = meter_hash(ti, meter->id); 130 + RCU_INIT_POINTER(ti->dp_meters[hash], NULL); 131 + } 132 + 133 + static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) 134 + { 135 + struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); 136 + u32 hash = meter_hash(ti, meter->id); 137 + int err; 138 + 139 + /* In generally, slots selected should be empty, because 140 + * OvS uses id-pool to 
fetch a available id. 141 + */ 142 + if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash]))) 143 + return -EBUSY; 144 + 145 + dp_meter_instance_insert(ti, meter); 146 + 147 + /* That function is thread-safe. */ 148 + tbl->count++; 149 + if (tbl->count >= tbl->max_meters_allowed) { 150 + err = -EFBIG; 151 + goto attach_err; 152 + } 153 + 154 + if (tbl->count >= ti->n_meters && 155 + dp_meter_instance_realloc(tbl, ti->n_meters * 2)) { 156 + err = -ENOMEM; 157 + goto attach_err; 158 + } 159 + 160 + return 0; 161 + 162 + attach_err: 163 + dp_meter_instance_remove(ti, meter); 164 + tbl->count--; 165 + return err; 166 + } 167 + 168 + static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) 169 + { 170 + struct dp_meter_instance *ti; 171 + 85 172 ASSERT_OVSL(); 86 - if (meter) 87 - hlist_del_rcu(&meter->dp_hash_node); 173 + if (!meter) 174 + return 0; 175 + 176 + ti = rcu_dereference_ovsl(tbl->ti); 177 + dp_meter_instance_remove(ti, meter); 178 + 179 + tbl->count--; 180 + 181 + /* Shrink the meter array if necessary. */ 182 + if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && 183 + tbl->count <= (ti->n_meters / 4)) { 184 + int half_size = ti->n_meters / 2; 185 + int i; 186 + 187 + /* Avoid hash collision, don't move slots to other place. 188 + * Make sure there are no references of meters in array 189 + * which will be released. 
190 + */ 191 + for (i = half_size; i < ti->n_meters; i++) 192 + if (rcu_dereference_ovsl(ti->dp_meters[i])) 193 + goto out; 194 + 195 + if (dp_meter_instance_realloc(tbl, half_size)) 196 + goto shrink_err; 197 + } 198 + 199 + out: 200 + return 0; 201 + 202 + shrink_err: 203 + dp_meter_instance_insert(ti, meter); 204 + tbl->count++; 205 + return -ENOMEM; 88 206 } 89 207 90 208 static struct sk_buff * ··· 242 116 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) 243 117 goto error; 244 118 245 - if (!meter) 246 - return 0; 247 - 248 119 if (nla_put(reply, OVS_METER_ATTR_STATS, 249 - sizeof(struct ovs_flow_stats), &meter->stats) || 250 - nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, 120 + sizeof(struct ovs_flow_stats), &meter->stats)) 121 + goto error; 122 + 123 + if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, 251 124 OVS_METER_ATTR_PAD)) 252 125 goto error; 253 126 ··· 275 150 276 151 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) 277 152 { 278 - struct sk_buff *reply; 153 + struct ovs_header *ovs_header = info->userhdr; 279 154 struct ovs_header *ovs_reply_header; 280 155 struct nlattr *nla, *band_nla; 281 - int err; 156 + struct sk_buff *reply; 157 + struct datapath *dp; 158 + int err = -EMSGSIZE; 282 159 283 160 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, 284 161 &ovs_reply_header); 285 162 if (IS_ERR(reply)) 286 163 return PTR_ERR(reply); 287 164 288 - if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || 289 - nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) 165 + ovs_lock(); 166 + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 167 + if (!dp) { 168 + err = -ENODEV; 169 + goto exit_unlock; 170 + } 171 + 172 + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, 173 + dp->meter_tbl.max_meters_allowed)) 174 + goto exit_unlock; 175 + 176 + ovs_unlock(); 177 + 178 + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) 290 179 goto nla_put_failure; 
291 180 292 181 nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); ··· 319 180 genlmsg_end(reply, ovs_reply_header); 320 181 return genlmsg_reply(reply, info); 321 182 183 + exit_unlock: 184 + ovs_unlock(); 322 185 nla_put_failure: 323 186 nlmsg_free(reply); 324 - err = -EMSGSIZE; 325 187 return err; 326 188 } 327 189 ··· 392 252 * 393 253 * Start with a full bucket. 394 254 */ 395 - band->bucket = (band->burst_size + band->rate) * 1000; 255 + band->bucket = (band->burst_size + band->rate) * 1000ULL; 396 256 band_max_delta_t = band->bucket / band->rate; 397 257 if (band_max_delta_t > meter->max_delta_t) 398 258 meter->max_delta_t = band_max_delta_t; ··· 413 273 struct sk_buff *reply; 414 274 struct ovs_header *ovs_reply_header; 415 275 struct ovs_header *ovs_header = info->userhdr; 276 + struct dp_meter_table *meter_tbl; 416 277 struct datapath *dp; 417 278 int err; 418 279 u32 meter_id; 419 280 bool failed; 420 281 421 - if (!a[OVS_METER_ATTR_ID]) { 422 - return -ENODEV; 423 - } 282 + if (!a[OVS_METER_ATTR_ID]) 283 + return -EINVAL; 424 284 425 285 meter = dp_meter_create(a); 426 286 if (IS_ERR_OR_NULL(meter)) ··· 440 300 goto exit_unlock; 441 301 } 442 302 303 + meter_tbl = &dp->meter_tbl; 443 304 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 444 305 445 - /* Cannot fail after this. 
*/ 446 - old_meter = lookup_meter(dp, meter_id); 447 - detach_meter(old_meter); 448 - attach_meter(dp, meter); 306 + old_meter = lookup_meter(meter_tbl, meter_id); 307 + err = detach_meter(meter_tbl, old_meter); 308 + if (err) 309 + goto exit_unlock; 310 + 311 + err = attach_meter(meter_tbl, meter); 312 + if (err) 313 + goto exit_unlock; 314 + 449 315 ovs_unlock(); 450 316 451 317 /* Build response with the meter_id and stats from ··· 483 337 484 338 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) 485 339 { 486 - struct nlattr **a = info->attrs; 487 - u32 meter_id; 488 340 struct ovs_header *ovs_header = info->userhdr; 489 341 struct ovs_header *ovs_reply_header; 490 - struct datapath *dp; 491 - int err; 492 - struct sk_buff *reply; 342 + struct nlattr **a = info->attrs; 493 343 struct dp_meter *meter; 344 + struct sk_buff *reply; 345 + struct datapath *dp; 346 + u32 meter_id; 347 + int err; 494 348 495 349 if (!a[OVS_METER_ATTR_ID]) 496 350 return -EINVAL; ··· 511 365 } 512 366 513 367 /* Locate meter, copy stats. 
*/ 514 - meter = lookup_meter(dp, meter_id); 368 + meter = lookup_meter(&dp->meter_tbl, meter_id); 515 369 if (!meter) { 516 370 err = -ENOENT; 517 371 goto exit_unlock; ··· 536 390 537 391 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) 538 392 { 539 - struct nlattr **a = info->attrs; 540 - u32 meter_id; 541 393 struct ovs_header *ovs_header = info->userhdr; 542 394 struct ovs_header *ovs_reply_header; 543 - struct datapath *dp; 544 - int err; 545 - struct sk_buff *reply; 395 + struct nlattr **a = info->attrs; 546 396 struct dp_meter *old_meter; 397 + struct sk_buff *reply; 398 + struct datapath *dp; 399 + u32 meter_id; 400 + int err; 547 401 548 402 if (!a[OVS_METER_ATTR_ID]) 549 403 return -EINVAL; 550 - meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 551 404 552 405 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, 553 406 &ovs_reply_header); ··· 561 416 goto exit_unlock; 562 417 } 563 418 564 - old_meter = lookup_meter(dp, meter_id); 419 + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); 420 + old_meter = lookup_meter(&dp->meter_tbl, meter_id); 565 421 if (old_meter) { 566 422 spin_lock_bh(&old_meter->lock); 567 423 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); 568 424 WARN_ON(err); 569 425 spin_unlock_bh(&old_meter->lock); 570 - detach_meter(old_meter); 426 + 427 + err = detach_meter(&dp->meter_tbl, old_meter); 428 + if (err) 429 + goto exit_unlock; 571 430 } 431 + 572 432 ovs_unlock(); 573 433 ovs_meter_free(old_meter); 574 434 genlmsg_end(reply, ovs_reply_header); ··· 593 443 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, 594 444 struct sw_flow_key *key, u32 meter_id) 595 445 { 596 - struct dp_meter *meter; 597 - struct dp_meter_band *band; 598 446 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); 599 447 long long int long_delta_ms; 600 - u32 delta_ms; 601 - u32 cost; 448 + struct dp_meter_band *band; 449 + struct dp_meter *meter; 602 450 int i, band_exceeded_max = -1; 603 451 u32 
band_exceeded_rate = 0; 452 + u32 delta_ms; 453 + u32 cost; 604 454 605 - meter = lookup_meter(dp, meter_id); 455 + meter = lookup_meter(&dp->meter_tbl, meter_id); 606 456 /* Do not drop the packet when there is no meter. */ 607 457 if (!meter) 608 458 return false; ··· 720 570 721 571 int ovs_meters_init(struct datapath *dp) 722 572 { 723 - int i; 573 + struct dp_meter_table *tbl = &dp->meter_tbl; 574 + struct dp_meter_instance *ti; 575 + unsigned long free_mem_bytes; 724 576 725 - dp->meters = kmalloc_array(METER_HASH_BUCKETS, 726 - sizeof(struct hlist_head), GFP_KERNEL); 727 - 728 - if (!dp->meters) 577 + ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); 578 + if (!ti) 729 579 return -ENOMEM; 730 580 731 - for (i = 0; i < METER_HASH_BUCKETS; i++) 732 - INIT_HLIST_HEAD(&dp->meters[i]); 581 + /* Allow meters in a datapath to use ~3.12% of physical memory. */ 582 + free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5); 583 + tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter), 584 + DP_METER_NUM_MAX); 585 + if (!tbl->max_meters_allowed) 586 + goto out_err; 587 + 588 + rcu_assign_pointer(tbl->ti, ti); 589 + tbl->count = 0; 733 590 734 591 return 0; 592 + 593 + out_err: 594 + dp_meter_instance_free(ti); 595 + return -ENOMEM; 735 596 } 736 597 737 598 void ovs_meters_exit(struct datapath *dp) 738 599 { 600 + struct dp_meter_table *tbl = &dp->meter_tbl; 601 + struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti); 739 602 int i; 740 603 741 - for (i = 0; i < METER_HASH_BUCKETS; i++) { 742 - struct hlist_head *head = &dp->meters[i]; 743 - struct dp_meter *meter; 744 - struct hlist_node *n; 604 + for (i = 0; i < ti->n_meters; i++) 605 + ovs_meter_free(ti->dp_meters[i]); 745 606 746 - hlist_for_each_entry_safe(meter, n, head, dp_hash_node) 747 - kfree(meter); 748 - } 749 - 750 - kfree(dp->meters); 607 + dp_meter_instance_free(ti); 751 608 }
+16 -4
net/openvswitch/meter.h
··· 13 13 #include <linux/openvswitch.h> 14 14 #include <linux/genetlink.h> 15 15 #include <linux/skbuff.h> 16 + #include <linux/bits.h> 16 17 17 18 #include "flow.h" 18 19 struct datapath; 19 20 20 21 #define DP_MAX_BANDS 1 22 + #define DP_METER_ARRAY_SIZE_MIN BIT_ULL(10) 23 + #define DP_METER_NUM_MAX (200000UL) 21 24 22 25 struct dp_meter_band { 23 26 u32 type; 24 27 u32 rate; 25 28 u32 burst_size; 26 - u32 bucket; /* 1/1000 packets, or in bits */ 29 + u64 bucket; /* 1/1000 packets, or in bits */ 27 30 struct ovs_flow_stats stats; 28 31 }; 29 32 30 33 struct dp_meter { 31 34 spinlock_t lock; /* Per meter lock */ 32 35 struct rcu_head rcu; 33 - struct hlist_node dp_hash_node; /*Element in datapath->meters 34 - * hash table. 35 - */ 36 36 u32 id; 37 37 u16 kbps:1, keep_stats:1; 38 38 u16 n_bands; ··· 40 40 u64 used; 41 41 struct ovs_flow_stats stats; 42 42 struct dp_meter_band bands[]; 43 + }; 44 + 45 + struct dp_meter_instance { 46 + struct rcu_head rcu; 47 + u32 n_meters; 48 + struct dp_meter __rcu *dp_meters[]; 49 + }; 50 + 51 + struct dp_meter_table { 52 + struct dp_meter_instance __rcu *ti; 53 + u32 count; 54 + u32 max_meters_allowed; 43 55 }; 44 56 45 57 extern struct genl_family dp_meter_genl_family;