Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: bridge: use rhashtable for fdbs

Before this patch the bridge used a fixed 256 element hash table which
was fine for small use cases (in my tests it starts to degrade
above 1000 entries), but it wasn't enough for medium or large
scale deployments. Modern setups have thousands of participants in a
single bridge; even just enabling vlans and adding a few thousand vlan
entries will cause a few thousand fdbs to be automatically inserted per
participating port. So we need to scale the fdb table considerably to
cope with modern workloads, and this patch converts it to use a
rhashtable for its operations, thus improving the bridge scalability.
Tests show the following results (10 runs each): at up to 1000 entries
rhashtable is ~3% slower, at 2000 rhashtable is 30% faster, at 3000 it
is 2 times faster and at 30000 it is 50 times faster.
Obviously this happens because of the properties of the two constructs
and is expected: rhashtable keeps pretty much a constant time even with
10000000 entries (tested), while the fixed hash table struggles
considerably even above 10000.
As a side effect this also reduces the net_bridge struct size from 3248
bytes to 1344 bytes. Also note that the key struct is 8 bytes.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Nikolay Aleksandrov and committed by
David S. Miller
eb793583 e8952bab

+213 -221
+2 -2
include/trace/events/bridge.h
··· 82 82 TP_fast_assign( 83 83 __assign_str(br_dev, br->dev->name); 84 84 __assign_str(dev, f->dst ? f->dst->dev->name : "null"); 85 - memcpy(__entry->addr, f->addr.addr, ETH_ALEN); 86 - __entry->vid = f->vlan_id; 85 + memcpy(__entry->addr, f->key.addr.addr, ETH_ALEN); 86 + __entry->vid = f->key.vlan_id; 87 87 ), 88 88 89 89 TP_printk("br_dev %s dev %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u",
+10
net/bridge/br_device.c
··· 125 125 if (!br->stats) 126 126 return -ENOMEM; 127 127 128 + err = br_fdb_hash_init(br); 129 + if (err) { 130 + free_percpu(br->stats); 131 + return err; 132 + } 133 + 128 134 err = br_vlan_init(br); 129 135 if (err) { 130 136 free_percpu(br->stats); 137 + br_fdb_hash_fini(br); 131 138 return err; 132 139 } 133 140 ··· 142 135 if (err) { 143 136 free_percpu(br->stats); 144 137 br_vlan_flush(br); 138 + br_fdb_hash_fini(br); 145 139 } 146 140 br_set_lockdep_class(dev); 147 141 ··· 156 148 br_multicast_dev_del(br); 157 149 br_multicast_uninit_stats(br); 158 150 br_vlan_flush(br); 151 + br_fdb_hash_fini(br); 159 152 free_percpu(br->stats); 160 153 } 161 154 ··· 425 416 br->dev = dev; 426 417 spin_lock_init(&br->lock); 427 418 INIT_LIST_HEAD(&br->port_list); 419 + INIT_HLIST_HEAD(&br->fdb_list); 428 420 spin_lock_init(&br->hash_lock); 429 421 430 422 br->bridge_id.prio[0] = 0x80;
+185 -211
net/bridge/br_fdb.c
··· 28 28 #include <trace/events/bridge.h> 29 29 #include "br_private.h" 30 30 31 + static const struct rhashtable_params br_fdb_rht_params = { 32 + .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode), 33 + .key_offset = offsetof(struct net_bridge_fdb_entry, key), 34 + .key_len = sizeof(struct net_bridge_fdb_key), 35 + .automatic_shrinking = true, 36 + .locks_mul = 1, 37 + }; 38 + 31 39 static struct kmem_cache *br_fdb_cache __read_mostly; 32 40 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 33 41 const unsigned char *addr, u16 vid); 34 42 static void fdb_notify(struct net_bridge *br, 35 43 const struct net_bridge_fdb_entry *, int); 36 - 37 - static u32 fdb_salt __read_mostly; 38 44 39 45 int __init br_fdb_init(void) 40 46 { ··· 51 45 if (!br_fdb_cache) 52 46 return -ENOMEM; 53 47 54 - get_random_bytes(&fdb_salt, sizeof(fdb_salt)); 55 48 return 0; 56 49 } 57 50 ··· 59 54 kmem_cache_destroy(br_fdb_cache); 60 55 } 61 56 57 + int br_fdb_hash_init(struct net_bridge *br) 58 + { 59 + return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params); 60 + } 61 + 62 + void br_fdb_hash_fini(struct net_bridge *br) 63 + { 64 + rhashtable_destroy(&br->fdb_hash_tbl); 65 + } 62 66 63 67 /* if topology_changing then use forward_delay (default 15 sec) 64 68 * otherwise keep longer (default 5 minutes) ··· 84 70 time_before_eq(fdb->updated + hold_time(br), jiffies); 85 71 } 86 72 87 - static inline int br_mac_hash(const unsigned char *mac, __u16 vid) 88 - { 89 - /* use 1 byte of OUI and 3 bytes of NIC */ 90 - u32 key = get_unaligned((u32 *)(mac + 2)); 91 - return jhash_2words(key, vid, fdb_salt) & (BR_HASH_SIZE - 1); 92 - } 93 - 94 73 static void fdb_rcu_free(struct rcu_head *head) 95 74 { 96 75 struct net_bridge_fdb_entry *ent ··· 91 84 kmem_cache_free(br_fdb_cache, ent); 92 85 } 93 86 94 - static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, 87 + static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl, 95 88 
const unsigned char *addr, 96 89 __u16 vid) 97 90 { 98 - struct net_bridge_fdb_entry *f; 91 + struct net_bridge_fdb_key key; 99 92 100 93 WARN_ON_ONCE(!rcu_read_lock_held()); 101 94 102 - hlist_for_each_entry_rcu(f, head, hlist) 103 - if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) 104 - break; 95 + key.vlan_id = vid; 96 + memcpy(key.addr.addr, addr, sizeof(key.addr.addr)); 105 97 106 - return f; 98 + return rhashtable_lookup(tbl, &key, br_fdb_rht_params); 107 99 } 108 100 109 101 /* requires bridge hash_lock */ ··· 110 104 const unsigned char *addr, 111 105 __u16 vid) 112 106 { 113 - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 114 107 struct net_bridge_fdb_entry *fdb; 115 108 116 109 lockdep_assert_held_once(&br->hash_lock); 117 110 118 111 rcu_read_lock(); 119 - fdb = fdb_find_rcu(head, addr, vid); 112 + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); 120 113 rcu_read_unlock(); 121 114 122 115 return fdb; ··· 125 120 const unsigned char *addr, 126 121 __u16 vid) 127 122 { 128 - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 129 - 130 - return fdb_find_rcu(head, addr, vid); 123 + return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); 131 124 } 132 125 133 126 /* When a static FDB entry is added, the mac address from the entry is ··· 178 175 trace_fdb_delete(br, f); 179 176 180 177 if (f->is_static) 181 - fdb_del_hw_addr(br, f->addr.addr); 178 + fdb_del_hw_addr(br, f->key.addr.addr); 182 179 183 - hlist_del_init_rcu(&f->hlist); 180 + hlist_del_init_rcu(&f->fdb_node); 181 + rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, 182 + br_fdb_rht_params); 184 183 fdb_notify(br, f, RTM_DELNEIGH); 185 184 call_rcu(&f->rcu, fdb_rcu_free); 186 185 } ··· 192 187 const struct net_bridge_port *p, 193 188 struct net_bridge_fdb_entry *f) 194 189 { 195 - const unsigned char *addr = f->addr.addr; 190 + const unsigned char *addr = f->key.addr.addr; 196 191 struct net_bridge_vlan_group *vg; 197 192 const struct net_bridge_vlan *v; 198 193 
struct net_bridge_port *op; 199 - u16 vid = f->vlan_id; 194 + u16 vid = f->key.vlan_id; 200 195 201 196 /* Maybe another port has same hw addr? */ 202 197 list_for_each_entry(op, &br->port_list, list) { ··· 238 233 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) 239 234 { 240 235 struct net_bridge_vlan_group *vg; 236 + struct net_bridge_fdb_entry *f; 241 237 struct net_bridge *br = p->br; 242 238 struct net_bridge_vlan *v; 243 - int i; 244 239 245 240 spin_lock_bh(&br->hash_lock); 246 - 247 241 vg = nbp_vlan_group(p); 248 - /* Search all chains since old address/hash is unknown */ 249 - for (i = 0; i < BR_HASH_SIZE; i++) { 250 - struct hlist_node *h; 251 - hlist_for_each(h, &br->hash[i]) { 252 - struct net_bridge_fdb_entry *f; 242 + hlist_for_each_entry(f, &br->fdb_list, fdb_node) { 243 + if (f->dst == p && f->is_local && !f->added_by_user) { 244 + /* delete old one */ 245 + fdb_delete_local(br, p, f); 253 246 254 - f = hlist_entry(h, struct net_bridge_fdb_entry, hlist); 255 - if (f->dst == p && f->is_local && !f->added_by_user) { 256 - /* delete old one */ 257 - fdb_delete_local(br, p, f); 258 - 259 - /* if this port has no vlan information 260 - * configured, we can safely be done at 261 - * this point. 262 - */ 263 - if (!vg || !vg->num_vlans) 264 - goto insert; 265 - } 247 + /* if this port has no vlan information 248 + * configured, we can safely be done at 249 + * this point. 
250 + */ 251 + if (!vg || !vg->num_vlans) 252 + goto insert; 266 253 } 267 254 } 268 255 ··· 313 316 { 314 317 struct net_bridge *br = container_of(work, struct net_bridge, 315 318 gc_work.work); 319 + struct net_bridge_fdb_entry *f = NULL; 316 320 unsigned long delay = hold_time(br); 317 321 unsigned long work_delay = delay; 318 322 unsigned long now = jiffies; 319 - int i; 320 323 321 - for (i = 0; i < BR_HASH_SIZE; i++) { 322 - struct net_bridge_fdb_entry *f; 323 - struct hlist_node *n; 324 + /* this part is tricky, in order to avoid blocking learning and 325 + * consequently forwarding, we rely on rcu to delete objects with 326 + * delayed freeing allowing us to continue traversing 327 + */ 328 + rcu_read_lock(); 329 + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 330 + unsigned long this_timer; 324 331 325 - if (!br->hash[i].first) 332 + if (f->is_static || f->added_by_external_learn) 326 333 continue; 327 - 328 - spin_lock_bh(&br->hash_lock); 329 - hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { 330 - unsigned long this_timer; 331 - 332 - if (f->is_static) 333 - continue; 334 - if (f->added_by_external_learn) 335 - continue; 336 - this_timer = f->updated + delay; 337 - if (time_after(this_timer, now)) 338 - work_delay = min(work_delay, this_timer - now); 339 - else 334 + this_timer = f->updated + delay; 335 + if (time_after(this_timer, now)) { 336 + work_delay = min(work_delay, this_timer - now); 337 + } else { 338 + spin_lock_bh(&br->hash_lock); 339 + if (!hlist_unhashed(&f->fdb_node)) 340 340 fdb_delete(br, f); 341 + spin_unlock_bh(&br->hash_lock); 341 342 } 342 - spin_unlock_bh(&br->hash_lock); 343 - cond_resched(); 344 343 } 344 + rcu_read_unlock(); 345 345 346 346 /* Cleanup minimum 10 milliseconds apart */ 347 347 work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); ··· 348 354 /* Completely flush all dynamic entries in forwarding database.*/ 349 355 void br_fdb_flush(struct net_bridge *br) 350 356 { 351 - int i; 357 + 
struct net_bridge_fdb_entry *f; 358 + struct hlist_node *tmp; 352 359 353 360 spin_lock_bh(&br->hash_lock); 354 - for (i = 0; i < BR_HASH_SIZE; i++) { 355 - struct net_bridge_fdb_entry *f; 356 - struct hlist_node *n; 357 - hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { 358 - if (!f->is_static) 359 - fdb_delete(br, f); 360 - } 361 + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { 362 + if (!f->is_static) 363 + fdb_delete(br, f); 361 364 } 362 365 spin_unlock_bh(&br->hash_lock); 363 366 } ··· 368 377 u16 vid, 369 378 int do_all) 370 379 { 371 - int i; 380 + struct net_bridge_fdb_entry *f; 381 + struct hlist_node *tmp; 372 382 373 383 spin_lock_bh(&br->hash_lock); 374 - for (i = 0; i < BR_HASH_SIZE; i++) { 375 - struct hlist_node *h, *g; 384 + hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { 385 + if (f->dst != p) 386 + continue; 376 387 377 - hlist_for_each_safe(h, g, &br->hash[i]) { 378 - struct net_bridge_fdb_entry *f 379 - = hlist_entry(h, struct net_bridge_fdb_entry, hlist); 380 - if (f->dst != p) 388 + if (!do_all) 389 + if (f->is_static || (vid && f->key.vlan_id != vid)) 381 390 continue; 382 391 383 - if (!do_all) 384 - if (f->is_static || (vid && f->vlan_id != vid)) 385 - continue; 386 - 387 - if (f->is_local) 388 - fdb_delete_local(br, p, f); 389 - else 390 - fdb_delete(br, f); 391 - } 392 + if (f->is_local) 393 + fdb_delete_local(br, p, f); 394 + else 395 + fdb_delete(br, f); 392 396 } 393 397 spin_unlock_bh(&br->hash_lock); 394 398 } ··· 419 433 int br_fdb_fillbuf(struct net_bridge *br, void *buf, 420 434 unsigned long maxnum, unsigned long skip) 421 435 { 422 - struct __fdb_entry *fe = buf; 423 - int i, num = 0; 424 436 struct net_bridge_fdb_entry *f; 437 + struct __fdb_entry *fe = buf; 438 + int num = 0; 425 439 426 440 memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); 427 441 428 442 rcu_read_lock(); 429 - for (i = 0; i < BR_HASH_SIZE; i++) { 430 - hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { 431 - if (num >= 
maxnum) 432 - goto out; 443 + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 444 + if (num >= maxnum) 445 + break; 433 446 434 - if (has_expired(br, f)) 435 - continue; 447 + if (has_expired(br, f)) 448 + continue; 436 449 437 - /* ignore pseudo entry for local MAC address */ 438 - if (!f->dst) 439 - continue; 450 + /* ignore pseudo entry for local MAC address */ 451 + if (!f->dst) 452 + continue; 440 453 441 - if (skip) { 442 - --skip; 443 - continue; 444 - } 445 - 446 - /* convert from internal format to API */ 447 - memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); 448 - 449 - /* due to ABI compat need to split into hi/lo */ 450 - fe->port_no = f->dst->port_no; 451 - fe->port_hi = f->dst->port_no >> 8; 452 - 453 - fe->is_local = f->is_local; 454 - if (!f->is_static) 455 - fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); 456 - ++fe; 457 - ++num; 454 + if (skip) { 455 + --skip; 456 + continue; 458 457 } 459 - } 460 458 461 - out: 459 + /* convert from internal format to API */ 460 + memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); 461 + 462 + /* due to ABI compat need to split into hi/lo */ 463 + fe->port_no = f->dst->port_no; 464 + fe->port_hi = f->dst->port_no >> 8; 465 + 466 + fe->is_local = f->is_local; 467 + if (!f->is_static) 468 + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); 469 + ++fe; 470 + ++num; 471 + } 462 472 rcu_read_unlock(); 463 473 464 474 return num; 465 475 } 466 476 467 - static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, 477 + static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, 468 478 struct net_bridge_port *source, 469 479 const unsigned char *addr, 470 480 __u16 vid, ··· 471 489 472 490 fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); 473 491 if (fdb) { 474 - memcpy(fdb->addr.addr, addr, ETH_ALEN); 492 + memcpy(fdb->key.addr.addr, addr, ETH_ALEN); 475 493 fdb->dst = source; 476 - fdb->vlan_id = vid; 494 + fdb->key.vlan_id = vid; 477 495 
fdb->is_local = is_local; 478 496 fdb->is_static = is_static; 479 497 fdb->added_by_user = 0; 480 498 fdb->added_by_external_learn = 0; 481 499 fdb->offloaded = 0; 482 500 fdb->updated = fdb->used = jiffies; 483 - hlist_add_head_rcu(&fdb->hlist, head); 501 + if (rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, 502 + &fdb->rhnode, 503 + br_fdb_rht_params)) { 504 + kmem_cache_free(br_fdb_cache, fdb); 505 + fdb = NULL; 506 + } else { 507 + hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); 508 + } 484 509 } 485 510 return fdb; 486 511 } ··· 495 506 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, 496 507 const unsigned char *addr, u16 vid) 497 508 { 498 - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 499 509 struct net_bridge_fdb_entry *fdb; 500 510 501 511 if (!is_valid_ether_addr(addr)) ··· 512 524 fdb_delete(br, fdb); 513 525 } 514 526 515 - fdb = fdb_create(head, source, addr, vid, 1, 1); 527 + fdb = fdb_create(br, source, addr, vid, 1, 1); 516 528 if (!fdb) 517 529 return -ENOMEM; 518 530 ··· 536 548 void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, 537 549 const unsigned char *addr, u16 vid, bool added_by_user) 538 550 { 539 - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 540 551 struct net_bridge_fdb_entry *fdb; 541 552 bool fdb_modified = false; 542 553 ··· 548 561 source->state == BR_STATE_FORWARDING)) 549 562 return; 550 563 551 - fdb = fdb_find_rcu(head, addr, vid); 564 + fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); 552 565 if (likely(fdb)) { 553 566 /* attempt to update an entry for a local interface */ 554 567 if (unlikely(fdb->is_local)) { ··· 577 590 } 578 591 } else { 579 592 spin_lock(&br->hash_lock); 580 - if (likely(!fdb_find_rcu(head, addr, vid))) { 581 - fdb = fdb_create(head, source, addr, vid, 0, 0); 582 - if (fdb) { 583 - if (unlikely(added_by_user)) 584 - fdb->added_by_user = 1; 585 - trace_br_fdb_update(br, source, addr, vid, added_by_user); 586 - 
fdb_notify(br, fdb, RTM_NEWNEIGH); 587 - } 593 + fdb = fdb_create(br, source, addr, vid, 0, 0); 594 + if (fdb) { 595 + if (unlikely(added_by_user)) 596 + fdb->added_by_user = 1; 597 + trace_br_fdb_update(br, source, addr, vid, 598 + added_by_user); 599 + fdb_notify(br, fdb, RTM_NEWNEIGH); 588 600 } 589 601 /* else we lose race and someone else inserts 590 602 * it first, don't bother updating ··· 632 646 if (fdb->added_by_external_learn) 633 647 ndm->ndm_flags |= NTF_EXT_LEARNED; 634 648 635 - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) 649 + if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr)) 636 650 goto nla_put_failure; 637 651 if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) 638 652 goto nla_put_failure; ··· 643 657 if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) 644 658 goto nla_put_failure; 645 659 646 - if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) 660 + if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), 661 + &fdb->key.vlan_id)) 647 662 goto nla_put_failure; 648 663 649 664 nlmsg_end(skb, nlh); ··· 698 711 int *idx) 699 712 { 700 713 struct net_bridge *br = netdev_priv(dev); 714 + struct net_bridge_fdb_entry *f; 701 715 int err = 0; 702 - int i; 703 716 704 717 if (!(dev->priv_flags & IFF_EBRIDGE)) 705 - goto out; 718 + return err; 706 719 707 720 if (!filter_dev) { 708 721 err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); 709 722 if (err < 0) 710 - goto out; 723 + return err; 711 724 } 712 725 713 - for (i = 0; i < BR_HASH_SIZE; i++) { 714 - struct net_bridge_fdb_entry *f; 715 - 716 - hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { 717 - 718 - if (*idx < cb->args[2]) 726 + rcu_read_lock(); 727 + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 728 + if (*idx < cb->args[2]) 729 + goto skip; 730 + if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { 731 + if (filter_dev != dev) 719 732 goto skip; 720 - 721 - if (filter_dev && 722 - (!f->dst || f->dst->dev != filter_dev)) { 723 - if 
(filter_dev != dev) 724 - goto skip; 725 - /* !f->dst is a special case for bridge 726 - * It means the MAC belongs to the bridge 727 - * Therefore need a little more filtering 728 - * we only want to dump the !f->dst case 729 - */ 730 - if (f->dst) 731 - goto skip; 732 - } 733 - if (!filter_dev && f->dst) 733 + /* !f->dst is a special case for bridge 734 + * It means the MAC belongs to the bridge 735 + * Therefore need a little more filtering 736 + * we only want to dump the !f->dst case 737 + */ 738 + if (f->dst) 734 739 goto skip; 735 - 736 - err = fdb_fill_info(skb, br, f, 737 - NETLINK_CB(cb->skb).portid, 738 - cb->nlh->nlmsg_seq, 739 - RTM_NEWNEIGH, 740 - NLM_F_MULTI); 741 - if (err < 0) 742 - goto out; 743 - skip: 744 - *idx += 1; 745 740 } 746 - } 741 + if (!filter_dev && f->dst) 742 + goto skip; 747 743 748 - out: 744 + err = fdb_fill_info(skb, br, f, 745 + NETLINK_CB(cb->skb).portid, 746 + cb->nlh->nlmsg_seq, 747 + RTM_NEWNEIGH, 748 + NLM_F_MULTI); 749 + if (err < 0) 750 + break; 751 + skip: 752 + *idx += 1; 753 + } 754 + rcu_read_unlock(); 755 + 749 756 return err; 750 757 } 751 758 ··· 747 766 static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, 748 767 const __u8 *addr, __u16 state, __u16 flags, __u16 vid) 749 768 { 750 - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 751 769 struct net_bridge_fdb_entry *fdb; 752 770 bool modified = false; 753 771 ··· 767 787 if (!(flags & NLM_F_CREATE)) 768 788 return -ENOENT; 769 789 770 - fdb = fdb_create(head, source, addr, vid, 0, 0); 790 + fdb = fdb_create(br, source, addr, vid, 0, 0); 771 791 if (!fdb) 772 792 return -ENOMEM; 773 793 ··· 992 1012 993 1013 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) 994 1014 { 995 - struct net_bridge_fdb_entry *fdb, *tmp; 996 - int i; 1015 + struct net_bridge_fdb_entry *f, *tmp; 997 1016 int err; 998 1017 999 1018 ASSERT_RTNL(); 1000 1019 1001 - for (i = 0; i < BR_HASH_SIZE; i++) { 1002 - 
hlist_for_each_entry(fdb, &br->hash[i], hlist) { 1003 - /* We only care for static entries */ 1004 - if (!fdb->is_static) 1005 - continue; 1006 - 1007 - err = dev_uc_add(p->dev, fdb->addr.addr); 1008 - if (err) 1009 - goto rollback; 1010 - } 1020 + /* the key here is that static entries change only under rtnl */ 1021 + rcu_read_lock(); 1022 + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 1023 + /* We only care for static entries */ 1024 + if (!f->is_static) 1025 + continue; 1026 + err = dev_uc_add(p->dev, f->key.addr.addr); 1027 + if (err) 1028 + goto rollback; 1011 1029 } 1012 - return 0; 1030 + done: 1031 + rcu_read_unlock(); 1032 + 1033 + return err; 1013 1034 1014 1035 rollback: 1015 - for (i = 0; i < BR_HASH_SIZE; i++) { 1016 - hlist_for_each_entry(tmp, &br->hash[i], hlist) { 1017 - /* If we reached the fdb that failed, we can stop */ 1018 - if (tmp == fdb) 1019 - break; 1020 - 1021 - /* We only care for static entries */ 1022 - if (!tmp->is_static) 1023 - continue; 1024 - 1025 - dev_uc_del(p->dev, tmp->addr.addr); 1026 - } 1036 + hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) { 1037 + /* We only care for static entries */ 1038 + if (!tmp->is_static) 1039 + continue; 1040 + if (tmp == f) 1041 + break; 1042 + dev_uc_del(p->dev, tmp->key.addr.addr); 1027 1043 } 1028 - return err; 1044 + 1045 + goto done; 1029 1046 } 1030 1047 1031 1048 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) 1032 1049 { 1033 - struct net_bridge_fdb_entry *fdb; 1034 - int i; 1050 + struct net_bridge_fdb_entry *f; 1035 1051 1036 1052 ASSERT_RTNL(); 1037 1053 1038 - for (i = 0; i < BR_HASH_SIZE; i++) { 1039 - hlist_for_each_entry_rcu(fdb, &br->hash[i], hlist) { 1040 - /* We only care for static entries */ 1041 - if (!fdb->is_static) 1042 - continue; 1054 + rcu_read_lock(); 1055 + hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { 1056 + /* We only care for static entries */ 1057 + if (!f->is_static) 1058 + continue; 1043 1059 1044 - 
dev_uc_del(p->dev, fdb->addr.addr); 1045 - } 1060 + dev_uc_del(p->dev, f->key.addr.addr); 1046 1061 } 1062 + rcu_read_unlock(); 1047 1063 } 1048 1064 1049 1065 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, 1050 1066 const unsigned char *addr, u16 vid) 1051 1067 { 1052 1068 struct net_bridge_fdb_entry *fdb; 1053 - struct hlist_head *head; 1054 1069 bool modified = false; 1055 1070 int err = 0; 1056 1071 ··· 1053 1078 1054 1079 spin_lock_bh(&br->hash_lock); 1055 1080 1056 - head = &br->hash[br_mac_hash(addr, vid)]; 1057 1081 fdb = br_fdb_find(br, addr, vid); 1058 1082 if (!fdb) { 1059 - fdb = fdb_create(head, p, addr, vid, 0, 0); 1083 + fdb = fdb_create(br, p, addr, vid, 0, 0); 1060 1084 if (!fdb) { 1061 1085 err = -ENOMEM; 1062 1086 goto err_unlock;
+12 -4
net/bridge/br_private.h
··· 168 168 u16 pvid; 169 169 }; 170 170 171 + struct net_bridge_fdb_key { 172 + mac_addr addr; 173 + u16 vlan_id; 174 + }; 175 + 171 176 struct net_bridge_fdb_entry { 172 - struct hlist_node hlist; 177 + struct rhash_head rhnode; 173 178 struct net_bridge_port *dst; 174 179 175 - mac_addr addr; 176 - __u16 vlan_id; 180 + struct net_bridge_fdb_key key; 181 + struct hlist_node fdb_node; 177 182 unsigned char is_local:1, 178 183 is_static:1, 179 184 added_by_user:1, ··· 320 315 struct net_bridge_vlan_group __rcu *vlgrp; 321 316 #endif 322 317 323 - struct hlist_head hash[BR_HASH_SIZE]; 318 + struct rhashtable fdb_hash_tbl; 324 319 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) 325 320 union { 326 321 struct rtable fake_rtable; ··· 410 405 int offload_fwd_mark; 411 406 #endif 412 407 bool neigh_suppress_enabled; 408 + struct hlist_head fdb_list; 413 409 }; 414 410 415 411 struct br_input_skb_cb { ··· 521 515 /* br_fdb.c */ 522 516 int br_fdb_init(void); 523 517 void br_fdb_fini(void); 518 + int br_fdb_hash_init(struct net_bridge *br); 519 + void br_fdb_hash_fini(struct net_bridge *br); 524 520 void br_fdb_flush(struct net_bridge *br); 525 521 void br_fdb_find_delete_local(struct net_bridge *br, 526 522 const struct net_bridge_port *p,
+4 -4
net/bridge/br_switchdev.c
··· 121 121 122 122 switch (type) { 123 123 case RTM_DELNEIGH: 124 - br_switchdev_fdb_call_notifiers(false, fdb->addr.addr, 125 - fdb->vlan_id, 124 + br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr, 125 + fdb->key.vlan_id, 126 126 fdb->dst->dev); 127 127 break; 128 128 case RTM_NEWNEIGH: 129 - br_switchdev_fdb_call_notifiers(true, fdb->addr.addr, 130 - fdb->vlan_id, 129 + br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr, 130 + fdb->key.vlan_id, 131 131 fdb->dst->dev); 132 132 break; 133 133 }