Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bridge: move to workqueue gc

Move the fdb garbage collector to a workqueue which fires at least 10
milliseconds apart and cleans chain by chain, allowing other tasks to
run in the meantime. With thousands of fdb entries the system is much
more responsive. Most importantly, remove the need to check whether the
matched entry has expired in __br_fdb_get(), which causes false sharing
and is completely unnecessary if we clean up entries; at worst we'll get
10ms of traffic for that entry before it gets deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Nikolay Aleksandrov and committed by
David S. Miller
f7cdee8a 1f90c7f3

+29 -23
+1
net/bridge/br_device.c
··· 411 411 br_netfilter_rtable_init(br); 412 412 br_stp_timer_init(br); 413 413 br_multicast_init(br); 414 + INIT_DELAYED_WORK(&br->gc_work, br_fdb_cleanup); 414 415 }
+19 -12
net/bridge/br_fdb.c
··· 154 154 if (f->added_by_external_learn) 155 155 fdb_del_external_learn(f); 156 156 157 - hlist_del_rcu(&f->hlist); 157 + hlist_del_init_rcu(&f->hlist); 158 158 fdb_notify(br, f, RTM_DELNEIGH); 159 159 call_rcu(&f->rcu, fdb_rcu_free); 160 160 } ··· 290 290 spin_unlock_bh(&br->hash_lock); 291 291 } 292 292 293 - void br_fdb_cleanup(unsigned long _data) 293 + void br_fdb_cleanup(struct work_struct *work) 294 294 { 295 - struct net_bridge *br = (struct net_bridge *)_data; 295 + struct net_bridge *br = container_of(work, struct net_bridge, 296 + gc_work.work); 296 297 unsigned long delay = hold_time(br); 297 - unsigned long next_timer = jiffies + br->ageing_time; 298 + unsigned long work_delay = delay; 299 + unsigned long now = jiffies; 298 300 int i; 299 301 300 - spin_lock(&br->hash_lock); 301 302 for (i = 0; i < BR_HASH_SIZE; i++) { 302 303 struct net_bridge_fdb_entry *f; 303 304 struct hlist_node *n; 304 305 306 + if (!br->hash[i].first) 307 + continue; 308 + 309 + spin_lock_bh(&br->hash_lock); 305 310 hlist_for_each_entry_safe(f, n, &br->hash[i], hlist) { 306 311 unsigned long this_timer; 312 + 307 313 if (f->is_static) 308 314 continue; 309 315 if (f->added_by_external_learn) 310 316 continue; 311 317 this_timer = f->updated + delay; 312 - if (time_before_eq(this_timer, jiffies)) 318 + if (time_after(this_timer, now)) 319 + work_delay = min(work_delay, this_timer - now); 320 + else 313 321 fdb_delete(br, f); 314 - else if (time_before(this_timer, next_timer)) 315 - next_timer = this_timer; 316 322 } 323 + spin_unlock_bh(&br->hash_lock); 324 + cond_resched(); 317 325 } 318 - spin_unlock(&br->hash_lock); 319 326 320 - mod_timer(&br->gc_timer, round_jiffies_up(next_timer)); 327 + /* Cleanup minimum 10 milliseconds apart */ 328 + work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); 329 + mod_delayed_work(system_long_wq, &br->gc_work, work_delay); 321 330 } 322 331 323 332 /* Completely flush all dynamic entries in forwarding database.*/ ··· 391 382 
&br->hash[br_mac_hash(addr, vid)], hlist) { 392 383 if (ether_addr_equal(fdb->addr.addr, addr) && 393 384 fdb->vlan_id == vid) { 394 - if (unlikely(has_expired(br, fdb))) 395 - break; 396 385 return fdb; 397 386 } 398 387 }
+1 -1
net/bridge/br_if.c
··· 313 313 314 314 br_vlan_flush(br); 315 315 br_multicast_dev_del(br); 316 - del_timer_sync(&br->gc_timer); 316 + cancel_delayed_work_sync(&br->gc_work); 317 317 318 318 br_sysfs_delbr(br->dev); 319 319 unregister_netdevice_queue(br->dev, head);
+1 -1
net/bridge/br_ioctl.c
··· 149 149 b.hello_timer_value = br_timer_value(&br->hello_timer); 150 150 b.tcn_timer_value = br_timer_value(&br->tcn_timer); 151 151 b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); 152 - b.gc_timer_value = br_timer_value(&br->gc_timer); 152 + b.gc_timer_value = br_timer_value(&br->gc_work.timer); 153 153 rcu_read_unlock(); 154 154 155 155 if (copy_to_user((void __user *)args[1], &b, sizeof(b)))
+1 -1
net/bridge/br_netlink.c
··· 1250 1250 if (nla_put_u64_64bit(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval, 1251 1251 IFLA_BR_PAD)) 1252 1252 return -EMSGSIZE; 1253 - clockval = br_timer_value(&br->gc_timer); 1253 + clockval = br_timer_value(&br->gc_work.timer); 1254 1254 if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD)) 1255 1255 return -EMSGSIZE; 1256 1256
+2 -2
net/bridge/br_private.h
··· 379 379 struct timer_list hello_timer; 380 380 struct timer_list tcn_timer; 381 381 struct timer_list topology_change_timer; 382 - struct timer_list gc_timer; 382 + struct delayed_work gc_work; 383 383 struct kobject *ifobj; 384 384 u32 auto_cnt; 385 385 ··· 502 502 const unsigned char *addr, u16 vid); 503 503 void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr); 504 504 void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); 505 - void br_fdb_cleanup(unsigned long arg); 505 + void br_fdb_cleanup(struct work_struct *work); 506 506 void br_fdb_delete_by_port(struct net_bridge *br, 507 507 const struct net_bridge_port *p, u16 vid, int do_all); 508 508 struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
+1 -1
net/bridge/br_stp.c
··· 602 602 br->ageing_time = t; 603 603 spin_unlock_bh(&br->lock); 604 604 605 - mod_timer(&br->gc_timer, jiffies); 605 + mod_delayed_work(system_long_wq, &br->gc_work, 0); 606 606 607 607 return 0; 608 608 }
+2 -2
net/bridge/br_stp_if.c
··· 57 57 spin_lock_bh(&br->lock); 58 58 if (br->stp_enabled == BR_KERNEL_STP) 59 59 mod_timer(&br->hello_timer, jiffies + br->hello_time); 60 - mod_timer(&br->gc_timer, jiffies + HZ/10); 60 + mod_delayed_work(system_long_wq, &br->gc_work, HZ / 10); 61 61 62 62 br_config_bpdu_generation(br); 63 63 ··· 88 88 del_timer_sync(&br->hello_timer); 89 89 del_timer_sync(&br->topology_change_timer); 90 90 del_timer_sync(&br->tcn_timer); 91 - del_timer_sync(&br->gc_timer); 91 + cancel_delayed_work_sync(&br->gc_work); 92 92 } 93 93 94 94 /* called under bridge lock */
-2
net/bridge/br_stp_timer.c
··· 153 153 setup_timer(&br->topology_change_timer, 154 154 br_topology_change_timer_expired, 155 155 (unsigned long) br); 156 - 157 - setup_timer(&br->gc_timer, br_fdb_cleanup, (unsigned long) br); 158 156 } 159 157 160 158 void br_stp_port_timer_init(struct net_bridge_port *p)
+1 -1
net/bridge/br_sysfs_br.c
··· 263 263 char *buf) 264 264 { 265 265 struct net_bridge *br = to_bridge(d); 266 - return sprintf(buf, "%ld\n", br_timer_value(&br->gc_timer)); 266 + return sprintf(buf, "%ld\n", br_timer_value(&br->gc_work.timer)); 267 267 } 268 268 static DEVICE_ATTR_RO(gc_timer); 269 269