Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv4: use separate genid for next hop exceptions

Commit 13d82bf5 (ipv4: Fix flushing of cached routing informations)
added support for flushing learned PMTU information.

However, using rt_genid for this is quite heavy-handed, as it is bumped
on route add/change and multicast events, amongst other places. These
can happen quite often, especially when using dynamic routing protocols.

While this is OK for routes (as they are just recreated locally),
the PMTU information is learned from remote systems, and the ICMP
notifications can arrive after long delays. It is worthwhile to have a
separate genid to avoid excessive PMTU resets.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Timo Teräs and committed by
David S. Miller
5aad1de5 f016229e

+22 -2
+1
include/net/ip_fib.h
··· 51 51 52 52 struct fib_nh_exception { 53 53 struct fib_nh_exception __rcu *fnhe_next; 54 + int fnhe_genid; 54 55 __be32 fnhe_daddr; 55 56 u32 fnhe_pmtu; 56 57 __be32 fnhe_gw;
+11
include/net/net_namespace.h
··· 118 118 struct netns_ipvs *ipvs; 119 119 struct sock *diag_nlsk; 120 120 atomic_t rt_genid; 121 + atomic_t fnhe_genid; 121 122 }; 122 123 123 124 /* ··· 339 338 static inline void rt_genid_bump(struct net *net) 340 339 { 341 340 atomic_inc(&net->rt_genid); 341 + } 342 + 343 + static inline int fnhe_genid(struct net *net) 344 + { 345 + return atomic_read(&net->fnhe_genid); 346 + } 347 + 348 + static inline void fnhe_genid_bump(struct net *net) 349 + { 350 + atomic_inc(&net->fnhe_genid); 342 351 } 343 352 344 353 #endif /* __NET_NET_NAMESPACE_H */
+10 -2
net/ipv4/route.c
··· 658 658 fnhe->fnhe_next = hash->chain; 659 659 rcu_assign_pointer(hash->chain, fnhe); 660 660 } 661 + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); 661 662 fnhe->fnhe_daddr = daddr; 662 663 fnhe->fnhe_gw = gw; 663 664 fnhe->fnhe_pmtu = pmtu; ··· 1237 1236 spin_lock_bh(&fnhe_lock); 1238 1237 1239 1238 if (daddr == fnhe->fnhe_daddr) { 1239 + int genid = fnhe_genid(dev_net(rt->dst.dev)); 1240 1240 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); 1241 - if (orig && rt_is_expired(orig)) { 1241 + 1242 + if (fnhe->fnhe_genid != genid) { 1243 + fnhe->fnhe_genid = genid; 1242 1244 fnhe->fnhe_gw = 0; 1243 1245 fnhe->fnhe_pmtu = 0; 1244 1246 fnhe->fnhe_expires = 0; ··· 2447 2443 void __user *buffer, 2448 2444 size_t *lenp, loff_t *ppos) 2449 2445 { 2446 + struct net *net = (struct net *)__ctl->extra1; 2447 + 2450 2448 if (write) { 2451 - rt_cache_flush((struct net *)__ctl->extra1); 2449 + rt_cache_flush(net); 2450 + fnhe_genid_bump(net); 2452 2451 return 0; 2453 2452 } 2454 2453 ··· 2626 2619 static __net_init int rt_genid_init(struct net *net) 2627 2620 { 2628 2621 atomic_set(&net->rt_genid, 0); 2622 + atomic_set(&net->fnhe_genid, 0); 2629 2623 get_random_bytes(&net->ipv4.dev_addr_genid, 2630 2624 sizeof(net->ipv4.dev_addr_genid)); 2631 2625 return 0;