Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: Protect nh->f6i_list with spinlock and flag.

We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT.

Then, we may end up adding a route tied to a dying nexthop.

The nexthop itself is not freed during the RCU grace period, but
if we link a route after __remove_nexthop_fib() is called for the
nexthop, the route will be leaked.

To avoid the race between IPv6 route addition under RCU vs nexthop
deletion under RTNL, let's add a dead flag and protect it and
nh->f6i_list with a spinlock.

__remove_nexthop_fib() acquires the nexthop's spinlock and sets
nh->dead to true, then calls ip6_del_rt() for the linked routes one by
one without the spinlock because fib6_purge_rt() acquires it later.

While adding an IPv6 route, fib6_add() acquires the nexthop lock and
checks the dead flag just before inserting the route.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250418000443.43734-15-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Kuniyuki Iwashima and committed by
Paolo Abeni
081efd18 accb46b5

+51 -8
+2
include/net/nexthop.h
··· 152 152 u8 protocol; /* app managing this nh */ 153 153 u8 nh_flags; 154 154 bool is_group; 155 + bool dead; 156 + spinlock_t lock; /* protect dead and f6i_list */ 155 157 156 158 refcount_t refcnt; 157 159 struct rcu_head rcu;
+15 -3
net/ipv4/nexthop.c
··· 541 541 INIT_LIST_HEAD(&nh->f6i_list); 542 542 INIT_LIST_HEAD(&nh->grp_list); 543 543 INIT_LIST_HEAD(&nh->fdb_list); 544 + spin_lock_init(&nh->lock); 544 545 } 545 546 return nh; 546 547 } ··· 2119 2118 /* not called for nexthop replace */ 2120 2119 static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) 2121 2120 { 2122 - struct fib6_info *f6i, *tmp; 2121 + struct fib6_info *f6i; 2123 2122 bool do_flush = false; 2124 2123 struct fib_info *fi; 2125 2124 ··· 2130 2129 if (do_flush) 2131 2130 fib_flush(net); 2132 2131 2133 - /* ip6_del_rt removes the entry from this list hence the _safe */ 2134 - list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { 2132 + spin_lock_bh(&nh->lock); 2133 + 2134 + nh->dead = true; 2135 + 2136 + while (!list_empty(&nh->f6i_list)) { 2137 + f6i = list_first_entry(&nh->f6i_list, typeof(*f6i), nh_list); 2138 + 2135 2139 /* __ip6_del_rt does a release, so do a hold here */ 2136 2140 fib6_info_hold(f6i); 2141 + 2142 + spin_unlock_bh(&nh->lock); 2137 2143 ipv6_stub->ip6_del_rt(net, f6i, 2138 2144 !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); 2145 + 2146 + spin_lock_bh(&nh->lock); 2139 2147 } 2148 + 2149 + spin_unlock_bh(&nh->lock); 2140 2150 } 2141 2151 2142 2152 static void __remove_nexthop(struct net *net, struct nexthop *nh,
+34 -5
net/ipv6/ip6_fib.c
··· 1048 1048 rt6_flush_exceptions(rt); 1049 1049 fib6_drop_pcpu_from(rt, table); 1050 1050 1051 - if (rt->nh && !list_empty(&rt->nh_list)) 1052 - list_del_init(&rt->nh_list); 1051 + if (rt->nh) { 1052 + spin_lock(&rt->nh->lock); 1053 + 1054 + if (!list_empty(&rt->nh_list)) 1055 + list_del_init(&rt->nh_list); 1056 + 1057 + spin_unlock(&rt->nh->lock); 1058 + } 1053 1059 1054 1060 if (refcount_read(&rt->fib6_ref) != 1) { 1055 1061 /* This route is used as dummy address holder in some split ··· 1347 1341 return 0; 1348 1342 } 1349 1343 1344 + static int fib6_add_rt2node_nh(struct fib6_node *fn, struct fib6_info *rt, 1345 + struct nl_info *info, struct netlink_ext_ack *extack, 1346 + struct list_head *purge_list) 1347 + { 1348 + int err; 1349 + 1350 + spin_lock(&rt->nh->lock); 1351 + 1352 + if (rt->nh->dead) { 1353 + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); 1354 + err = -EINVAL; 1355 + } else { 1356 + err = fib6_add_rt2node(fn, rt, info, extack, purge_list); 1357 + if (!err) 1358 + list_add(&rt->nh_list, &rt->nh->f6i_list); 1359 + } 1360 + 1361 + spin_unlock(&rt->nh->lock); 1362 + 1363 + return err; 1364 + } 1365 + 1350 1366 static void fib6_start_gc(struct net *net, struct fib6_info *rt) 1351 1367 { 1352 1368 if (!timer_pending(&net->ipv6.ip6_fib_timer) && ··· 1526 1498 } 1527 1499 #endif 1528 1500 1529 - err = fib6_add_rt2node(fn, rt, info, extack, &purge_list); 1501 + if (rt->nh) 1502 + err = fib6_add_rt2node_nh(fn, rt, info, extack, &purge_list); 1503 + else 1504 + err = fib6_add_rt2node(fn, rt, info, extack, &purge_list); 1530 1505 if (!err) { 1531 1506 struct fib6_info *iter, *next; 1532 1507 ··· 1539 1508 fib6_info_release(iter); 1540 1509 } 1541 1510 1542 - if (rt->nh) 1543 - list_add(&rt->nh_list, &rt->nh->f6i_list); 1544 1511 __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); 1545 1512 1546 1513 if (rt->fib6_flags & RTF_EXPIRES)