Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: avoid potential race between netdev_get_by_index_lock() and netns switch

netdev_get_by_index_lock() performs the following steps:

rcu_lock();
dev = lookup(netns, ifindex);
dev_get(dev);
rcu_unlock();
[... lock & validate the dev ...]
return dev

Validation right now only checks if the device is registered, but since
the lookup is netns-aware we must also protect against the device
switching netns right after we drop the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netdev is unlisted because of a netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: Joe Damato <jdamato@fastly.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+24 -9
+5 -1
include/linux/netdevice.h
··· 1952 1952 * @priv_destructor: Called from unregister 1953 1953 * @npinfo: XXX: need comments on this one 1954 1954 * @nd_net: Network namespace this network device is inside 1955 + * protected by @lock 1955 1956 * 1956 1957 * @ml_priv: Mid-layer private 1957 1958 * @ml_priv_type: Mid-layer private type ··· 2360 2359 2361 2360 bool dismantle; 2362 2361 2362 + /** @moving_ns: device is changing netns, protected by @lock */ 2363 + bool moving_ns; 2364 + 2363 2365 enum { 2364 2366 RTNL_LINK_INITIALIZED, 2365 2367 RTNL_LINK_INITIALIZING, ··· 2525 2521 * @net_shaper_hierarchy, @reg_state, @threaded 2526 2522 * 2527 2523 * Double protects: 2528 - * @up 2524 + * @up, @moving_ns, @nd_net 2529 2525 * 2530 2526 * Double ops protects: 2531 2527 * @real_num_rx_queues, @real_num_tx_queues
+18 -7
net/core/dev.c
··· 828 828 dev_hold(dev); 829 829 rcu_read_unlock(); 830 830 831 - dev = __netdev_put_lock(dev); 831 + dev = __netdev_put_lock(dev, net); 832 832 if (!dev) 833 833 return NULL; 834 834 ··· 1039 1039 * This helper is intended for locking net_device after it has been looked up 1040 1040 * using a lockless lookup helper. Lock prevents the instance from going away. 1041 1041 */ 1042 - struct net_device *__netdev_put_lock(struct net_device *dev) 1042 + struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net) 1043 1043 { 1044 1044 netdev_lock(dev); 1045 - if (dev->reg_state > NETREG_REGISTERED) { 1045 + if (dev->reg_state > NETREG_REGISTERED || 1046 + dev->moving_ns || !net_eq(dev_net(dev), net)) { 1046 1047 netdev_unlock(dev); 1047 1048 dev_put(dev); 1048 1049 return NULL; ··· 1071 1070 if (!dev) 1072 1071 return NULL; 1073 1072 1074 - return __netdev_put_lock(dev); 1073 + return __netdev_put_lock(dev, net); 1075 1074 } 1076 1075 1077 1076 struct net_device * ··· 1091 1090 dev_hold(dev); 1092 1091 rcu_read_unlock(); 1093 1092 1094 - dev = __netdev_put_lock(dev); 1093 + dev = __netdev_put_lock(dev, net); 1095 1094 if (dev) 1096 1095 return dev; 1097 1096 ··· 12158 12157 netif_close(dev); 12159 12158 /* And unlink it from device chain */ 12160 12159 unlist_netdevice(dev); 12161 - netdev_unlock_ops(dev); 12160 + 12161 + if (!netdev_need_ops_lock(dev)) 12162 + netdev_lock(dev); 12163 + dev->moving_ns = true; 12164 + netdev_unlock(dev); 12162 12165 12163 12166 synchronize_net(); 12164 12167 ··· 12198 12193 move_netdevice_notifiers_dev_net(dev, net); 12199 12194 12200 12195 /* Actually switch the network namespace */ 12196 + netdev_lock(dev); 12201 12197 dev_net_set(dev, net); 12198 + netdev_unlock(dev); 12202 12199 dev->ifindex = new_ifindex; 12203 12200 12204 12201 if (new_name[0]) { ··· 12226 12219 err = netdev_change_owner(dev, net_old, net); 12227 12220 WARN_ON(err); 12228 12221 12229 - netdev_lock_ops(dev); 12222 + netdev_lock(dev); 12223 + 
dev->moving_ns = false; 12224 + if (!netdev_need_ops_lock(dev)) 12225 + netdev_unlock(dev); 12226 + 12230 12227 /* Add the device back in the hashes */ 12231 12228 list_netdevice(dev); 12232 12229 /* Notify protocols, that a new device appeared. */
+1 -1
net/core/dev.h
··· 30 30 struct net_device *dev_get_by_napi_id(unsigned int napi_id); 31 31 32 32 struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex); 33 - struct net_device *__netdev_put_lock(struct net_device *dev); 33 + struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); 34 34 struct net_device * 35 35 netdev_xa_find_lock(struct net *net, struct net_device *dev, 36 36 unsigned long *index);