Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-hold-instance-lock-during-netdev_up-register'

Stanislav Fomichev says:

====================
net: hold instance lock during NETDEV_UP/REGISTER

Solving the issue reported by Cosmin in [0] requires consistent
locking during the NETDEV_UP/REGISTER notifiers. This series
addresses that (along with some other fixes in net/ipv4/devinet.c
and net/ipv6/addrconf.c) and appends the patches from Jakub
that were conditional on consistent locking in NETDEV_UNREGISTER.

[0]: https://lore.kernel.org/700fa36b94cbd57cfea2622029b087643c80cbc9.camel@nvidia.com
====================

Link: https://patch.msgid.link/20250401163452.622454-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+125 -36
+23
Documentation/networking/netdevices.rst
··· 343 343 acquiring the instance lock themselves, while the ``netif_xxx`` functions 344 344 assume that the driver has already acquired the instance lock. 345 345 346 + Notifiers and netdev instance lock 347 + ================================== 348 + 349 + For device drivers that implement shaping or queue management APIs, 350 + some of the notifiers (``enum netdev_cmd``) are running under the netdev 351 + instance lock. 352 + 353 + For devices with locked ops, currently only the following notifiers are 354 + running under the lock: 355 + * ``NETDEV_REGISTER`` 356 + * ``NETDEV_UP`` 357 + 358 + The following notifiers are running without the lock: 359 + * ``NETDEV_UNREGISTER`` 360 + 361 + There are no clear expectations for the remaining notifiers. Notifiers not on 362 + the list may run with or without the instance lock, potentially even invoking 363 + the same notifier type with and without the lock from different code paths. 364 + The goal is to eventually ensure that all (or most, with a few documented 365 + exceptions) notifiers run under the instance lock. Please extend this 366 + documentation whenever you make an explicit assumption about the lock being held 367 + from a notifier. 368 + 346 369 NETDEV_INTERNAL symbol namespace 347 370 ================================ 348 371
+1
drivers/net/dummy.c
··· 105 105 dev->netdev_ops = &dummy_netdev_ops; 106 106 dev->ethtool_ops = &dummy_ethtool_ops; 107 107 dev->needs_free_netdev = true; 108 + dev->request_ops_lock = true; 108 109 109 110 /* Fill in device structure with ethernet-generic values. */ 110 111 dev->flags |= IFF_NOARP;
+13
drivers/net/netdevsim/netdev.c
··· 939 939 ns->netdev->netdev_ops = &nsim_netdev_ops; 940 940 ns->netdev->stat_ops = &nsim_stat_ops; 941 941 ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; 942 + netdev_lockdep_set_classes(ns->netdev); 942 943 943 944 err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); 944 945 if (err) ··· 961 960 if (err) 962 961 goto err_ipsec_teardown; 963 962 rtnl_unlock(); 963 + 964 + if (IS_ENABLED(CONFIG_DEBUG_NET)) { 965 + ns->nb.notifier_call = netdev_debug_event; 966 + if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb, 967 + &ns->nn)) 968 + ns->nb.notifier_call = NULL; 969 + } 970 + 964 971 return 0; 965 972 966 973 err_ipsec_teardown: ··· 1051 1042 1052 1043 debugfs_remove(ns->qr_dfs); 1053 1044 debugfs_remove(ns->pp_dfs); 1045 + 1046 + if (ns->nb.notifier_call) 1047 + unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, 1048 + &ns->nn); 1054 1049 1055 1050 rtnl_lock(); 1056 1051 peer = rtnl_dereference(ns->peer);
+3
drivers/net/netdevsim/netdevsim.h
··· 144 144 145 145 struct nsim_ethtool ethtool; 146 146 struct netdevsim __rcu *peer; 147 + 148 + struct notifier_block nb; 149 + struct netdev_net_notifier nn; 147 150 }; 148 151 149 152 struct netdevsim *
+1 -1
include/linux/netdevice.h
··· 4192 4192 int netif_set_alias(struct net_device *dev, const char *alias, size_t len); 4193 4193 int dev_set_alias(struct net_device *, const char *, size_t); 4194 4194 int dev_get_alias(const struct net_device *, char *, size_t); 4195 - int netif_change_net_namespace(struct net_device *dev, struct net *net, 4195 + int __dev_change_net_namespace(struct net_device *dev, struct net *net, 4196 4196 const char *pat, int new_ifindex, 4197 4197 struct netlink_ext_ack *extack); 4198 4198 int dev_change_net_namespace(struct net_device *dev, struct net *net,
+8 -8
include/net/ip.h
··· 667 667 memcpy(buf, &naddr, sizeof(naddr)); 668 668 } 669 669 670 - #if IS_MODULE(CONFIG_IPV6) 671 - #define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) 672 - #define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) 673 - #else 674 - #define EXPORT_IPV6_MOD(X) 675 - #define EXPORT_IPV6_MOD_GPL(X) 676 - #endif 677 - 678 670 #if IS_ENABLED(CONFIG_IPV6) 679 671 #include <linux/ipv6.h> 680 672 #endif ··· 684 692 #endif 685 693 } 686 694 695 + #endif 696 + 697 + #if IS_MODULE(CONFIG_IPV6) 698 + #define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) 699 + #define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) 700 + #else 701 + #define EXPORT_IPV6_MOD(X) 702 + #define EXPORT_IPV6_MOD_GPL(X) 687 703 #endif 688 704 689 705 static inline unsigned int ipv4_addr_hash(__be32 ip)
+3
include/net/netdev_lock.h
··· 98 98 &qdisc_xmit_lock_key); \ 99 99 } 100 100 101 + int netdev_debug_event(struct notifier_block *nb, unsigned long event, 102 + void *ptr); 103 + 101 104 #endif
+1 -1
net/core/Makefile
··· 45 45 obj-$(CONFIG_OF) += of_net.o 46 46 obj-$(CONFIG_NET_TEST) += net_test.o 47 47 obj-$(CONFIG_NET_DEVMEM) += devmem.o 48 - obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o 48 + obj-$(CONFIG_DEBUG_NET) += lock_debug.o 49 49 obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
+10 -3
net/core/dev.c
··· 1771 1771 netdev_unlock_ops(lower_dev); 1772 1772 } 1773 1773 } 1774 + EXPORT_IPV6_MOD(netif_disable_lro); 1774 1775 1775 1776 /** 1776 1777 * dev_disable_gro_hw - disable HW Generic Receive Offload on a device ··· 1859 1858 int err; 1860 1859 1861 1860 for_each_netdev(net, dev) { 1861 + netdev_lock_ops(dev); 1862 1862 err = call_netdevice_register_notifiers(nb, dev); 1863 + netdev_unlock_ops(dev); 1863 1864 if (err) 1864 1865 goto rollback; 1865 1866 } ··· 11050 11047 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); 11051 11048 11052 11049 /* Notify protocols, that a new device appeared. */ 11050 + netdev_lock_ops(dev); 11053 11051 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); 11052 + netdev_unlock_ops(dev); 11054 11053 ret = notifier_to_errno(ret); 11055 11054 if (ret) { 11056 11055 /* Expect explicit free_netdev() on failure */ ··· 12064 12059 } 12065 12060 EXPORT_SYMBOL(unregister_netdev); 12066 12061 12067 - int netif_change_net_namespace(struct net_device *dev, struct net *net, 12062 + int __dev_change_net_namespace(struct net_device *dev, struct net *net, 12068 12063 const char *pat, int new_ifindex, 12069 12064 struct netlink_ext_ack *extack) 12070 12065 { ··· 12149 12144 * And now a mini version of register_netdevice unregister_netdevice. 12150 12145 */ 12151 12146 12147 + netdev_lock_ops(dev); 12152 12148 /* If device is running close it first. */ 12153 12149 netif_close(dev); 12154 - 12155 12150 /* And unlink it from device chain */ 12156 12151 unlist_netdevice(dev); 12152 + netdev_unlock_ops(dev); 12157 12153 12158 12154 synchronize_net(); 12159 12155 ··· 12216 12210 err = netdev_change_owner(dev, net_old, net); 12217 12211 WARN_ON(err); 12218 12212 12213 + netdev_lock_ops(dev); 12219 12214 /* Add the device back in the hashes */ 12220 12215 list_netdevice(dev); 12221 - 12222 12216 /* Notify protocols, that a new device appeared. 
*/ 12223 12217 call_netdevice_notifiers(NETDEV_REGISTER, dev); 12218 + netdev_unlock_ops(dev); 12224 12219 12225 12220 /* 12226 12221 * Prevent userspace races by waiting until the network
+1 -7
net/core/dev_api.c
··· 117 117 int dev_change_net_namespace(struct net_device *dev, struct net *net, 118 118 const char *pat) 119 119 { 120 - int ret; 121 - 122 - netdev_lock_ops(dev); 123 - ret = netif_change_net_namespace(dev, net, pat, 0, NULL); 124 - netdev_unlock_ops(dev); 125 - 126 - return ret; 120 + return __dev_change_net_namespace(dev, net, pat, 0, NULL); 127 121 } 128 122 EXPORT_SYMBOL_GPL(dev_change_net_namespace); 129 123
+4 -4
net/core/rtnetlink.c
··· 3025 3025 char ifname[IFNAMSIZ]; 3026 3026 int err; 3027 3027 3028 - netdev_lock_ops(dev); 3029 - 3030 3028 err = validate_linkmsg(dev, tb, extack); 3031 3029 if (err < 0) 3032 3030 goto errout; ··· 3040 3042 3041 3043 new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); 3042 3044 3043 - err = netif_change_net_namespace(dev, tgt_net, pat, 3045 + err = __dev_change_net_namespace(dev, tgt_net, pat, 3044 3046 new_ifindex, extack); 3045 3047 if (err) 3046 - goto errout; 3048 + return err; 3047 3049 3048 3050 status |= DO_SETLINK_MODIFIED; 3049 3051 } 3052 + 3053 + netdev_lock_ops(dev); 3050 3054 3051 3055 if (tb[IFLA_MAP]) { 3052 3056 struct rtnl_link_ifmap *u_map;
+9 -5
net/core/rtnl_net_debug.c net/core/lock_debug.c
··· 6 6 #include <linux/notifier.h> 7 7 #include <linux/rtnetlink.h> 8 8 #include <net/net_namespace.h> 9 + #include <net/netdev_lock.h> 9 10 #include <net/netns/generic.h> 10 11 11 - static int rtnl_net_debug_event(struct notifier_block *nb, 12 - unsigned long event, void *ptr) 12 + int netdev_debug_event(struct notifier_block *nb, unsigned long event, 13 + void *ptr) 13 14 { 14 15 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 15 16 struct net *net = dev_net(dev); ··· 18 17 19 18 /* Keep enum and don't add default to trigger -Werror=switch */ 20 19 switch (cmd) { 20 + case NETDEV_REGISTER: 21 21 case NETDEV_UP: 22 + netdev_ops_assert_locked(dev); 23 + fallthrough; 22 24 case NETDEV_DOWN: 23 25 case NETDEV_REBOOT: 24 26 case NETDEV_CHANGE: 25 - case NETDEV_REGISTER: 26 27 case NETDEV_UNREGISTER: 27 28 case NETDEV_CHANGEMTU: 28 29 case NETDEV_CHANGEADDR: ··· 69 66 70 67 return NOTIFY_DONE; 71 68 } 69 + EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL"); 72 70 73 71 static int rtnl_net_debug_net_id; 74 72 ··· 78 74 struct notifier_block *nb; 79 75 80 76 nb = net_generic(net, rtnl_net_debug_net_id); 81 - nb->notifier_call = rtnl_net_debug_event; 77 + nb->notifier_call = netdev_debug_event; 82 78 83 79 return register_netdevice_notifier_net(net, nb); 84 80 } ··· 99 95 }; 100 96 101 97 static struct notifier_block rtnl_net_debug_block = { 102 - .notifier_call = rtnl_net_debug_event, 98 + .notifier_call = netdev_debug_event, 103 99 }; 104 100 105 101 static int __init rtnl_net_debug_init(void)
+1 -1
net/ipv4/devinet.c
··· 281 281 if (!in_dev->arp_parms) 282 282 goto out_kfree; 283 283 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) 284 - dev_disable_lro(dev); 284 + netif_disable_lro(dev); 285 285 /* Reference in_dev->dev */ 286 286 netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); 287 287 /* Account for reference dev->ip_ptr (below) */
+13 -2
net/ipv6/addrconf.c
··· 80 80 #include <net/netlink.h> 81 81 #include <net/pkt_sched.h> 82 82 #include <net/l3mdev.h> 83 + #include <net/netdev_lock.h> 83 84 #include <linux/if_tunnel.h> 84 85 #include <linux/rtnetlink.h> 85 86 #include <linux/netconf.h> ··· 378 377 int err = -ENOMEM; 379 378 380 379 ASSERT_RTNL(); 380 + netdev_ops_assert_locked(dev); 381 381 382 382 if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) 383 383 return ERR_PTR(-EINVAL); ··· 404 402 return ERR_PTR(err); 405 403 } 406 404 if (ndev->cnf.forwarding) 407 - dev_disable_lro(dev); 405 + netif_disable_lro(dev); 408 406 /* We refer to the device */ 409 407 netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); 410 408 ··· 3154 3152 3155 3153 rtnl_net_lock(net); 3156 3154 dev = __dev_get_by_index(net, ireq.ifr6_ifindex); 3155 + netdev_lock_ops(dev); 3157 3156 if (dev) 3158 3157 err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); 3159 3158 else 3160 3159 err = -ENODEV; 3160 + netdev_unlock_ops(dev); 3161 3161 rtnl_net_unlock(net); 3162 3162 return err; 3163 3163 } ··· 5030 5026 if (!dev) { 5031 5027 NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); 5032 5028 err = -ENODEV; 5033 - goto unlock; 5029 + goto unlock_rtnl; 5034 5030 } 5035 5031 5032 + netdev_lock_ops(dev); 5036 5033 idev = ipv6_find_idev(dev); 5037 5034 if (IS_ERR(idev)) { 5038 5035 err = PTR_ERR(idev); ··· 5070 5065 5071 5066 in6_ifa_put(ifa); 5072 5067 unlock: 5068 + netdev_unlock_ops(dev); 5069 + unlock_rtnl: 5073 5070 rtnl_net_unlock(net); 5074 5071 5075 5072 return err; ··· 6523 6516 6524 6517 if (idev->cnf.addr_gen_mode != new_val) { 6525 6518 WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); 6519 + netdev_lock_ops(idev->dev); 6526 6520 addrconf_init_auto_addrs(idev->dev); 6521 + netdev_unlock_ops(idev->dev); 6527 6522 } 6528 6523 } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { 6529 6524 struct net_device *dev; ··· 6537 6528 idev->cnf.addr_gen_mode != new_val) { 6538 6529 WRITE_ONCE(idev->cnf.addr_gen_mode, 6539 6530 new_val); 
6531 + netdev_lock_ops(idev->dev); 6540 6532 addrconf_init_auto_addrs(idev->dev); 6533 + netdev_unlock_ops(idev->dev); 6541 6534 } 6542 6535 } 6543 6536 }
+25
tools/testing/selftests/net/lib.sh
··· 222 222 NS_LIST+=("${ns_list[@]}") 223 223 } 224 224 225 + # Create netdevsim with given id and net namespace. 226 + create_netdevsim() { 227 + local id="$1" 228 + local ns="$2" 229 + 230 + modprobe netdevsim &> /dev/null 231 + udevadm settle 232 + 233 + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null 234 + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) 235 + ip -netns $ns link set dev $dev name nsim$id 236 + ip -netns $ns link set dev nsim$id up 237 + 238 + echo nsim$id 239 + } 240 + 241 + # Remove netdevsim with given id. 242 + cleanup_netdevsim() { 243 + local id="$1" 244 + 245 + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then 246 + echo "$id" > /sys/bus/netdevsim/del_device 247 + fi 248 + } 249 + 225 250 tc_rule_stats_get() 226 251 { 227 252 local dev=$1; shift
+9 -4
tools/testing/selftests/net/netns-name.sh
··· 7 7 DEV=dummy-dev0 8 8 DEV2=dummy-dev1 9 9 ALT_NAME=some-alt-name 10 + NSIM_ADDR=2025 10 11 11 12 RET_CODE=0 12 13 13 14 cleanup() { 15 + cleanup_netdevsim $NSIM_ADDR 14 16 cleanup_ns $NS $test_ns 15 17 } 16 18 ··· 27 25 28 26 # 29 27 # Test basic move without a rename 28 + # Use netdevsim because it has extra asserts for notifiers. 30 29 # 31 - ip -netns $NS link add name $DEV type dummy || fail 32 - ip -netns $NS link set dev $DEV netns $test_ns || 30 + 31 + nsim=$(create_netdevsim $NSIM_ADDR $NS) 32 + ip -netns $NS link set dev $nsim netns $test_ns || 33 33 fail "Can't perform a netns move" 34 - ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" 35 - ip -netns $test_ns link del $DEV || fail 34 + ip -netns $test_ns link show dev $nsim >> /dev/null || 35 + fail "Device not found after move" 36 + cleanup_netdevsim $NSIM_ADDR 36 37 37 38 # 38 39 # Test move with a conflict