Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: hold instance lock during NETDEV_CHANGE

Cosmin reports an issue with ipv6_add_dev being called from the
NETDEV_CHANGE notifier without the instance lock held:

[ 3455.008776] ? ipv6_add_dev+0x370/0x620
[ 3455.010097] ipv6_find_idev+0x96/0xe0
[ 3455.010725] addrconf_add_dev+0x1e/0xa0
[ 3455.011382] addrconf_init_auto_addrs+0xb0/0x720
[ 3455.013537] addrconf_notify+0x35f/0x8d0
[ 3455.014214] notifier_call_chain+0x38/0xf0
[ 3455.014903] netdev_state_change+0x65/0x90
[ 3455.015586] linkwatch_do_dev+0x5a/0x70
[ 3455.016238] rtnl_getlink+0x241/0x3e0
[ 3455.019046] rtnetlink_rcv_msg+0x177/0x5e0

Similarly, linkwatch might reach ipv6_add_dev without holding the ops lock:
[ 3456.656261] ? ipv6_add_dev+0x370/0x620
[ 3456.660039] ipv6_find_idev+0x96/0xe0
[ 3456.660445] addrconf_add_dev+0x1e/0xa0
[ 3456.660861] addrconf_init_auto_addrs+0xb0/0x720
[ 3456.661803] addrconf_notify+0x35f/0x8d0
[ 3456.662236] notifier_call_chain+0x38/0xf0
[ 3456.662676] netdev_state_change+0x65/0x90
[ 3456.663112] linkwatch_do_dev+0x5a/0x70
[ 3456.663529] __linkwatch_run_queue+0xeb/0x200
[ 3456.663990] linkwatch_event+0x21/0x30
[ 3456.664399] process_one_work+0x211/0x610
[ 3456.664828] worker_thread+0x1cc/0x380
[ 3456.665691] kthread+0xf4/0x210

Reclassify NETDEV_CHANGE as a notifier that consistently runs under the
instance lock.

Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/
Reported-by: Cosmin Ratiu <cratiu@nvidia.com>
Tested-by: Cosmin Ratiu <cratiu@nvidia.com>
Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations")
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Stanislav Fomichev and committed by
Jakub Kicinski
04efcee6 54f5fafc

+63 -31
+6 -4
Documentation/networking/netdevices.rst
··· 338 338 Devices drivers are encouraged to rely on the instance lock where possible. 339 339 340 340 For the (mostly software) drivers that need to interact with the core stack, 341 - there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., 342 - ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle 343 - acquiring the instance lock themselves, while the ``netif_xxx`` functions 344 - assume that the driver has already acquired the instance lock. 341 + there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx`` 342 + (e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx`` 343 + functions handle acquiring the instance lock themselves, while the 344 + ``netif_xxx`` functions assume that the driver has already acquired 345 + the instance lock. 345 346 346 347 Notifiers and netdev instance lock 347 348 ================================== ··· 355 354 running under the lock: 356 355 * ``NETDEV_REGISTER`` 357 356 * ``NETDEV_UP`` 357 + * ``NETDEV_CHANGE`` 358 358 359 359 The following notifiers are running without the lock: 360 360 * ``NETDEV_UNREGISTER``
+2
include/linux/netdevice.h
··· 4429 4429 * pending work list (if queued). 4430 4430 */ 4431 4431 void linkwatch_sync_dev(struct net_device *dev); 4432 + void __linkwatch_sync_dev(struct net_device *dev); 4432 4433 4433 4434 /** 4434 4435 * netif_carrier_ok - test if carrier present ··· 4975 4974 int dev_set_promiscuity(struct net_device *dev, int inc); 4976 4975 int netif_set_allmulti(struct net_device *dev, int inc, bool notify); 4977 4976 int dev_set_allmulti(struct net_device *dev, int inc); 4977 + void netif_state_change(struct net_device *dev); 4978 4978 void netdev_state_change(struct net_device *dev); 4979 4979 void __netdev_notify_peers(struct net_device *dev); 4980 4980 void netdev_notify_peers(struct net_device *dev);
+1 -1
include/linux/rtnetlink.h
··· 240 240 return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); 241 241 } 242 242 243 - void netdev_set_operstate(struct net_device *dev, int newstate); 243 + void netif_set_operstate(struct net_device *dev, int newstate); 244 244 245 245 #endif /* __LINUX_RTNETLINK_H */
+1 -10
net/core/dev.c
··· 1518 1518 } 1519 1519 EXPORT_SYMBOL(netdev_features_change); 1520 1520 1521 - /** 1522 - * netdev_state_change - device changes state 1523 - * @dev: device to cause notification 1524 - * 1525 - * Called to indicate a device has changed state. This function calls 1526 - * the notifier chains for netdev_chain and sends a NEWLINK message 1527 - * to the routing socket. 1528 - */ 1529 - void netdev_state_change(struct net_device *dev) 1521 + void netif_state_change(struct net_device *dev) 1530 1522 { 1531 1523 if (dev->flags & IFF_UP) { 1532 1524 struct netdev_notifier_change_info change_info = { ··· 1530 1538 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL); 1531 1539 } 1532 1540 } 1533 - EXPORT_SYMBOL(netdev_state_change); 1534 1541 1535 1542 /** 1536 1543 * __netdev_notify_peers - notify network peers about existence of @dev,
+16
net/core/dev_api.c
··· 327 327 return ret; 328 328 } 329 329 EXPORT_SYMBOL_GPL(dev_xdp_propagate); 330 + 331 + /** 332 + * netdev_state_change() - device changes state 333 + * @dev: device to cause notification 334 + * 335 + * Called to indicate a device has changed state. This function calls 336 + * the notifier chains for netdev_chain and sends a NEWLINK message 337 + * to the routing socket. 338 + */ 339 + void netdev_state_change(struct net_device *dev) 340 + { 341 + netdev_lock_ops(dev); 342 + netif_state_change(dev); 343 + netdev_unlock_ops(dev); 344 + } 345 + EXPORT_SYMBOL(netdev_state_change);
+1 -1
net/core/lock_debug.c
··· 20 20 switch (cmd) { 21 21 case NETDEV_REGISTER: 22 22 case NETDEV_UP: 23 + case NETDEV_CHANGE: 23 24 netdev_ops_assert_locked(dev); 24 25 fallthrough; 25 26 case NETDEV_DOWN: 26 27 case NETDEV_REBOOT: 27 - case NETDEV_CHANGE: 28 28 case NETDEV_UNREGISTER: 29 29 case NETDEV_CHANGEMTU: 30 30 case NETDEV_CHANGEADDR:
+9 -6
net/core/rtnetlink.c
··· 1043 1043 } 1044 1044 EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); 1045 1045 1046 - void netdev_set_operstate(struct net_device *dev, int newstate) 1046 + void netif_set_operstate(struct net_device *dev, int newstate) 1047 1047 { 1048 1048 unsigned int old = READ_ONCE(dev->operstate); 1049 1049 ··· 1052 1052 return; 1053 1053 } while (!try_cmpxchg(&dev->operstate, &old, newstate)); 1054 1054 1055 - netdev_state_change(dev); 1055 + netif_state_change(dev); 1056 1056 } 1057 - EXPORT_SYMBOL(netdev_set_operstate); 1057 + EXPORT_SYMBOL(netif_set_operstate); 1058 1058 1059 1059 static void set_operstate(struct net_device *dev, unsigned char transition) 1060 1060 { ··· 1080 1080 break; 1081 1081 } 1082 1082 1083 - netdev_set_operstate(dev, operstate); 1083 + netif_set_operstate(dev, operstate); 1084 1084 } 1085 1085 1086 1086 static unsigned int rtnl_dev_get_flags(const struct net_device *dev) ··· 3396 3396 errout: 3397 3397 if (status & DO_SETLINK_MODIFIED) { 3398 3398 if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) 3399 - netdev_state_change(dev); 3399 + netif_state_change(dev); 3400 3400 3401 3401 if (err < 0) 3402 3402 net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n", ··· 3676 3676 nla_len(tb[IFLA_BROADCAST])); 3677 3677 if (tb[IFLA_TXQLEN]) 3678 3678 dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); 3679 - if (tb[IFLA_OPERSTATE]) 3679 + if (tb[IFLA_OPERSTATE]) { 3680 + netdev_lock_ops(dev); 3680 3681 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); 3682 + netdev_unlock_ops(dev); 3683 + } 3681 3684 if (tb[IFLA_LINKMODE]) 3682 3685 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); 3683 3686 if (tb[IFLA_GROUP])
+1 -1
net/ethtool/ioctl.c
··· 60 60 u32 ethtool_op_get_link(struct net_device *dev) 61 61 { 62 62 /* Synchronize carrier state with link watch, see also rtnl_getlink() */ 63 - linkwatch_sync_dev(dev); 63 + __linkwatch_sync_dev(dev); 64 64 65 65 return netif_carrier_ok(dev) ? 1 : 0; 66 66 }
+3 -3
net/hsr/hsr_device.c
··· 33 33 struct net_device *dev = master->dev; 34 34 35 35 if (!is_admin_up(dev)) { 36 - netdev_set_operstate(dev, IF_OPER_DOWN); 36 + netif_set_operstate(dev, IF_OPER_DOWN); 37 37 return; 38 38 } 39 39 40 40 if (has_carrier) 41 - netdev_set_operstate(dev, IF_OPER_UP); 41 + netif_set_operstate(dev, IF_OPER_UP); 42 42 else 43 - netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); 43 + netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN); 44 44 } 45 45 46 46 static bool hsr_check_carrier(struct hsr_port *master)