Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

devlink: disallow reload operation during device cleanup

There is a race between driver code that does setup/cleanup of device
and devlink reload operation that in some drivers works with the same
code. Use after free could we easily obtained by running:

while true; do
echo 10 > /sys/bus/netdevsim/new_device
devlink dev reload netdevsim/netdevsim10 &
echo 10 > /sys/bus/netdevsim/del_device
done

Fix this by enabling reload only after setup of device is complete and
disabling it at the beginning of the cleanup process.

Reported-by: Ido Schimmel <idosch@mellanox.com>
Fixes: 2d8dc5bbf4e7 ("devlink: Add support for reload")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jiri Pirko and committed by
David S. Miller
a0c76345 f95e6c9c

+57 -4
+3
drivers/net/ethernet/mellanox/mlx4/main.c
··· 4015 4015 goto err_params_unregister; 4016 4016 4017 4017 devlink_params_publish(devlink); 4018 + devlink_reload_enable(devlink); 4018 4019 pci_save_state(pdev); 4019 4020 return 0; 4020 4021 ··· 4126 4125 struct mlx4_priv *priv = mlx4_priv(dev); 4127 4126 struct devlink *devlink = priv_to_devlink(priv); 4128 4127 int active_vfs = 0; 4128 + 4129 + devlink_reload_disable(devlink); 4129 4130 4130 4131 if (mlx4_is_slave(dev)) 4131 4132 persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
+5 -1
drivers/net/ethernet/mellanox/mlxsw/core.c
··· 1198 1198 if (err) 1199 1199 goto err_thermal_init; 1200 1200 1201 - if (mlxsw_driver->params_register) 1201 + if (mlxsw_driver->params_register) { 1202 1202 devlink_params_publish(devlink); 1203 + devlink_reload_enable(devlink); 1204 + } 1203 1205 1204 1206 return 0; 1205 1207 ··· 1265 1263 { 1266 1264 struct devlink *devlink = priv_to_devlink(mlxsw_core); 1267 1265 1266 + if (!reload) 1267 + devlink_reload_disable(devlink); 1268 1268 if (devlink_is_reload_failed(devlink)) { 1269 1269 if (!reload) 1270 1270 /* Only the parts that were not de-initialized in the
+3
drivers/net/netdevsim/dev.c
··· 820 820 goto err_bpf_dev_exit; 821 821 822 822 devlink_params_publish(devlink); 823 + devlink_reload_enable(devlink); 823 824 return 0; 824 825 825 826 err_bpf_dev_exit: ··· 865 864 { 866 865 struct nsim_dev *nsim_dev = dev_get_drvdata(&nsim_bus_dev->dev); 867 866 struct devlink *devlink = priv_to_devlink(nsim_dev); 867 + 868 + devlink_reload_disable(devlink); 868 869 869 870 nsim_dev_reload_destroy(nsim_dev); 870 871
+5 -2
include/net/devlink.h
··· 38 38 struct device *dev; 39 39 possible_net_t _net; 40 40 struct mutex lock; 41 - bool reload_failed; 42 - bool registered; 41 + u8 reload_failed:1, 42 + reload_enabled:1, 43 + registered:1; 43 44 char priv[0] __aligned(NETDEV_ALIGN); 44 45 }; 45 46 ··· 825 824 struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size); 826 825 int devlink_register(struct devlink *devlink, struct device *dev); 827 826 void devlink_unregister(struct devlink *devlink); 827 + void devlink_reload_enable(struct devlink *devlink); 828 + void devlink_reload_disable(struct devlink *devlink); 828 829 void devlink_free(struct devlink *devlink); 829 830 int devlink_port_register(struct devlink *devlink, 830 831 struct devlink_port *devlink_port,
+41 -1
net/core/devlink.c
··· 2791 2791 { 2792 2792 int err; 2793 2793 2794 + if (!devlink->reload_enabled) 2795 + return -EOPNOTSUPP; 2796 + 2794 2797 err = devlink->ops->reload_down(devlink, !!dest_net, extack); 2795 2798 if (err) 2796 2799 return err; ··· 6311 6308 void devlink_unregister(struct devlink *devlink) 6312 6309 { 6313 6310 mutex_lock(&devlink_mutex); 6311 + WARN_ON(devlink_reload_supported(devlink) && 6312 + devlink->reload_enabled); 6314 6313 devlink_notify(devlink, DEVLINK_CMD_DEL); 6315 6314 list_del(&devlink->list); 6316 6315 mutex_unlock(&devlink_mutex); 6317 6316 } 6318 6317 EXPORT_SYMBOL_GPL(devlink_unregister); 6318 + 6319 + /** 6320 + * devlink_reload_enable - Enable reload of devlink instance 6321 + * 6322 + * @devlink: devlink 6323 + * 6324 + * Should be called at end of device initialization 6325 + * process when reload operation is supported. 6326 + */ 6327 + void devlink_reload_enable(struct devlink *devlink) 6328 + { 6329 + mutex_lock(&devlink_mutex); 6330 + devlink->reload_enabled = true; 6331 + mutex_unlock(&devlink_mutex); 6332 + } 6333 + EXPORT_SYMBOL_GPL(devlink_reload_enable); 6334 + 6335 + /** 6336 + * devlink_reload_disable - Disable reload of devlink instance 6337 + * 6338 + * @devlink: devlink 6339 + * 6340 + * Should be called at the beginning of device cleanup 6341 + * process when reload operation is supported. 6342 + */ 6343 + void devlink_reload_disable(struct devlink *devlink) 6344 + { 6345 + mutex_lock(&devlink_mutex); 6346 + /* Mutex is taken which ensures that no reload operation is in 6347 + * progress while setting up forbidded flag. 6348 + */ 6349 + devlink->reload_enabled = false; 6350 + mutex_unlock(&devlink_mutex); 6351 + } 6352 + EXPORT_SYMBOL_GPL(devlink_reload_disable); 6319 6353 6320 6354 /** 6321 6355 * devlink_free - Free devlink instance resources ··· 8241 8201 if (WARN_ON(!devlink_reload_supported(devlink))) 8242 8202 continue; 8243 8203 err = devlink_reload(devlink, &init_net, NULL); 8244 - if (err) 8204 + if (err && err != -EOPNOTSUPP) 8245 8205 pr_warn("Failed to reload devlink instance into init_net\n"); 8246 8206 } 8247 8207 }