Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: core: limit nested device depth

The current code doesn't limit the number of nested devices.
Nested devices are handled recursively, which needs a huge amount of
stack memory. So, unlimited nesting of devices can cause a stack overflow.

This patch adds upper_level and lower_level; they are common variables
and represent the maximum upper/lower depth.
When an upper/lower device is attached or detached,
{lower/upper}_level are updated, and if the maximum depth is bigger than 8,
the attach routine fails and returns -EMLINK.

In addition, this patch converts the recursive routines
netdev_walk_all_{lower/upper} into iterative routines.

Test commands:
ip link add dummy0 type dummy
ip link add link dummy0 name vlan1 type vlan id 1
ip link set vlan1 up

for i in {2..55}
do
let A=$i-1

ip link add vlan$i link vlan$A type vlan id $i
done
ip link del dummy0

Splat looks like:
[ 155.513226][ T908] BUG: KASAN: use-after-free in __unwind_start+0x71/0x850
[ 155.514162][ T908] Write of size 88 at addr ffff8880608a6cc0 by task ip/908
[ 155.515048][ T908]
[ 155.515333][ T908] CPU: 0 PID: 908 Comm: ip Not tainted 5.4.0-rc3+ #96
[ 155.516147][ T908] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[ 155.517233][ T908] Call Trace:
[ 155.517627][ T908]
[ 155.517918][ T908] Allocated by task 0:
[ 155.518412][ T908] (stack is not available)
[ 155.518955][ T908]
[ 155.519228][ T908] Freed by task 0:
[ 155.519885][ T908] (stack is not available)
[ 155.520452][ T908]
[ 155.520729][ T908] The buggy address belongs to the object at ffff8880608a6ac0
[ 155.520729][ T908] which belongs to the cache names_cache of size 4096
[ 155.522387][ T908] The buggy address is located 512 bytes inside of
[ 155.522387][ T908] 4096-byte region [ffff8880608a6ac0, ffff8880608a7ac0)
[ 155.523920][ T908] The buggy address belongs to the page:
[ 155.524552][ T908] page:ffffea0001822800 refcount:1 mapcount:0 mapping:ffff88806c657cc0 index:0x0 compound_mapcount:0
[ 155.525836][ T908] flags: 0x100000000010200(slab|head)
[ 155.526445][ T908] raw: 0100000000010200 ffffea0001813808 ffffea0001a26c08 ffff88806c657cc0
[ 155.527424][ T908] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000
[ 155.528429][ T908] page dumped because: kasan: bad access detected
[ 155.529158][ T908]
[ 155.529410][ T908] Memory state around the buggy address:
[ 155.530060][ T908] ffff8880608a6b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 155.530971][ T908] ffff8880608a6c00: fb fb fb fb fb f1 f1 f1 f1 00 f2 f2 f2 f3 f3 f3
[ 155.531889][ T908] >ffff8880608a6c80: f3 fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[ 155.532806][ T908] ^
[ 155.533509][ T908] ffff8880608a6d00: fb fb fb fb fb fb fb fb fb f1 f1 f1 f1 00 00 00
[ 155.534436][ T908] ffff8880608a6d80: f2 f3 f3 f3 f3 fb fb fb 00 00 00 00 00 00 00 00
[ ... ]

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Taehee Yoo and committed by
David S. Miller
5343da4c 82ecff65

+231 -45
+4
include/linux/netdevice.h
··· 1649 1649 * @perm_addr: Permanent hw address 1650 1650 * @addr_assign_type: Hw address assignment type 1651 1651 * @addr_len: Hardware address length 1652 + * @upper_level: Maximum depth level of upper devices. 1653 + * @lower_level: Maximum depth level of lower devices. 1652 1654 * @neigh_priv_len: Used in neigh_alloc() 1653 1655 * @dev_id: Used to differentiate devices that share 1654 1656 * the same link layer address ··· 1877 1875 unsigned char perm_addr[MAX_ADDR_LEN]; 1878 1876 unsigned char addr_assign_type; 1879 1877 unsigned char addr_len; 1878 + unsigned char upper_level; 1879 + unsigned char lower_level; 1880 1880 unsigned short neigh_priv_len; 1881 1881 unsigned short dev_id; 1882 1882 unsigned short dev_port;
+227 -45
net/core/dev.c
··· 146 146 #include "net-sysfs.h" 147 147 148 148 #define MAX_GRO_SKBS 8 149 + #define MAX_NEST_DEV 8 149 150 150 151 /* This should be increased if a protocol with a bigger head is added. */ 151 152 #define GRO_MAX_HEAD (MAX_HEADER + 128) ··· 6645 6644 } 6646 6645 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); 6647 6646 6647 + static struct net_device *netdev_next_upper_dev(struct net_device *dev, 6648 + struct list_head **iter) 6649 + { 6650 + struct netdev_adjacent *upper; 6651 + 6652 + upper = list_entry((*iter)->next, struct netdev_adjacent, list); 6653 + 6654 + if (&upper->list == &dev->adj_list.upper) 6655 + return NULL; 6656 + 6657 + *iter = &upper->list; 6658 + 6659 + return upper->dev; 6660 + } 6661 + 6648 6662 static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, 6649 6663 struct list_head **iter) 6650 6664 { ··· 6677 6661 return upper->dev; 6678 6662 } 6679 6663 6664 + static int netdev_walk_all_upper_dev(struct net_device *dev, 6665 + int (*fn)(struct net_device *dev, 6666 + void *data), 6667 + void *data) 6668 + { 6669 + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 6670 + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 6671 + int ret, cur = 0; 6672 + 6673 + now = dev; 6674 + iter = &dev->adj_list.upper; 6675 + 6676 + while (1) { 6677 + if (now != dev) { 6678 + ret = fn(now, data); 6679 + if (ret) 6680 + return ret; 6681 + } 6682 + 6683 + next = NULL; 6684 + while (1) { 6685 + udev = netdev_next_upper_dev(now, &iter); 6686 + if (!udev) 6687 + break; 6688 + 6689 + next = udev; 6690 + niter = &udev->adj_list.upper; 6691 + dev_stack[cur] = now; 6692 + iter_stack[cur++] = iter; 6693 + break; 6694 + } 6695 + 6696 + if (!next) { 6697 + if (!cur) 6698 + return 0; 6699 + next = dev_stack[--cur]; 6700 + niter = iter_stack[cur]; 6701 + } 6702 + 6703 + now = next; 6704 + iter = niter; 6705 + } 6706 + 6707 + return 0; 6708 + } 6709 + 6680 6710 int netdev_walk_all_upper_dev_rcu(struct net_device *dev, 6681 
6711 int (*fn)(struct net_device *dev, 6682 6712 void *data), 6683 6713 void *data) 6684 6714 { 6685 - struct net_device *udev; 6686 - struct list_head *iter; 6687 - int ret; 6715 + struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 6716 + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 6717 + int ret, cur = 0; 6688 6718 6689 - for (iter = &dev->adj_list.upper, 6690 - udev = netdev_next_upper_dev_rcu(dev, &iter); 6691 - udev; 6692 - udev = netdev_next_upper_dev_rcu(dev, &iter)) { 6693 - /* first is the upper device itself */ 6694 - ret = fn(udev, data); 6695 - if (ret) 6696 - return ret; 6719 + now = dev; 6720 + iter = &dev->adj_list.upper; 6697 6721 6698 - /* then look at all of its upper devices */ 6699 - ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); 6700 - if (ret) 6701 - return ret; 6722 + while (1) { 6723 + if (now != dev) { 6724 + ret = fn(now, data); 6725 + if (ret) 6726 + return ret; 6727 + } 6728 + 6729 + next = NULL; 6730 + while (1) { 6731 + udev = netdev_next_upper_dev_rcu(now, &iter); 6732 + if (!udev) 6733 + break; 6734 + 6735 + next = udev; 6736 + niter = &udev->adj_list.upper; 6737 + dev_stack[cur] = now; 6738 + iter_stack[cur++] = iter; 6739 + break; 6740 + } 6741 + 6742 + if (!next) { 6743 + if (!cur) 6744 + return 0; 6745 + next = dev_stack[--cur]; 6746 + niter = iter_stack[cur]; 6747 + } 6748 + 6749 + now = next; 6750 + iter = niter; 6702 6751 } 6703 6752 6704 6753 return 0; ··· 6871 6790 void *data), 6872 6791 void *data) 6873 6792 { 6874 - struct net_device *ldev; 6875 - struct list_head *iter; 6876 - int ret; 6793 + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 6794 + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 6795 + int ret, cur = 0; 6877 6796 6878 - for (iter = &dev->adj_list.lower, 6879 - ldev = netdev_next_lower_dev(dev, &iter); 6880 - ldev; 6881 - ldev = netdev_next_lower_dev(dev, &iter)) { 6882 - /* first is the lower device itself */ 6883 - ret = fn(ldev, 
data); 6884 - if (ret) 6885 - return ret; 6797 + now = dev; 6798 + iter = &dev->adj_list.lower; 6886 6799 6887 - /* then look at all of its lower devices */ 6888 - ret = netdev_walk_all_lower_dev(ldev, fn, data); 6889 - if (ret) 6890 - return ret; 6800 + while (1) { 6801 + if (now != dev) { 6802 + ret = fn(now, data); 6803 + if (ret) 6804 + return ret; 6805 + } 6806 + 6807 + next = NULL; 6808 + while (1) { 6809 + ldev = netdev_next_lower_dev(now, &iter); 6810 + if (!ldev) 6811 + break; 6812 + 6813 + next = ldev; 6814 + niter = &ldev->adj_list.lower; 6815 + dev_stack[cur] = now; 6816 + iter_stack[cur++] = iter; 6817 + break; 6818 + } 6819 + 6820 + if (!next) { 6821 + if (!cur) 6822 + return 0; 6823 + next = dev_stack[--cur]; 6824 + niter = iter_stack[cur]; 6825 + } 6826 + 6827 + now = next; 6828 + iter = niter; 6891 6829 } 6892 6830 6893 6831 return 0; ··· 6927 6827 return lower->dev; 6928 6828 } 6929 6829 6830 + static u8 __netdev_upper_depth(struct net_device *dev) 6831 + { 6832 + struct net_device *udev; 6833 + struct list_head *iter; 6834 + u8 max_depth = 0; 6835 + 6836 + for (iter = &dev->adj_list.upper, 6837 + udev = netdev_next_upper_dev(dev, &iter); 6838 + udev; 6839 + udev = netdev_next_upper_dev(dev, &iter)) { 6840 + if (max_depth < udev->upper_level) 6841 + max_depth = udev->upper_level; 6842 + } 6843 + 6844 + return max_depth; 6845 + } 6846 + 6847 + static u8 __netdev_lower_depth(struct net_device *dev) 6848 + { 6849 + struct net_device *ldev; 6850 + struct list_head *iter; 6851 + u8 max_depth = 0; 6852 + 6853 + for (iter = &dev->adj_list.lower, 6854 + ldev = netdev_next_lower_dev(dev, &iter); 6855 + ldev; 6856 + ldev = netdev_next_lower_dev(dev, &iter)) { 6857 + if (max_depth < ldev->lower_level) 6858 + max_depth = ldev->lower_level; 6859 + } 6860 + 6861 + return max_depth; 6862 + } 6863 + 6864 + static int __netdev_update_upper_level(struct net_device *dev, void *data) 6865 + { 6866 + dev->upper_level = __netdev_upper_depth(dev) + 1; 6867 + return 0; 
6868 + } 6869 + 6870 + static int __netdev_update_lower_level(struct net_device *dev, void *data) 6871 + { 6872 + dev->lower_level = __netdev_lower_depth(dev) + 1; 6873 + return 0; 6874 + } 6875 + 6930 6876 int netdev_walk_all_lower_dev_rcu(struct net_device *dev, 6931 6877 int (*fn)(struct net_device *dev, 6932 6878 void *data), 6933 6879 void *data) 6934 6880 { 6935 - struct net_device *ldev; 6936 - struct list_head *iter; 6937 - int ret; 6881 + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; 6882 + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; 6883 + int ret, cur = 0; 6938 6884 6939 - for (iter = &dev->adj_list.lower, 6940 - ldev = netdev_next_lower_dev_rcu(dev, &iter); 6941 - ldev; 6942 - ldev = netdev_next_lower_dev_rcu(dev, &iter)) { 6943 - /* first is the lower device itself */ 6944 - ret = fn(ldev, data); 6945 - if (ret) 6946 - return ret; 6885 + now = dev; 6886 + iter = &dev->adj_list.lower; 6947 6887 6948 - /* then look at all of its lower devices */ 6949 - ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data); 6950 - if (ret) 6951 - return ret; 6888 + while (1) { 6889 + if (now != dev) { 6890 + ret = fn(now, data); 6891 + if (ret) 6892 + return ret; 6893 + } 6894 + 6895 + next = NULL; 6896 + while (1) { 6897 + ldev = netdev_next_lower_dev_rcu(now, &iter); 6898 + if (!ldev) 6899 + break; 6900 + 6901 + next = ldev; 6902 + niter = &ldev->adj_list.lower; 6903 + dev_stack[cur] = now; 6904 + iter_stack[cur++] = iter; 6905 + break; 6906 + } 6907 + 6908 + if (!next) { 6909 + if (!cur) 6910 + return 0; 6911 + next = dev_stack[--cur]; 6912 + niter = iter_stack[cur]; 6913 + } 6914 + 6915 + now = next; 6916 + iter = niter; 6952 6917 } 6953 6918 6954 6919 return 0; ··· 7270 7105 if (netdev_has_upper_dev(upper_dev, dev)) 7271 7106 return -EBUSY; 7272 7107 7108 + if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV) 7109 + return -EMLINK; 7110 + 7273 7111 if (!master) { 7274 7112 if (netdev_has_upper_dev(dev, upper_dev)) 
7275 7113 return -EEXIST; ··· 7298 7130 ret = notifier_to_errno(ret); 7299 7131 if (ret) 7300 7132 goto rollback; 7133 + 7134 + __netdev_update_upper_level(dev, NULL); 7135 + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); 7136 + 7137 + __netdev_update_lower_level(upper_dev, NULL); 7138 + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); 7301 7139 7302 7140 return 0; 7303 7141 ··· 7387 7213 7388 7214 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, 7389 7215 &changeupper_info.info); 7216 + 7217 + __netdev_update_upper_level(dev, NULL); 7218 + netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); 7219 + 7220 + __netdev_update_lower_level(upper_dev, NULL); 7221 + netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); 7390 7222 } 7391 7223 EXPORT_SYMBOL(netdev_upper_dev_unlink); 7392 7224 ··· 9392 9212 9393 9213 dev->gso_max_size = GSO_MAX_SIZE; 9394 9214 dev->gso_max_segs = GSO_MAX_SEGS; 9215 + dev->upper_level = 1; 9216 + dev->lower_level = 1; 9395 9217 9396 9218 INIT_LIST_HEAD(&dev->napi_list); 9397 9219 INIT_LIST_HEAD(&dev->unreg_list);