Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netns: Fix crash by making igmp per namespace

This patch makes the multicast socket per namespace.

When a network namespace other than init_net is created and a
multicast packet is received, the kernel hangs or panics.

How to reproduce:

* create a child network namespace
* create a pair of virtual veth devices
* ip link add type veth
* move one side of the network device pair to the child namespace
* ip link set netns <childpid> dev veth1
* ping -I veth0 224.0.0.1

The bug appears because the function ip_mc_init_dev returns early when the
device is not in init_net, and so never initializes the multicast fields.

BUG: soft lockup - CPU#0 stuck for 61s! [avahi-daemon:2695]
Modules linked in:
irq event stamp: 50350
hardirqs last enabled at (50349): [<c03ee949>] _spin_unlock_irqrestore+0x34/0x39
hardirqs last disabled at (50350): [<c03ec639>] schedule+0x9f/0x5ff
softirqs last enabled at (45712): [<c0374d4b>] ip_setsockopt+0x8e7/0x909
softirqs last disabled at (45710): [<c03ee682>] _spin_lock_bh+0x8/0x27

Pid: 2695, comm: avahi-daemon Not tainted (2.6.27-rc2-00029-g0872073 #3)
EIP: 0060:[<c03ee47c>] EFLAGS: 00000297 CPU: 0
EIP is at __read_lock_failed+0x8/0x10
EAX: c4f38810 EBX: c4f38810 ECX: 00000000 EDX: c04cc22e
ESI: fb0000e0 EDI: 00000011 EBP: 0f02000a ESP: c4e3faa0
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
CR0: 8005003b CR2: 44618a40 CR3: 04e37000 CR4: 000006d0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
[<c02311f8>] ? _raw_read_lock+0x23/0x25
[<c0390666>] ? ip_check_mc+0x1c/0x83
[<c036d478>] ? ip_route_input+0x229/0xe92
[<c022e2e4>] ? trace_hardirqs_on_thunk+0xc/0x10
[<c0104c9c>] ? do_IRQ+0x69/0x7d
[<c0102e64>] ? restore_nocheck_notrace+0x0/0xe
[<c036fdba>] ? ip_rcv+0x227/0x505
[<c0358764>] ? netif_receive_skb+0xfe/0x2b3
[<c03588d2>] ? netif_receive_skb+0x26c/0x2b3
[<c035af31>] ? process_backlog+0x73/0xbd
[<c035a8cd>] ? net_rx_action+0xc1/0x1ae
[<c01218a8>] ? __do_softirq+0x7b/0xef
[<c0121953>] ? do_softirq+0x37/0x4d
[<c035b50d>] ? dev_queue_xmit+0x3d4/0x40b
[<c0122037>] ? local_bh_enable+0x96/0xab
[<c035b50d>] ? dev_queue_xmit+0x3d4/0x40b
[<c012181e>] ? _local_bh_enable+0x79/0x88
[<c035fcb8>] ? neigh_resolve_output+0x20f/0x239
[<c0373118>] ? ip_finish_output+0x1df/0x209
[<c0373364>] ? ip_dev_loopback_xmit+0x62/0x66
[<c0371db5>] ? ip_local_out+0x15/0x17
[<c0372013>] ? ip_push_pending_frames+0x25c/0x2bb
[<c03891b8>] ? udp_push_pending_frames+0x2bb/0x30e
[<c038a189>] ? udp_sendmsg+0x413/0x51d
[<c038a1a9>] ? udp_sendmsg+0x433/0x51d
[<c038f927>] ? inet_sendmsg+0x35/0x3f
[<c034f092>] ? sock_sendmsg+0xb8/0xd1
[<c012d554>] ? autoremove_wake_function+0x0/0x2b
[<c022e6de>] ? copy_from_user+0x32/0x5e
[<c022e6de>] ? copy_from_user+0x32/0x5e
[<c034f238>] ? sys_sendmsg+0x18d/0x1f0
[<c0175e90>] ? pipe_write+0x3cb/0x3d7
[<c0170347>] ? do_sync_write+0xbe/0x105
[<c012d554>] ? autoremove_wake_function+0x0/0x2b
[<c03503b2>] ? sys_socketcall+0x176/0x1b0
[<c01085ea>] ? syscall_trace_enter+0x6c/0x7b
[<c0102e1a>] ? syscall_call+0x7/0xb

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Daniel Lezcano and committed by
David S. Miller
877acedc 1bb5bd2c

+20 -51
+20 -51
net/ipv4/igmp.c
··· 289 289 struct rtable *rt; 290 290 struct iphdr *pip; 291 291 struct igmpv3_report *pig; 292 + struct net *net = dev_net(dev); 292 293 293 294 skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); 294 295 if (skb == NULL) ··· 300 299 .nl_u = { .ip4_u = { 301 300 .daddr = IGMPV3_ALL_MCR } }, 302 301 .proto = IPPROTO_IGMP }; 303 - if (ip_route_output_key(&init_net, &rt, &fl)) { 302 + if (ip_route_output_key(net, &rt, &fl)) { 304 303 kfree_skb(skb); 305 304 return NULL; 306 305 } ··· 630 629 struct igmphdr *ih; 631 630 struct rtable *rt; 632 631 struct net_device *dev = in_dev->dev; 632 + struct net *net = dev_net(dev); 633 633 __be32 group = pmc ? pmc->multiaddr : 0; 634 634 __be32 dst; 635 635 ··· 645 643 struct flowi fl = { .oif = dev->ifindex, 646 644 .nl_u = { .ip4_u = { .daddr = dst } }, 647 645 .proto = IPPROTO_IGMP }; 648 - if (ip_route_output_key(&init_net, &rt, &fl)) 646 + if (ip_route_output_key(net, &rt, &fl)) 649 647 return -1; 650 648 } 651 649 if (rt->rt_src == 0) { ··· 1198 1196 1199 1197 ASSERT_RTNL(); 1200 1198 1201 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1202 - return; 1203 - 1204 1199 for (im=in_dev->mc_list; im; im=im->next) { 1205 1200 if (im->multiaddr == addr) { 1206 1201 im->users++; ··· 1277 1278 1278 1279 ASSERT_RTNL(); 1279 1280 1280 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1281 - return; 1282 - 1283 1281 for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { 1284 1282 if (i->multiaddr==addr) { 1285 1283 if (--i->users == 0) { ··· 1304 1308 1305 1309 ASSERT_RTNL(); 1306 1310 1307 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1308 - return; 1309 - 1310 1311 for (i=in_dev->mc_list; i; i=i->next) 1311 1312 igmp_group_dropped(i); 1312 1313 ··· 1323 1330 void ip_mc_init_dev(struct in_device *in_dev) 1324 1331 { 1325 1332 ASSERT_RTNL(); 1326 - 1327 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1328 - return; 1329 1333 1330 1334 in_dev->mc_tomb = NULL; 1331 1335 #ifdef CONFIG_IP_MULTICAST ··· 1347 1357 1348 1358 
ASSERT_RTNL(); 1349 1359 1350 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1351 - return; 1352 - 1353 1360 ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); 1354 1361 1355 1362 for (i=in_dev->mc_list; i; i=i->next) ··· 1362 1375 struct ip_mc_list *i; 1363 1376 1364 1377 ASSERT_RTNL(); 1365 - 1366 - if (!net_eq(dev_net(in_dev->dev), &init_net)) 1367 - return; 1368 1378 1369 1379 /* Deactivate timers */ 1370 1380 ip_mc_down(in_dev); ··· 1379 1395 write_unlock_bh(&in_dev->mc_list_lock); 1380 1396 } 1381 1397 1382 - static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) 1398 + static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1383 1399 { 1384 1400 struct flowi fl = { .nl_u = { .ip4_u = 1385 1401 { .daddr = imr->imr_multiaddr.s_addr } } }; ··· 1388 1404 struct in_device *idev = NULL; 1389 1405 1390 1406 if (imr->imr_ifindex) { 1391 - idev = inetdev_by_index(&init_net, imr->imr_ifindex); 1407 + idev = inetdev_by_index(net, imr->imr_ifindex); 1392 1408 if (idev) 1393 1409 __in_dev_put(idev); 1394 1410 return idev; 1395 1411 } 1396 1412 if (imr->imr_address.s_addr) { 1397 - dev = ip_dev_find(&init_net, imr->imr_address.s_addr); 1413 + dev = ip_dev_find(net, imr->imr_address.s_addr); 1398 1414 if (!dev) 1399 1415 return NULL; 1400 1416 dev_put(dev); 1401 1417 } 1402 1418 1403 - if (!dev && !ip_route_output_key(&init_net, &rt, &fl)) { 1419 + if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1404 1420 dev = rt->u.dst.dev; 1405 1421 ip_rt_put(rt); 1406 1422 } ··· 1738 1754 struct ip_mc_socklist *iml=NULL, *i; 1739 1755 struct in_device *in_dev; 1740 1756 struct inet_sock *inet = inet_sk(sk); 1757 + struct net *net = sock_net(sk); 1741 1758 int ifindex; 1742 1759 int count = 0; 1743 1760 1744 1761 if (!ipv4_is_multicast(addr)) 1745 1762 return -EINVAL; 1746 1763 1747 - if (!net_eq(sock_net(sk), &init_net)) 1748 - return -EPROTONOSUPPORT; 1749 - 1750 1764 rtnl_lock(); 1751 1765 1752 - in_dev = ip_mc_find_dev(imr); 1766 + in_dev = 
ip_mc_find_dev(net, imr); 1753 1767 1754 1768 if (!in_dev) { 1755 1769 iml = NULL; ··· 1809 1827 struct inet_sock *inet = inet_sk(sk); 1810 1828 struct ip_mc_socklist *iml, **imlp; 1811 1829 struct in_device *in_dev; 1830 + struct net *net = sock_net(sk); 1812 1831 __be32 group = imr->imr_multiaddr.s_addr; 1813 1832 u32 ifindex; 1814 1833 int ret = -EADDRNOTAVAIL; 1815 1834 1816 - if (!net_eq(sock_net(sk), &init_net)) 1817 - return -EPROTONOSUPPORT; 1818 - 1819 1835 rtnl_lock(); 1820 - in_dev = ip_mc_find_dev(imr); 1836 + in_dev = ip_mc_find_dev(net, imr); 1821 1837 ifindex = imr->imr_ifindex; 1822 1838 for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { 1823 1839 if (iml->multi.imr_multiaddr.s_addr != group) ··· 1853 1873 struct in_device *in_dev = NULL; 1854 1874 struct inet_sock *inet = inet_sk(sk); 1855 1875 struct ip_sf_socklist *psl; 1876 + struct net *net = sock_net(sk); 1856 1877 int leavegroup = 0; 1857 1878 int i, j, rv; 1858 1879 1859 1880 if (!ipv4_is_multicast(addr)) 1860 1881 return -EINVAL; 1861 1882 1862 - if (!net_eq(sock_net(sk), &init_net)) 1863 - return -EPROTONOSUPPORT; 1864 - 1865 1883 rtnl_lock(); 1866 1884 1867 1885 imr.imr_multiaddr.s_addr = mreqs->imr_multiaddr; 1868 1886 imr.imr_address.s_addr = mreqs->imr_interface; 1869 1887 imr.imr_ifindex = ifindex; 1870 - in_dev = ip_mc_find_dev(&imr); 1888 + in_dev = ip_mc_find_dev(net, &imr); 1871 1889 1872 1890 if (!in_dev) { 1873 1891 err = -ENODEV; ··· 1985 2007 struct in_device *in_dev; 1986 2008 struct inet_sock *inet = inet_sk(sk); 1987 2009 struct ip_sf_socklist *newpsl, *psl; 2010 + struct net *net = sock_net(sk); 1988 2011 int leavegroup = 0; 1989 2012 1990 2013 if (!ipv4_is_multicast(addr)) ··· 1994 2015 msf->imsf_fmode != MCAST_EXCLUDE) 1995 2016 return -EINVAL; 1996 2017 1997 - if (!net_eq(sock_net(sk), &init_net)) 1998 - return -EPROTONOSUPPORT; 1999 - 2000 2018 rtnl_lock(); 2001 2019 2002 2020 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2003 2021 
imr.imr_address.s_addr = msf->imsf_interface; 2004 2022 imr.imr_ifindex = ifindex; 2005 - in_dev = ip_mc_find_dev(&imr); 2023 + in_dev = ip_mc_find_dev(net, &imr); 2006 2024 2007 2025 if (!in_dev) { 2008 2026 err = -ENODEV; ··· 2070 2094 struct in_device *in_dev; 2071 2095 struct inet_sock *inet = inet_sk(sk); 2072 2096 struct ip_sf_socklist *psl; 2097 + struct net *net = sock_net(sk); 2073 2098 2074 2099 if (!ipv4_is_multicast(addr)) 2075 2100 return -EINVAL; 2076 - 2077 - if (!net_eq(sock_net(sk), &init_net)) 2078 - return -EPROTONOSUPPORT; 2079 2101 2080 2102 rtnl_lock(); 2081 2103 2082 2104 imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; 2083 2105 imr.imr_address.s_addr = msf->imsf_interface; 2084 2106 imr.imr_ifindex = 0; 2085 - in_dev = ip_mc_find_dev(&imr); 2107 + in_dev = ip_mc_find_dev(net, &imr); 2086 2108 2087 2109 if (!in_dev) { 2088 2110 err = -ENODEV; ··· 2136 2162 addr = psin->sin_addr.s_addr; 2137 2163 if (!ipv4_is_multicast(addr)) 2138 2164 return -EINVAL; 2139 - 2140 - if (!net_eq(sock_net(sk), &init_net)) 2141 - return -EPROTONOSUPPORT; 2142 2165 2143 2166 rtnl_lock(); 2144 2167 ··· 2217 2246 { 2218 2247 struct inet_sock *inet = inet_sk(sk); 2219 2248 struct ip_mc_socklist *iml; 2249 + struct net *net = sock_net(sk); 2220 2250 2221 2251 if (inet->mc_list == NULL) 2222 - return; 2223 - 2224 - if (!net_eq(sock_net(sk), &init_net)) 2225 2252 return; 2226 2253 2227 2254 rtnl_lock(); ··· 2227 2258 struct in_device *in_dev; 2228 2259 inet->mc_list = iml->next; 2229 2260 2230 - in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex); 2261 + in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); 2231 2262 (void) ip_mc_leave_src(sk, iml, in_dev); 2232 2263 if (in_dev != NULL) { 2233 2264 ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);