Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6,mcast: always hold idev->lock before mca_lock

dingtianhong reported the following deadlock detected by lockdep:

======================================================
[ INFO: possible circular locking dependency detected ]
3.4.24.05-0.1-default #1 Not tainted
-------------------------------------------------------
ksoftirqd/0/3 is trying to acquire lock:
(&ndev->lock){+.+...}, at: [<ffffffff8147f804>] ipv6_get_lladdr+0x74/0x120

but task is already holding lock:
(&mc->mca_lock){+.+...}, at: [<ffffffff8149d130>] mld_send_report+0x40/0x150

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&mc->mca_lock){+.+...}:
[<ffffffff810a8027>] validate_chain+0x637/0x730
[<ffffffff810a8417>] __lock_acquire+0x2f7/0x500
[<ffffffff810a8734>] lock_acquire+0x114/0x150
[<ffffffff814f691a>] rt_spin_lock+0x4a/0x60
[<ffffffff8149e4bb>] igmp6_group_added+0x3b/0x120
[<ffffffff8149e5d8>] ipv6_mc_up+0x38/0x60
[<ffffffff81480a4d>] ipv6_find_idev+0x3d/0x80
[<ffffffff81483175>] addrconf_notify+0x3d5/0x4b0
[<ffffffff814fae3f>] notifier_call_chain+0x3f/0x80
[<ffffffff81073471>] raw_notifier_call_chain+0x11/0x20
[<ffffffff813d8722>] call_netdevice_notifiers+0x32/0x60
[<ffffffff813d92d4>] __dev_notify_flags+0x34/0x80
[<ffffffff813d9360>] dev_change_flags+0x40/0x70
[<ffffffff813ea627>] do_setlink+0x237/0x8a0
[<ffffffff813ebb6c>] rtnl_newlink+0x3ec/0x600
[<ffffffff813eb4d0>] rtnetlink_rcv_msg+0x160/0x310
[<ffffffff814040b9>] netlink_rcv_skb+0x89/0xb0
[<ffffffff813eb357>] rtnetlink_rcv+0x27/0x40
[<ffffffff81403e20>] netlink_unicast+0x140/0x180
[<ffffffff81404a9e>] netlink_sendmsg+0x33e/0x380
[<ffffffff813c4252>] sock_sendmsg+0x112/0x130
[<ffffffff813c537e>] __sys_sendmsg+0x44e/0x460
[<ffffffff813c5544>] sys_sendmsg+0x44/0x70
[<ffffffff814feab9>] system_call_fastpath+0x16/0x1b

-> #0 (&ndev->lock){+.+...}:
[<ffffffff810a798e>] check_prev_add+0x3de/0x440
[<ffffffff810a8027>] validate_chain+0x637/0x730
[<ffffffff810a8417>] __lock_acquire+0x2f7/0x500
[<ffffffff810a8734>] lock_acquire+0x114/0x150
[<ffffffff814f6c82>] rt_read_lock+0x42/0x60
[<ffffffff8147f804>] ipv6_get_lladdr+0x74/0x120
[<ffffffff8149b036>] mld_newpack+0xb6/0x160
[<ffffffff8149b18b>] add_grhead+0xab/0xc0
[<ffffffff8149d03b>] add_grec+0x3ab/0x460
[<ffffffff8149d14a>] mld_send_report+0x5a/0x150
[<ffffffff8149f99e>] igmp6_timer_handler+0x4e/0xb0
[<ffffffff8105705a>] call_timer_fn+0xca/0x1d0
[<ffffffff81057b9f>] run_timer_softirq+0x1df/0x2e0
[<ffffffff8104e8c7>] handle_pending_softirqs+0xf7/0x1f0
[<ffffffff8104ea3b>] __do_softirq_common+0x7b/0xf0
[<ffffffff8104f07f>] __thread_do_softirq+0x1af/0x210
[<ffffffff8104f1c1>] run_ksoftirqd+0xe1/0x1f0
[<ffffffff8106c7de>] kthread+0xae/0xc0
[<ffffffff814fff74>] kernel_thread_helper+0x4/0x10

actually we can just hold idev->lock before taking pmc->mca_lock,
and avoid taking idev->lock again when iterating idev->addr_list,
since the upper callers of mld_newpack() already take
read_lock_bh(&idev->lock).

Reported-by: dingtianhong <dingtianhong@huawei.com>
Cc: dingtianhong <dingtianhong@huawei.com>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Tested-by: Ding Tianhong <dingtianhong@huawei.com>
Tested-by: Chen Weilong <chenweilong@huawei.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Amerigo Wang and committed by
David S. Miller
8965779d ab6c7a0a

+15 -10
+3
include/net/addrconf.h
··· 86 86 const struct in6_addr *daddr, 87 87 unsigned int srcprefs, 88 88 struct in6_addr *saddr); 89 + extern int __ipv6_get_lladdr(struct inet6_dev *idev, 90 + struct in6_addr *addr, 91 + unsigned char banned_flags); 89 92 extern int ipv6_get_lladdr(struct net_device *dev, 90 93 struct in6_addr *addr, 91 94 unsigned char banned_flags);
+2 -2
net/ipv6/addrconf.c
··· 1444 1444 } 1445 1445 EXPORT_SYMBOL(ipv6_dev_get_saddr); 1446 1446 1447 - static int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, 1448 - unsigned char banned_flags) 1447 + int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, 1448 + unsigned char banned_flags) 1449 1449 { 1450 1450 struct inet6_ifaddr *ifp; 1451 1451 int err = -EADDRNOTAVAIL;
+10 -8
net/ipv6/mcast.c
··· 1351 1351 hdr->daddr = *daddr; 1352 1352 } 1353 1353 1354 - static struct sk_buff *mld_newpack(struct net_device *dev, int size) 1354 + static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) 1355 1355 { 1356 + struct net_device *dev = idev->dev; 1356 1357 struct net *net = dev_net(dev); 1357 1358 struct sock *sk = net->ipv6.igmp_sk; 1358 1359 struct sk_buff *skb; ··· 1378 1377 1379 1378 skb_reserve(skb, hlen); 1380 1379 1381 - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { 1380 + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { 1382 1381 /* <draft-ietf-magma-mld-source-05.txt>: 1383 1382 * use unspecified address as the source address 1384 1383 * when a valid link-local address is not available. ··· 1475 1474 struct mld2_grec *pgr; 1476 1475 1477 1476 if (!skb) 1478 - skb = mld_newpack(dev, dev->mtu); 1477 + skb = mld_newpack(pmc->idev, dev->mtu); 1479 1478 if (!skb) 1480 1479 return NULL; 1481 1480 pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); ··· 1495 1494 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, 1496 1495 int type, int gdeleted, int sdeleted) 1497 1496 { 1498 - struct net_device *dev = pmc->idev->dev; 1497 + struct inet6_dev *idev = pmc->idev; 1498 + struct net_device *dev = idev->dev; 1499 1499 struct mld2_report *pmr; 1500 1500 struct mld2_grec *pgr = NULL; 1501 1501 struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; ··· 1525 1523 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 1526 1524 if (skb) 1527 1525 mld_sendpack(skb); 1528 - skb = mld_newpack(dev, dev->mtu); 1526 + skb = mld_newpack(idev, dev->mtu); 1529 1527 } 1530 1528 } 1531 1529 first = 1; ··· 1552 1550 pgr->grec_nsrcs = htons(scount); 1553 1551 if (skb) 1554 1552 mld_sendpack(skb); 1555 - skb = mld_newpack(dev, dev->mtu); 1553 + skb = mld_newpack(idev, dev->mtu); 1556 1554 first = 1; 1557 1555 scount = 0; 1558 1556 } ··· 1607 1605 struct sk_buff *skb = NULL; 1608 1606 int type; 1609 1607 1608 + read_lock_bh(&idev->lock); 1610 1609 if (!pmc) { 1611 - read_lock_bh(&idev->lock); 1612 1610 for (pmc=idev->mc_list; pmc; pmc=pmc->next) { 1613 1611 if (pmc->mca_flags & MAF_NOREPORT) 1614 1612 continue; ··· 1620 1618 skb = add_grec(skb, pmc, type, 0, 0); 1621 1619 spin_unlock_bh(&pmc->mca_lock); 1622 1620 } 1623 - read_unlock_bh(&idev->lock); 1624 1621 } else { 1625 1622 spin_lock_bh(&pmc->mca_lock); 1626 1623 if (pmc->mca_sfcount[MCAST_EXCLUDE]) ··· 1629 1628 skb = add_grec(skb, pmc, type, 0, 0); 1630 1629 spin_unlock_bh(&pmc->mca_lock); 1631 1630 } 1631 + read_unlock_bh(&idev->lock); 1632 1632 if (skb) 1633 1633 mld_sendpack(skb); 1634 1634 }