Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vxlan: fix igmp races

There are two race conditions in existing code for doing IGMP
management in workqueue in vxlan. First, the vxlan_group_used
function checks the list of vxlan's without any protection, and
it is possible for open followed by close to occur before the
igmp work queue runs.

To solve these, move the check into vxlan_open/stop so it is
protected by RTNL. And split into two work structures so that
there is no racy reference to underlying device state.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

stephen hemminger and committed by
David S. Miller
3fc2de2f 372675a4

+38 -15
+38 -15
drivers/net/vxlan.c
··· 136 136 u32 flags; /* VXLAN_F_* below */ 137 137 138 138 struct work_struct sock_work; 139 - struct work_struct igmp_work; 139 + struct work_struct igmp_join; 140 + struct work_struct igmp_leave; 140 141 141 142 unsigned long age_interval; 142 143 struct timer_list age_timer; ··· 737 736 return false; 738 737 } 739 738 740 - 741 739 /* See if multicast group is already in use by other ID */ 742 740 static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip) 743 741 { ··· 770 770 queue_work(vxlan_wq, &vs->del_work); 771 771 } 772 772 773 - /* Callback to update multicast group membership. 774 - * Scheduled when vxlan goes up/down. 773 + /* Callback to update multicast group membership when first VNI on 774 + * multicast address is brought up 775 + * Done as workqueue because ip_mc_join_group acquires RTNL. 775 776 */ 776 - static void vxlan_igmp_work(struct work_struct *work) 777 + static void vxlan_igmp_join(struct work_struct *work) 777 778 { 778 - struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work); 779 + struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join); 779 780 struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id); 780 781 struct vxlan_sock *vs = vxlan->vn_sock; 781 782 struct sock *sk = vs->sock->sk; ··· 786 785 }; 787 786 788 787 lock_sock(sk); 789 - if (vxlan_group_used(vn, vxlan->default_dst.remote_ip)) 790 - ip_mc_join_group(sk, &mreq); 791 - else 792 - ip_mc_leave_group(sk, &mreq); 788 + ip_mc_join_group(sk, &mreq); 789 + release_sock(sk); 790 + 791 + vxlan_sock_release(vn, vs); 792 + dev_put(vxlan->dev); 793 + } 794 + 795 + /* Inverse of vxlan_igmp_join when last VNI is brought down */ 796 + static void vxlan_igmp_leave(struct work_struct *work) 797 + { 798 + struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave); 799 + struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id); 800 + struct vxlan_sock *vs = vxlan->vn_sock; 801 + struct sock *sk = 
vs->sock->sk; 802 + struct ip_mreqn mreq = { 803 + .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, 804 + .imr_ifindex = vxlan->default_dst.remote_ifindex, 805 + }; 806 + 807 + lock_sock(sk); 808 + ip_mc_leave_group(sk, &mreq); 793 809 release_sock(sk); 794 810 795 811 vxlan_sock_release(vn, vs); ··· 1377 1359 /* Start ageing timer and join group when device is brought up */ 1378 1360 static int vxlan_open(struct net_device *dev) 1379 1361 { 1362 + struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 1380 1363 struct vxlan_dev *vxlan = netdev_priv(dev); 1381 1364 struct vxlan_sock *vs = vxlan->vn_sock; 1382 1365 ··· 1385 1366 if (!vs) 1386 1367 return -ENOTCONN; 1387 1368 1388 - if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { 1369 + if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) && 1370 + ! vxlan_group_used(vn, vxlan->default_dst.remote_ip)) { 1389 1371 vxlan_sock_hold(vs); 1390 1372 dev_hold(dev); 1391 - queue_work(vxlan_wq, &vxlan->igmp_work); 1373 + queue_work(vxlan_wq, &vxlan->igmp_join); 1392 1374 } 1393 1375 1394 1376 if (vxlan->age_interval) ··· 1420 1400 /* Cleanup timer and forwarding table on shutdown */ 1421 1401 static int vxlan_stop(struct net_device *dev) 1422 1402 { 1403 + struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 1423 1404 struct vxlan_dev *vxlan = netdev_priv(dev); 1424 1405 struct vxlan_sock *vs = vxlan->vn_sock; 1425 1406 1426 - if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) { 1407 + if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) && 1408 + ! 
vxlan_group_used(vn, vxlan->default_dst.remote_ip)) { 1427 1409 vxlan_sock_hold(vs); 1428 1410 dev_hold(dev); 1429 - queue_work(vxlan_wq, &vxlan->igmp_work); 1411 + queue_work(vxlan_wq, &vxlan->igmp_leave); 1430 1412 } 1431 1413 1432 1414 del_timer_sync(&vxlan->age_timer); ··· 1493 1471 1494 1472 INIT_LIST_HEAD(&vxlan->next); 1495 1473 spin_lock_init(&vxlan->hash_lock); 1496 - INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work); 1474 + INIT_WORK(&vxlan->igmp_join, vxlan_igmp_join); 1475 + INIT_WORK(&vxlan->igmp_leave, vxlan_igmp_leave); 1497 1476 INIT_WORK(&vxlan->sock_work, vxlan_sock_work); 1498 1477 1499 1478 init_timer_deferrable(&vxlan->age_timer);