Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mld: convert ipv6_mc_socklist->sflist to RCU

The sflist has been protected by an rwlock, so its critical sections run
in atomic context.
In order to make this context sleepable, the locking scheme needs to change.
The sflist is actually already protected by RTNL, so if it is converted
to use RCU, its control-path context can become sleepable.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Taehee Yoo and committed by
David S. Miller
882ba1f7 cf2ce339

+24 -32
+2 -2
include/net/if_inet6.h
··· 78 78 struct ip6_sf_socklist { 79 79 unsigned int sl_max; 80 80 unsigned int sl_count; 81 + struct rcu_head rcu; 81 82 struct in6_addr sl_addr[]; 82 83 }; 83 84 ··· 92 91 int ifindex; 93 92 unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ 94 93 struct ipv6_mc_socklist __rcu *next; 95 - rwlock_t sflock; 96 - struct ip6_sf_socklist *sflist; 94 + struct ip6_sf_socklist __rcu *sflist; 97 95 struct rcu_head rcu; 98 96 }; 99 97
+22 -30
net/ipv6/mcast.c
··· 178 178 179 179 mc_lst->ifindex = dev->ifindex; 180 180 mc_lst->sfmode = mode; 181 - rwlock_init(&mc_lst->sflock); 182 - mc_lst->sflist = NULL; 181 + RCU_INIT_POINTER(mc_lst->sflist, NULL); 183 182 184 183 /* 185 184 * now add/increase the group membership on the device ··· 334 335 struct net *net = sock_net(sk); 335 336 int i, j, rv; 336 337 int leavegroup = 0; 337 - int pmclocked = 0; 338 338 int err; 339 339 340 340 source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; ··· 362 364 goto done; 363 365 } 364 366 /* if a source filter was set, must be the same mode as before */ 365 - if (pmc->sflist) { 367 + if (rcu_access_pointer(pmc->sflist)) { 366 368 if (pmc->sfmode != omode) { 367 369 err = -EINVAL; 368 370 goto done; ··· 374 376 pmc->sfmode = omode; 375 377 } 376 378 377 - write_lock(&pmc->sflock); 378 - pmclocked = 1; 379 - 380 - psl = pmc->sflist; 379 + psl = rtnl_dereference(pmc->sflist); 381 380 if (!add) { 382 381 if (!psl) 383 382 goto done; /* err = -EADDRNOTAVAIL */ ··· 424 429 if (psl) { 425 430 for (i = 0; i < psl->sl_count; i++) 426 431 newpsl->sl_addr[i] = psl->sl_addr[i]; 427 - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); 432 + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 433 + kfree_rcu(psl, rcu); 428 434 } 429 - pmc->sflist = psl = newpsl; 435 + psl = newpsl; 436 + rcu_assign_pointer(pmc->sflist, psl); 430 437 } 431 438 rv = 1; /* > 0 for insert logic below if sl_count is 0 */ 432 439 for (i = 0; i < psl->sl_count; i++) { ··· 444 447 /* update the interface list */ 445 448 ip6_mc_add_src(idev, group, omode, 1, source, 1); 446 449 done: 447 - if (pmclocked) 448 - write_unlock(&pmc->sflock); 449 450 read_unlock_bh(&idev->lock); 450 451 rcu_read_unlock(); 451 452 if (leavegroup) ··· 521 526 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); 522 527 } 523 528 524 - write_lock(&pmc->sflock); 525 - psl = pmc->sflist; 529 + psl = rtnl_dereference(pmc->sflist); 526 530 if (psl) { 527 531 (void) 
ip6_mc_del_src(idev, group, pmc->sfmode, 528 532 psl->sl_count, psl->sl_addr, 0); 529 - sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max)); 533 + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 534 + kfree_rcu(psl, rcu); 530 535 } else 531 536 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); 532 - pmc->sflist = newpsl; 537 + rcu_assign_pointer(pmc->sflist, newpsl); 533 538 pmc->sfmode = gsf->gf_fmode; 534 - write_unlock(&pmc->sflock); 535 539 err = 0; 536 540 done: 537 541 read_unlock_bh(&idev->lock); ··· 579 585 if (!pmc) /* must have a prior join */ 580 586 goto done; 581 587 gsf->gf_fmode = pmc->sfmode; 582 - psl = pmc->sflist; 588 + psl = rtnl_dereference(pmc->sflist); 583 589 count = psl ? psl->sl_count : 0; 584 590 read_unlock_bh(&idev->lock); 585 591 rcu_read_unlock(); 586 592 587 593 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; 588 594 gsf->gf_numsrc = count; 589 - /* changes to psl require the socket lock, and a write lock 590 - * on pmc->sflock. We have the socket lock so reading here is safe. 
591 - */ 595 + 592 596 for (i = 0; i < copycount; i++, p++) { 593 597 struct sockaddr_in6 *psin6; 594 598 struct sockaddr_storage ss; ··· 622 630 rcu_read_unlock(); 623 631 return np->mc_all; 624 632 } 625 - read_lock(&mc->sflock); 626 - psl = mc->sflist; 633 + psl = rcu_dereference(mc->sflist); 627 634 if (!psl) { 628 635 rv = mc->sfmode == MCAST_EXCLUDE; 629 636 } else { ··· 637 646 if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) 638 647 rv = false; 639 648 } 640 - read_unlock(&mc->sflock); 641 649 rcu_read_unlock(); 642 650 643 651 return rv; ··· 2412 2422 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, 2413 2423 struct inet6_dev *idev) 2414 2424 { 2425 + struct ip6_sf_socklist *psl; 2415 2426 int err; 2416 2427 2417 - write_lock_bh(&iml->sflock); 2418 - if (!iml->sflist) { 2428 + psl = rtnl_dereference(iml->sflist); 2429 + 2430 + if (!psl) { 2419 2431 /* any-source empty exclude case */ 2420 2432 err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); 2421 2433 } else { 2422 2434 err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 2423 - iml->sflist->sl_count, iml->sflist->sl_addr, 0); 2424 - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); 2425 - iml->sflist = NULL; 2435 + psl->sl_count, psl->sl_addr, 0); 2436 + RCU_INIT_POINTER(iml->sflist, NULL); 2437 + atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); 2438 + kfree_rcu(psl, rcu); 2426 2439 } 2427 - write_unlock_bh(&iml->sflock); 2428 2440 return err; 2429 2441 } 2430 2442