Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

genetlink: fix netns vs. netlink table locking

Since my commits introducing netns awareness into
genetlink we can get this problem:

BUG: scheduling while atomic: modprobe/1178/0x00000002
2 locks held by modprobe/1178:
#0: (genl_mutex){+.+.+.}, at: [<ffffffff8135ee1a>] genl_register_mc_grou
#1: (rcu_read_lock){.+.+..}, at: [<ffffffff8135eeb5>] genl_register_mc_g
Pid: 1178, comm: modprobe Not tainted 2.6.31-rc8-wl-34789-g95cb731-dirty #
Call Trace:
[<ffffffff8103e285>] __schedule_bug+0x85/0x90
[<ffffffff81403138>] schedule+0x108/0x588
[<ffffffff8135b131>] netlink_table_grab+0xa1/0xf0
[<ffffffff8135c3a7>] netlink_change_ngroups+0x47/0x100
[<ffffffff8135ef0f>] genl_register_mc_group+0x12f/0x290

This happens because I overlooked that netlink_table_grab() can
schedule: I assumed it only took the rwlock, but in the contention
case it sleeps, which is not allowed while rcu_read_lock() is held.

Fix this by having the code grab the netlink table lock first
and only then take the RCU read lock for netns protection.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Johannes Berg and committed by David S. Miller
d136f1bd 8be8057e

+37 -23
+4
include/linux/netlink.h
··· 176 176 #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) 177 177 178 178 179 + extern void netlink_table_grab(void); 180 + extern void netlink_table_ungrab(void); 181 + 179 182 extern struct sock *netlink_kernel_create(struct net *net, 180 183 int unit,unsigned int groups, 181 184 void (*input)(struct sk_buff *skb), 182 185 struct mutex *cb_mutex, 183 186 struct module *module); 184 187 extern void netlink_kernel_release(struct sock *sk); 188 + extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); 185 189 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); 186 190 extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); 187 191 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
+29 -22
net/netlink/af_netlink.c
··· 177 177 * this, _but_ remember, it adds useless work on UP machines. 178 178 */ 179 179 180 - static void netlink_table_grab(void) 180 + void netlink_table_grab(void) 181 181 __acquires(nl_table_lock) 182 182 { 183 + might_sleep(); 184 + 183 185 write_lock_irq(&nl_table_lock); 184 186 185 187 if (atomic_read(&nl_table_users)) { ··· 202 200 } 203 201 } 204 202 205 - static void netlink_table_ungrab(void) 203 + void netlink_table_ungrab(void) 206 204 __releases(nl_table_lock) 207 205 { 208 206 write_unlock_irq(&nl_table_lock); ··· 1551 1549 kfree(lrh->ptr); 1552 1550 } 1553 1551 1554 - /** 1555 - * netlink_change_ngroups - change number of multicast groups 1556 - * 1557 - * This changes the number of multicast groups that are available 1558 - * on a certain netlink family. Note that it is not possible to 1559 - * change the number of groups to below 32. Also note that it does 1560 - * not implicitly call netlink_clear_multicast_users() when the 1561 - * number of groups is reduced. 1562 - * 1563 - * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1564 - * @groups: The new number of groups. 
1565 - */ 1566 - int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1552 + int __netlink_change_ngroups(struct sock *sk, unsigned int groups) 1567 1553 { 1568 1554 unsigned long *listeners, *old = NULL; 1569 1555 struct listeners_rcu_head *old_rcu_head; 1570 1556 struct netlink_table *tbl = &nl_table[sk->sk_protocol]; 1571 - int err = 0; 1572 1557 1573 1558 if (groups < 32) 1574 1559 groups = 32; 1575 1560 1576 - netlink_table_grab(); 1577 1561 if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) { 1578 1562 listeners = kzalloc(NLGRPSZ(groups) + 1579 1563 sizeof(struct listeners_rcu_head), 1580 1564 GFP_ATOMIC); 1581 - if (!listeners) { 1582 - err = -ENOMEM; 1583 - goto out_ungrab; 1584 - } 1565 + if (!listeners) 1566 + return -ENOMEM; 1585 1567 old = tbl->listeners; 1586 1568 memcpy(listeners, old, NLGRPSZ(tbl->groups)); 1587 1569 rcu_assign_pointer(tbl->listeners, listeners); ··· 1583 1597 } 1584 1598 tbl->groups = groups; 1585 1599 1586 - out_ungrab: 1600 + return 0; 1601 + } 1602 + 1603 + /** 1604 + * netlink_change_ngroups - change number of multicast groups 1605 + * 1606 + * This changes the number of multicast groups that are available 1607 + * on a certain netlink family. Note that it is not possible to 1608 + * change the number of groups to below 32. Also note that it does 1609 + * not implicitly call netlink_clear_multicast_users() when the 1610 + * number of groups is reduced. 1611 + * 1612 + * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). 1613 + * @groups: The new number of groups. 1614 + */ 1615 + int netlink_change_ngroups(struct sock *sk, unsigned int groups) 1616 + { 1617 + int err; 1618 + 1619 + netlink_table_grab(); 1620 + err = __netlink_change_ngroups(sk, groups); 1587 1621 netlink_table_ungrab(); 1622 + 1588 1623 return err; 1589 1624 } 1590 1625
+4 -1
net/netlink/genetlink.c
··· 176 176 if (family->netnsok) { 177 177 struct net *net; 178 178 179 + netlink_table_grab(); 179 180 rcu_read_lock(); 180 181 for_each_net_rcu(net) { 181 - err = netlink_change_ngroups(net->genl_sock, 182 + err = __netlink_change_ngroups(net->genl_sock, 182 183 mc_groups_longs * BITS_PER_LONG); 183 184 if (err) { 184 185 /* ··· 189 188 * increased on some sockets which is ok. 190 189 */ 191 190 rcu_read_unlock(); 191 + netlink_table_ungrab(); 192 192 goto out; 193 193 } 194 194 } 195 195 rcu_read_unlock(); 196 + netlink_table_ungrab(); 196 197 } else { 197 198 err = netlink_change_ngroups(init_net.genl_sock, 198 199 mc_groups_longs * BITS_PER_LONG);