Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inet_diag: allow concurrent operations

inet_diag_lock_handler() current implementation uses a mutex
to protect inet_diag_table[] array against concurrent changes.

This makes inet_diag dump serialized, thus less scalable
than legacy /proc files.

It is time to switch to full RCU protection.

As a bonus, if a target is statically linked instead of being
modular, inet_diag_lock_handler() & inet_diag_unlock_handler()
reduce to reads only.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Eric Dumazet and committed by
Paolo Abeni
223f5519 db591469

+40 -40
+40 -40
net/ipv4/inet_diag.c
··· 32 32 #include <linux/inet_diag.h> 33 33 #include <linux/sock_diag.h> 34 34 35 - static const struct inet_diag_handler **inet_diag_table; 35 + static const struct inet_diag_handler __rcu **inet_diag_table; 36 36 37 37 struct inet_diag_entry { 38 38 const __be32 *saddr; ··· 48 48 #endif 49 49 }; 50 50 51 - static DEFINE_MUTEX(inet_diag_table_mutex); 52 - 53 51 static const struct inet_diag_handler *inet_diag_lock_handler(int proto) 54 52 { 55 - if (proto < 0 || proto >= IPPROTO_MAX) { 56 - mutex_lock(&inet_diag_table_mutex); 57 - return ERR_PTR(-ENOENT); 58 - } 53 + const struct inet_diag_handler *handler; 54 + 55 + if (proto < 0 || proto >= IPPROTO_MAX) 56 + return NULL; 59 57 60 58 if (!READ_ONCE(inet_diag_table[proto])) 61 59 sock_load_diag_module(AF_INET, proto); 62 60 63 - mutex_lock(&inet_diag_table_mutex); 64 - if (!inet_diag_table[proto]) 65 - return ERR_PTR(-ENOENT); 61 + rcu_read_lock(); 62 + handler = rcu_dereference(inet_diag_table[proto]); 63 + if (handler && !try_module_get(handler->owner)) 64 + handler = NULL; 65 + rcu_read_unlock(); 66 66 67 - return inet_diag_table[proto]; 67 + return handler; 68 68 } 69 69 70 70 static void inet_diag_unlock_handler(const struct inet_diag_handler *handler) 71 71 { 72 - mutex_unlock(&inet_diag_table_mutex); 72 + module_put(handler->owner); 73 73 } 74 74 75 75 void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) ··· 104 104 const struct inet_diag_handler *handler; 105 105 size_t aux = 0; 106 106 107 - handler = inet_diag_table[req->sdiag_protocol]; 107 + rcu_read_lock(); 108 + handler = rcu_dereference(inet_diag_table[req->sdiag_protocol]); 109 + DEBUG_NET_WARN_ON_ONCE(!handler); 108 110 if (handler && handler->idiag_get_aux_size) 109 111 aux = handler->idiag_get_aux_size(sk, net_admin); 112 + rcu_read_unlock(); 110 113 111 114 return nla_total_size(sizeof(struct tcp_info)) 112 115 + nla_total_size(sizeof(struct inet_diag_msg)) ··· 247 244 struct nlmsghdr *nlh; 248 245 struct nlattr *attr; 249 246 void *info = NULL; 247 + int protocol; 250 248 251 249 cb_data = cb->data; 252 - handler = inet_diag_table[inet_diag_get_protocol(req, cb_data)]; 253 - BUG_ON(!handler); 250 + protocol = inet_diag_get_protocol(req, cb_data); 251 + 252 + /* inet_diag_lock_handler() made sure inet_diag_table[] is stable. */ 253 + handler = rcu_dereference_protected(inet_diag_table[protocol], 1); 254 + DEBUG_NET_WARN_ON_ONCE(!handler); 255 + if (!handler) 256 + return -ENXIO; 254 257 255 258 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 256 259 cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags); ··· 614 605 protocol = inet_diag_get_protocol(req, &dump_data); 615 606 616 607 handler = inet_diag_lock_handler(protocol); 617 - if (IS_ERR(handler)) { 618 - err = PTR_ERR(handler); 619 - } else if (cmd == SOCK_DIAG_BY_FAMILY) { 608 + if (!handler) 609 + return -ENOENT; 610 + 611 + if (cmd == SOCK_DIAG_BY_FAMILY) { 620 612 struct netlink_callback cb = { 621 613 .nlh = nlh, 622 614 .skb = in_skb, ··· 1269 1259 again: 1270 1260 prev_min_dump_alloc = cb->min_dump_alloc; 1271 1261 handler = inet_diag_lock_handler(protocol); 1272 - if (!IS_ERR(handler)) 1262 + if (handler) { 1273 1263 handler->dump(skb, cb, r); 1274 - else 1275 - err = PTR_ERR(handler); 1276 - inet_diag_unlock_handler(handler); 1277 - 1264 + inet_diag_unlock_handler(handler); 1265 + } else { 1266 + err = -ENOENT; 1267 + } 1278 1268 /* The skb is not large enough to fit one sk info and 1279 1269 * inet_sk_diag_fill() has requested for a larger skb. 1280 1270 */ ··· 1467 1457 } 1468 1458 1469 1459 handler = inet_diag_lock_handler(sk->sk_protocol); 1470 - if (IS_ERR(handler)) { 1471 - inet_diag_unlock_handler(handler); 1460 + if (!handler) { 1472 1461 nlmsg_cancel(skb, nlh); 1473 - return PTR_ERR(handler); 1462 + return -ENOENT; 1474 1463 } 1475 1464 1476 1465 attr = handler->idiag_info_size ··· 1504 1495 int inet_diag_register(const struct inet_diag_handler *h) 1505 1496 { 1506 1497 const __u16 type = h->idiag_type; 1507 - int err = -EINVAL; 1508 1498 1509 1499 if (type >= IPPROTO_MAX) 1510 - goto out; 1500 + return -EINVAL; 1511 1501 1512 - mutex_lock(&inet_diag_table_mutex); 1513 - err = -EEXIST; 1514 - if (!inet_diag_table[type]) { 1515 - WRITE_ONCE(inet_diag_table[type], h); 1516 - err = 0; 1517 - } 1518 - mutex_unlock(&inet_diag_table_mutex); 1519 - out: 1520 - return err; 1502 + return !cmpxchg((const struct inet_diag_handler **)&inet_diag_table[type], 1503 + NULL, h) ? 0 : -EEXIST; 1521 1504 } 1522 1505 EXPORT_SYMBOL_GPL(inet_diag_register); 1523 1506 ··· 1520 1519 if (type >= IPPROTO_MAX) 1521 1520 return; 1522 1521 1523 - mutex_lock(&inet_diag_table_mutex); 1524 - WRITE_ONCE(inet_diag_table[type], NULL); 1525 - mutex_unlock(&inet_diag_table_mutex); 1522 + xchg((const struct inet_diag_handler **)&inet_diag_table[type], 1523 + NULL); 1526 1524 } 1527 1525 EXPORT_SYMBOL_GPL(inet_diag_unregister); 1528 1526