Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add __rcu annotations to sk_wq and wq

Add proper RCU annotations/verbs to sk_wq and wq members

Fix __sctp_write_space() sk_sleep() abuse (and sock->wq access)

Fix sunrpc sk_sleep() abuse too

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller
eaefd110 04cfa852

+46 -30
+2 -1
include/linux/net.h
··· 118 118 }; 119 119 120 120 struct socket_wq { 121 + /* Note: wait MUST be first field of socket_wq */ 121 122 wait_queue_head_t wait; 122 123 struct fasync_struct *fasync_list; 123 124 struct rcu_head rcu; ··· 143 142 144 143 unsigned long flags; 145 144 146 - struct socket_wq *wq; 145 + struct socket_wq __rcu *wq; 147 146 148 147 struct file *file; 149 148 struct sock *sk;
+4 -3
include/net/sock.h
··· 281 281 int sk_rcvbuf; 282 282 283 283 struct sk_filter __rcu *sk_filter; 284 - struct socket_wq *sk_wq; 284 + struct socket_wq __rcu *sk_wq; 285 285 286 286 #ifdef CONFIG_NET_DMA 287 287 struct sk_buff_head sk_async_wait_queue; ··· 1266 1266 1267 1267 static inline wait_queue_head_t *sk_sleep(struct sock *sk) 1268 1268 { 1269 - return &sk->sk_wq->wait; 1269 + BUILD_BUG_ON(offsetof(struct socket_wq, wait) != 0); 1270 + return &rcu_dereference_raw(sk->sk_wq)->wait; 1270 1271 } 1271 1272 /* Detach socket from process context. 1272 1273 * Announce socket dead, detach it from wait queue and inode. ··· 1288 1287 static inline void sock_graft(struct sock *sk, struct socket *parent) 1289 1288 { 1290 1289 write_lock_bh(&sk->sk_callback_lock); 1291 - rcu_assign_pointer(sk->sk_wq, parent->wq); 1290 + sk->sk_wq = parent->wq; 1292 1291 parent->sk = sk; 1293 1292 sk_set_socket(sk, parent); 1294 1293 security_sock_graft(sk, parent);
+5 -4
net/sctp/socket.c
··· 6102 6102 wake_up_interruptible(&asoc->wait); 6103 6103 6104 6104 if (sctp_writeable(sk)) { 6105 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 6106 - wake_up_interruptible(sk_sleep(sk)); 6105 + wait_queue_head_t *wq = sk_sleep(sk); 6106 + 6107 + if (wq && waitqueue_active(wq)) 6108 + wake_up_interruptible(wq); 6107 6109 6108 6110 /* Note that we try to include the Async I/O support 6109 6111 * here by modeling from the current TCP/UDP code. 6110 6112 * We have not tested with it yet. 6111 6113 */ 6112 - if (sock->wq->fasync_list && 6113 - !(sk->sk_shutdown & SEND_SHUTDOWN)) 6114 + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) 6114 6115 sock_wake_async(sock, 6115 6116 SOCK_WAKE_SPACE, POLL_OUT); 6116 6117 }
+14 -9
net/socket.c
··· 240 240 static struct inode *sock_alloc_inode(struct super_block *sb) 241 241 { 242 242 struct socket_alloc *ei; 243 + struct socket_wq *wq; 243 244 244 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 245 246 if (!ei) 246 247 return NULL; 247 - ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); 248 - if (!ei->socket.wq) { 248 + wq = kmalloc(sizeof(*wq), GFP_KERNEL); 249 + if (!wq) { 249 250 kmem_cache_free(sock_inode_cachep, ei); 250 251 return NULL; 251 252 } 252 - init_waitqueue_head(&ei->socket.wq->wait); 253 - ei->socket.wq->fasync_list = NULL; 253 + init_waitqueue_head(&wq->wait); 254 + wq->fasync_list = NULL; 255 + RCU_INIT_POINTER(ei->socket.wq, wq); 254 256 255 257 ei->socket.state = SS_UNCONNECTED; 256 258 ei->socket.flags = 0; ··· 275 273 static void sock_destroy_inode(struct inode *inode) 276 274 { 277 275 struct socket_alloc *ei; 276 + struct socket_wq *wq; 278 277 279 278 ei = container_of(inode, struct socket_alloc, vfs_inode); 280 - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); 279 + wq = rcu_dereference_protected(ei->socket.wq, 1); 280 + call_rcu(&wq->rcu, wq_free_rcu); 281 281 kmem_cache_free(sock_inode_cachep, ei); 282 282 } 283 283 ··· 528 524 module_put(owner); 529 525 } 530 526 531 - if (sock->wq->fasync_list) 527 + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 532 528 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 533 529 534 530 percpu_sub(sockets_in_use, 1); ··· 1112 1108 { 1113 1109 struct socket *sock = filp->private_data; 1114 1110 struct sock *sk = sock->sk; 1111 + struct socket_wq *wq; 1115 1112 1116 1113 if (sk == NULL) 1117 1114 return -EINVAL; 1118 1115 1119 1116 lock_sock(sk); 1117 + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); 1118 + fasync_helper(fd, filp, on, &wq->fasync_list); 1120 1119 1121 - fasync_helper(fd, filp, on, &sock->wq->fasync_list); 1122 - 1123 - if (!sock->wq->fasync_list) 1120 + if (!wq->fasync_list) 1124 1121 sock_reset_flag(sk, SOCK_FASYNC); 1125 1122 else 1126 1123 sock_set_flag(sk, SOCK_FASYNC);
+20 -12
net/sunrpc/svcsock.c
··· 420 420 static void svc_udp_data_ready(struct sock *sk, int count) 421 421 { 422 422 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 423 + wait_queue_head_t *wq = sk_sleep(sk); 423 424 424 425 if (svsk) { 425 426 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", ··· 429 428 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 430 429 svc_xprt_enqueue(&svsk->sk_xprt); 431 430 } 432 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 433 - wake_up_interruptible(sk_sleep(sk)); 431 + if (wq && waitqueue_active(wq)) 432 + wake_up_interruptible(wq); 434 433 } 435 434 436 435 /* ··· 439 438 static void svc_write_space(struct sock *sk) 440 439 { 441 440 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 441 + wait_queue_head_t *wq = sk_sleep(sk); 442 442 443 443 if (svsk) { 444 444 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", ··· 447 445 svc_xprt_enqueue(&svsk->sk_xprt); 448 446 } 449 447 450 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { 448 + if (wq && waitqueue_active(wq)) { 451 449 dprintk("RPC svc_write_space: someone sleeping on %p\n", 452 450 svsk); 453 - wake_up_interruptible(sk_sleep(sk)); 451 + wake_up_interruptible(wq); 454 452 } 455 453 } 456 454 ··· 741 739 static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) 742 740 { 743 741 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 742 + wait_queue_head_t *wq; 744 743 745 744 dprintk("svc: socket %p TCP (listen) state change %d\n", 746 745 sk, sk->sk_state); ··· 764 761 printk("svc: socket %p: no user data\n", sk); 765 762 } 766 763 767 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 768 - wake_up_interruptible_all(sk_sleep(sk)); 764 + wq = sk_sleep(sk); 765 + if (wq && waitqueue_active(wq)) 766 + wake_up_interruptible_all(wq); 769 767 } 770 768 771 769 /* ··· 775 771 static void svc_tcp_state_change(struct sock *sk) 776 772 { 777 773 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 774 + wait_queue_head_t *wq = sk_sleep(sk); 778 775 779 776 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", 780 777 sk, sk->sk_state, sk->sk_user_data); ··· 786 781 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 787 782 svc_xprt_enqueue(&svsk->sk_xprt); 788 783 } 789 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 790 - wake_up_interruptible_all(sk_sleep(sk)); 784 + if (wq && waitqueue_active(wq)) 785 + wake_up_interruptible_all(wq); 791 786 } 792 787 793 788 static void svc_tcp_data_ready(struct sock *sk, int count) 794 789 { 795 790 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 791 + wait_queue_head_t *wq = sk_sleep(sk); 796 792 797 793 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 798 794 sk, sk->sk_user_data); ··· 801 795 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 802 796 svc_xprt_enqueue(&svsk->sk_xprt); 803 797 804 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 805 - wake_up_interruptible(sk_sleep(sk)); 798 + if (wq && waitqueue_active(wq)) 799 + wake_up_interruptible(wq); 806 800 } 807 801 808 802 /* ··· 1537 1531 { 1538 1532 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1539 1533 struct sock *sk = svsk->sk_sk; 1534 + wait_queue_head_t *wq; 1540 1535 1541 1536 dprintk("svc: svc_sock_detach(%p)\n", svsk); 1542 1537 ··· 1546 1539 sk->sk_data_ready = svsk->sk_odata; 1547 1540 sk->sk_write_space = svsk->sk_owspace; 1548 1541 1549 - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) 1550 - wake_up_interruptible(sk_sleep(sk)); 1542 + wq = sk_sleep(sk); 1543 + if (wq && waitqueue_active(wq)) 1544 + wake_up_interruptible(wq); 1551 1545 } 1552 1546 1553 1547 /*
+1 -1
net/unix/af_unix.c
··· 1171 1171 newsk->sk_type = sk->sk_type; 1172 1172 init_peercred(newsk); 1173 1173 newu = unix_sk(newsk); 1174 - newsk->sk_wq = &newu->peer_wq; 1174 + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); 1175 1175 otheru = unix_sk(other); 1176 1176 1177 1177 /* copy address information from listening to new sock*/