Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rxrpc: Fix locking in rxrpc's sendmsg

Fix three bugs in rxrpc's sendmsg implementation:

(1) rxrpc_new_client_call() should release the socket lock when returning
an error from rxrpc_get_call_slot().

(2) rxrpc_wait_for_tx_window_intr() will return without the call mutex
held in the event that we're interrupted by a signal whilst waiting
for tx space on the socket or relocking the call mutex afterwards.

Fix this by: (a) moving the unlock/lock of the call mutex up to
rxrpc_send_data() such that the lock is not held around all of
rxrpc_wait_for_tx_window*() and (b) indicating to higher callers
whether we returned with the lock dropped. Note that this means
recvmsg() will not block on this call whilst we're waiting.

(3) After dropping and regaining the call mutex, rxrpc_send_data() needs
to go and recheck the state of the tx_pending buffer and the
tx_total_len check in case we raced with another sendmsg() on the same
call.

Thinking on this some more, it might make sense to have different locks for
sendmsg() and recvmsg(). There's probably no need to make recvmsg() wait
for sendmsg(). It does mean that recvmsg() can return MSG_EOR indicating
that a call is dead before a sendmsg() to that call returns - but that can
currently happen anyway.

Without fix (2), something like the following can be induced:

WARNING: bad unlock balance detected!
5.16.0-rc6-syzkaller #0 Not tainted
-------------------------------------
syz-executor011/3597 is trying to release lock (&call->user_mutex) at:
[<ffffffff885163a3>] rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor011/3597.
...
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_unlock_imbalance_bug include/trace/events/lock.h:58 [inline]
__lock_release kernel/locking/lockdep.c:5306 [inline]
lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5657
__mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:900
rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:561
sock_sendmsg_nosec net/socket.c:704 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:724
____sys_sendmsg+0x6e8/0x810 net/socket.c:2409
___sys_sendmsg+0xf3/0x170 net/socket.c:2463
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2492
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

[Thanks to Hawkins Jiawei and Khalid Masum for their attempts to fix this]

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Reported-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com
cc: Hawkins Jiawei <yin31149@gmail.com>
cc: Khalid Masum <khalid.masum.92@gmail.com>
cc: Dan Carpenter <dan.carpenter@oracle.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/166135894583.600315.7170979436768124075.stgit@warthog.procyon.org.uk
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

David Howells and committed by
Jakub Kicinski
b0f571ec 0cf731f9

+56 -38
+3 -1
net/rxrpc/call_object.c
··· 285 285 _enter("%p,%lx", rx, p->user_call_ID); 286 286 287 287 limiter = rxrpc_get_call_slot(p, gfp); 288 - if (!limiter) 288 + if (!limiter) { 289 + release_sock(&rx->sk); 289 290 return ERR_PTR(-ERESTARTSYS); 291 + } 290 292 291 293 call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); 292 294 if (IS_ERR(call)) {
+53 -37
net/rxrpc/sendmsg.c
··· 51 51 return sock_intr_errno(*timeo); 52 52 53 53 trace_rxrpc_transmit(call, rxrpc_transmit_wait); 54 - mutex_unlock(&call->user_mutex); 55 54 *timeo = schedule_timeout(*timeo); 56 - if (mutex_lock_interruptible(&call->user_mutex) < 0) 57 - return sock_intr_errno(*timeo); 58 55 } 59 56 } 60 57 ··· 287 290 static int rxrpc_send_data(struct rxrpc_sock *rx, 288 291 struct rxrpc_call *call, 289 292 struct msghdr *msg, size_t len, 290 - rxrpc_notify_end_tx_t notify_end_tx) 293 + rxrpc_notify_end_tx_t notify_end_tx, 294 + bool *_dropped_lock) 291 295 { 292 296 struct rxrpc_skb_priv *sp; 293 297 struct sk_buff *skb; 294 298 struct sock *sk = &rx->sk; 299 + enum rxrpc_call_state state; 295 300 long timeo; 296 - bool more; 297 - int ret, copied; 301 + bool more = msg->msg_flags & MSG_MORE; 302 + int ret, copied = 0; 298 303 299 304 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); 300 305 301 306 /* this should be in poll */ 302 307 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); 303 308 309 + reload: 310 + ret = -EPIPE; 304 311 if (sk->sk_shutdown & SEND_SHUTDOWN) 305 - return -EPIPE; 312 + goto maybe_error; 313 + state = READ_ONCE(call->state); 314 + ret = -ESHUTDOWN; 315 + if (state >= RXRPC_CALL_COMPLETE) 316 + goto maybe_error; 317 + ret = -EPROTO; 318 + if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && 319 + state != RXRPC_CALL_SERVER_ACK_REQUEST && 320 + state != RXRPC_CALL_SERVER_SEND_REPLY) 321 + goto maybe_error; 306 322 307 - more = msg->msg_flags & MSG_MORE; 308 - 323 + ret = -EMSGSIZE; 309 324 if (call->tx_total_len != -1) { 310 - if (len > call->tx_total_len) 311 - return -EMSGSIZE; 312 - if (!more && len != call->tx_total_len) 313 - return -EMSGSIZE; 325 + if (len - copied > call->tx_total_len) 326 + goto maybe_error; 327 + if (!more && len - copied != call->tx_total_len) 328 + goto maybe_error; 314 329 } 315 330 316 331 skb = call->tx_pending; 317 332 call->tx_pending = NULL; 318 333 rxrpc_see_skb(skb, rxrpc_skb_seen); 319 334 320 - copied = 0; 321 335 do { 
322 336 /* Check to see if there's a ping ACK to reply to. */ 323 337 if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) ··· 339 331 340 332 _debug("alloc"); 341 333 342 - if (!rxrpc_check_tx_space(call, NULL)) { 343 - ret = -EAGAIN; 344 - if (msg->msg_flags & MSG_DONTWAIT) 345 - goto maybe_error; 346 - ret = rxrpc_wait_for_tx_window(rx, call, 347 - &timeo, 348 - msg->msg_flags & MSG_WAITALL); 349 - if (ret < 0) 350 - goto maybe_error; 351 - } 334 + if (!rxrpc_check_tx_space(call, NULL)) 335 + goto wait_for_space; 352 336 353 337 /* Work out the maximum size of a packet. Assume that 354 338 * the security header is going to be in the padded ··· 468 468 efault: 469 469 ret = -EFAULT; 470 470 goto out; 471 + 472 + wait_for_space: 473 + ret = -EAGAIN; 474 + if (msg->msg_flags & MSG_DONTWAIT) 475 + goto maybe_error; 476 + mutex_unlock(&call->user_mutex); 477 + *_dropped_lock = true; 478 + ret = rxrpc_wait_for_tx_window(rx, call, &timeo, 479 + msg->msg_flags & MSG_WAITALL); 480 + if (ret < 0) 481 + goto maybe_error; 482 + if (call->interruptibility == RXRPC_INTERRUPTIBLE) { 483 + if (mutex_lock_interruptible(&call->user_mutex) < 0) { 484 + ret = sock_intr_errno(timeo); 485 + goto maybe_error; 486 + } 487 + } else { 488 + mutex_lock(&call->user_mutex); 489 + } 490 + *_dropped_lock = false; 491 + goto reload; 471 492 } 472 493 473 494 /* ··· 650 629 enum rxrpc_call_state state; 651 630 struct rxrpc_call *call; 652 631 unsigned long now, j; 632 + bool dropped_lock = false; 653 633 int ret; 654 634 655 635 struct rxrpc_send_params p = { ··· 759 737 ret = rxrpc_send_abort_packet(call); 760 738 } else if (p.command != RXRPC_CMD_SEND_DATA) { 761 739 ret = -EINVAL; 762 - } else if (rxrpc_is_client_call(call) && 763 - state != RXRPC_CALL_CLIENT_SEND_REQUEST) { 764 - /* request phase complete for this client call */ 765 - ret = -EPROTO; 766 - } else if (rxrpc_is_service_call(call) && 767 - state != RXRPC_CALL_SERVER_ACK_REQUEST && 768 - state != RXRPC_CALL_SERVER_SEND_REPLY) { 769 
- /* Reply phase not begun or not complete for service call. */ 770 - ret = -EPROTO; 771 740 } else { 772 - ret = rxrpc_send_data(rx, call, msg, len, NULL); 741 + ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); 773 742 } 774 743 775 744 out_put_unlock: 776 - mutex_unlock(&call->user_mutex); 745 + if (!dropped_lock) 746 + mutex_unlock(&call->user_mutex); 777 747 error_put: 778 748 rxrpc_put_call(call, rxrpc_call_put); 779 749 _leave(" = %d", ret); ··· 793 779 struct msghdr *msg, size_t len, 794 780 rxrpc_notify_end_tx_t notify_end_tx) 795 781 { 782 + bool dropped_lock = false; 796 783 int ret; 797 784 798 785 _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); ··· 811 796 case RXRPC_CALL_SERVER_ACK_REQUEST: 812 797 case RXRPC_CALL_SERVER_SEND_REPLY: 813 798 ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, 814 - notify_end_tx); 799 + notify_end_tx, &dropped_lock); 815 800 break; 816 801 case RXRPC_CALL_COMPLETE: 817 802 read_lock_bh(&call->state_lock); ··· 825 810 break; 826 811 } 827 812 828 - mutex_unlock(&call->user_mutex); 813 + if (!dropped_lock) 814 + mutex_unlock(&call->user_mutex); 829 815 _leave(" = %d", ret); 830 816 return ret; 831 817 }