Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rxrpc: Fix call timer start racing with call destruction

The rxrpc_call struct has a timer used to handle various timed events
relating to a call. This timer can get started from the packet input
routines that are run in softirq mode with just the RCU read lock held.
Unfortunately, because only the RCU read lock is held - and neither a ref nor
any other lock is taken - the call can start getting destroyed at the same time
a packet comes in addressed to that call. This causes the timer - which
was already stopped - to get restarted. Later, the timer dispatch code may
then oops if the timer got deallocated first.

Fix this by trying to take a ref on the rxrpc_call struct and, if
successful, passing that ref along to the timer. If the timer was already
running, the ref is discarded.

The timer completion routine can then pass the ref along to the call's work
item when it queues it. If the timer or work item were already
queued/running, the extra ref is discarded.

Fixes: a158bdd3247b ("rxrpc: Fix call timeouts")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: http://lists.infradead.org/pipermail/linux-afs/2022-March/005073.html
Link: https://lore.kernel.org/r/164865115696.2943015.11097991776647323586.stgit@warthog.procyon.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

David Howells and committed by
Paolo Abeni
4a7f62f9 e74e0244

+50 -15
+7 -1
include/trace/events/rxrpc.h
··· 83 83 rxrpc_call_error, 84 84 rxrpc_call_got, 85 85 rxrpc_call_got_kernel, 86 + rxrpc_call_got_timer, 86 87 rxrpc_call_got_userid, 87 88 rxrpc_call_new_client, 88 89 rxrpc_call_new_service, 89 90 rxrpc_call_put, 90 91 rxrpc_call_put_kernel, 91 92 rxrpc_call_put_noqueue, 93 + rxrpc_call_put_notimer, 94 + rxrpc_call_put_timer, 92 95 rxrpc_call_put_userid, 93 96 rxrpc_call_queued, 94 97 rxrpc_call_queued_ref, ··· 281 278 EM(rxrpc_call_error, "*E*") \ 282 279 EM(rxrpc_call_got, "GOT") \ 283 280 EM(rxrpc_call_got_kernel, "Gke") \ 281 + EM(rxrpc_call_got_timer, "GTM") \ 284 282 EM(rxrpc_call_got_userid, "Gus") \ 285 283 EM(rxrpc_call_new_client, "NWc") \ 286 284 EM(rxrpc_call_new_service, "NWs") \ 287 285 EM(rxrpc_call_put, "PUT") \ 288 286 EM(rxrpc_call_put_kernel, "Pke") \ 289 - EM(rxrpc_call_put_noqueue, "PNQ") \ 287 + EM(rxrpc_call_put_noqueue, "PnQ") \ 288 + EM(rxrpc_call_put_notimer, "PnT") \ 289 + EM(rxrpc_call_put_timer, "PTM") \ 290 290 EM(rxrpc_call_put_userid, "Pus") \ 291 291 EM(rxrpc_call_queued, "QUE") \ 292 292 EM(rxrpc_call_queued_ref, "QUR") \
+7 -8
net/rxrpc/ar-internal.h
··· 777 777 enum rxrpc_propose_ack_trace); 778 778 void rxrpc_process_call(struct work_struct *); 779 779 780 - static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call, 781 - unsigned long expire_at, 782 - unsigned long now, 783 - enum rxrpc_timer_trace why) 784 - { 785 - trace_rxrpc_timer(call, why, now); 786 - timer_reduce(&call->timer, expire_at); 787 - } 780 + void rxrpc_reduce_call_timer(struct rxrpc_call *call, 781 + unsigned long expire_at, 782 + unsigned long now, 783 + enum rxrpc_timer_trace why); 784 + 785 + void rxrpc_delete_call_timer(struct rxrpc_call *call); 788 786 789 787 /* 790 788 * call_object.c ··· 806 808 bool __rxrpc_queue_call(struct rxrpc_call *); 807 809 bool rxrpc_queue_call(struct rxrpc_call *); 808 810 void rxrpc_see_call(struct rxrpc_call *); 811 + bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); 809 812 void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); 810 813 void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); 811 814 void rxrpc_cleanup_call(struct rxrpc_call *);
+1 -1
net/rxrpc/call_event.c
··· 310 310 } 311 311 312 312 if (call->state == RXRPC_CALL_COMPLETE) { 313 - del_timer_sync(&call->timer); 313 + rxrpc_delete_call_timer(call); 314 314 goto out_put; 315 315 } 316 316
+35 -5
net/rxrpc/call_object.c
··· 53 53 54 54 if (call->state < RXRPC_CALL_COMPLETE) { 55 55 trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); 56 - rxrpc_queue_call(call); 56 + __rxrpc_queue_call(call); 57 + } else { 58 + rxrpc_put_call(call, rxrpc_call_put); 57 59 } 60 + } 61 + 62 + void rxrpc_reduce_call_timer(struct rxrpc_call *call, 63 + unsigned long expire_at, 64 + unsigned long now, 65 + enum rxrpc_timer_trace why) 66 + { 67 + if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { 68 + trace_rxrpc_timer(call, why, now); 69 + if (timer_reduce(&call->timer, expire_at)) 70 + rxrpc_put_call(call, rxrpc_call_put_notimer); 71 + } 72 + } 73 + 74 + void rxrpc_delete_call_timer(struct rxrpc_call *call) 75 + { 76 + if (del_timer_sync(&call->timer)) 77 + rxrpc_put_call(call, rxrpc_call_put_timer); 58 78 } 59 79 60 80 static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; ··· 483 463 } 484 464 } 485 465 466 + bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) 467 + { 468 + const void *here = __builtin_return_address(0); 469 + int n = atomic_fetch_add_unless(&call->usage, 1, 0); 470 + 471 + if (n == 0) 472 + return false; 473 + trace_rxrpc_call(call->debug_id, op, n, here, NULL); 474 + return true; 475 + } 476 + 486 477 /* 487 478 * Note the addition of a ref on a call. 
488 479 */ ··· 541 510 spin_unlock_bh(&call->lock); 542 511 543 512 rxrpc_put_call_slot(call); 544 - 545 - del_timer_sync(&call->timer); 513 + rxrpc_delete_call_timer(call); 546 514 547 515 /* Make sure we don't get any more notifications */ 548 516 write_lock_bh(&rx->recvmsg_lock); ··· 648 618 struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); 649 619 struct rxrpc_net *rxnet = call->rxnet; 650 620 621 + rxrpc_delete_call_timer(call); 622 + 651 623 rxrpc_put_connection(call->conn); 652 624 rxrpc_put_peer(call->peer); 653 625 kfree(call->rxtx_buffer); ··· 683 651 _net("DESTROY CALL %d", call->debug_id); 684 652 685 653 memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); 686 - 687 - del_timer_sync(&call->timer); 688 654 689 655 ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); 690 656 ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));