Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rxrpc: Call channels should have separate call number spaces

Each channel on a connection has a separate, independent number space from
which to allocate callNumber values. It is entirely possible, for example,
to have a connection with four active calls, each with call number 1.

Note that the callNumber values for any particular channel don't have to
start at 1, but they are supposed to increment monotonically for that
channel from a client's perspective and may not be reused once the call
number is transmitted (until the epoch cycles all the way back round).

Currently, however, call numbers are allocated on a per-connection basis
and, further, are held in an rb-tree. The rb-tree is redundant as the four
channel pointers in the rxrpc_connection struct are entirely capable of
pointing to all the calls currently in progress on a connection.

To this end, make the following changes:

(1) Handle call number allocation independently per channel.

(2) Get rid of the conn->calls rb-tree. This is overkill as a connection
may have a maximum of four calls in progress at any one time. Use the
pointers in the channels[] array instead, indexed by the channel
number from the packet.

(3) For each channel, save the result of the last call that was in
progress on that channel in conn->channels[] so that the final ACK or
ABORT packet can be replayed if necessary. Any call earlier than that
is just ignored. If we've seen the next call number in a packet, the
last one is most definitely defunct.

(4) When generating a RESPONSE packet for a connection, the call number
counter for each channel must be included in it.

(5) When parsing a RESPONSE packet for a connection, the call number
counters contained therein should be used to set the minimum expected
call numbers on each channel.

To do in future commits:

(1) Replay terminal packets based on the last call stored in
conn->channels[].

(2) Connections should be retired before the callNumber space on any
channel runs out.

(3) A server is expected to disregard or reject any new incoming call that
has a call number less than the current call number counter. The call
number counter for that channel must be advanced to the new call
number.

Note that the server cannot just require that the next call that it
sees on a channel be exactly the call number counter + 1 because then
there's a scenario that could cause a problem: The client transmits a
packet to initiate a connection, the network goes out, the server
sends an ACK (which gets lost), the client sends an ABORT (which also
gets lost); the network then reconnects, the client then reuses the
call number for the next call (it doesn't know the server already saw
the call number), but the server thinks it already has the first
packet of this call (it doesn't know that the client doesn't know that
it saw the call number the first time).

Signed-off-by: David Howells <dhowells@redhat.com>

+107 -120
+9 -5
net/rxrpc/ar-internal.h
··· 292 292 struct rxrpc_conn_parameters params; 293 293 294 294 spinlock_t channel_lock; 295 - struct rxrpc_call __rcu *channels[RXRPC_MAXCALLS]; /* active calls */ 295 + 296 + struct rxrpc_channel { 297 + struct rxrpc_call __rcu *call; /* Active call */ 298 + u32 call_id; /* ID of current call */ 299 + u32 call_counter; /* Call ID counter */ 300 + u32 last_call; /* ID of last call */ 301 + u32 last_result; /* Result of last call (0/abort) */ 302 + } channels[RXRPC_MAXCALLS]; 296 303 wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ 297 304 298 305 struct rcu_head rcu; ··· 309 302 struct rb_node service_node; /* Node in peer->service_conns */ 310 303 }; 311 304 struct list_head link; /* link in master connection list */ 312 - struct rb_root calls; /* calls on this connection */ 313 305 struct sk_buff_head rx_queue; /* received conn-level packets */ 314 306 const struct rxrpc_security *security; /* applied security module */ 315 307 struct key *server_key; /* security for this service */ ··· 317 311 unsigned long flags; 318 312 unsigned long events; 319 313 unsigned long put_time; /* Time at which last put */ 320 - rwlock_t lock; /* access lock */ 321 314 spinlock_t state_lock; /* state-change lock */ 322 315 atomic_t usage; 323 316 enum rxrpc_conn_proto_state state : 8; /* current state of connection */ ··· 324 319 u32 remote_abort; /* remote abort code */ 325 320 int error; /* local error incurred */ 326 321 int debug_id; /* debug ID for printks */ 327 - unsigned int call_counter; /* call ID counter */ 328 322 atomic_t serial; /* packet serial number counter */ 329 323 atomic_t hi_serial; /* highest serial number received */ 330 324 atomic_t avail_chans; /* number of channels available */ ··· 416 412 struct hlist_node error_link; /* link in error distribution list */ 417 413 struct list_head accept_link; /* calls awaiting acceptance */ 418 414 struct rb_node sock_node; /* node in socket call tree */ 419 - struct rb_node conn_node; /* node in connection call tree */
420 415 struct sk_buff_head rx_queue; /* received packets */ 421 416 struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ 422 417 struct sk_buff *tx_pending; /* Tx socket buffer being filled */ ··· 567 564 struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_local *, 568 565 struct rxrpc_peer *, 569 566 struct sk_buff *); 567 + void __rxrpc_disconnect_call(struct rxrpc_call *); 570 568 void rxrpc_disconnect_call(struct rxrpc_call *); 571 569 void rxrpc_put_connection(struct rxrpc_connection *); 572 570 void __exit rxrpc_destroy_all_connections(void);
+22 -36
net/rxrpc/call_object.c
··· 456 456 { 457 457 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 458 458 struct rxrpc_call *call, *candidate; 459 - struct rb_node **p, *parent; 460 - u32 call_id; 459 + u32 call_id, chan; 461 460 462 461 _enter(",%d", conn->debug_id); 463 462 ··· 466 467 if (!candidate) 467 468 return ERR_PTR(-EBUSY); 468 469 470 + chan = sp->hdr.cid & RXRPC_CHANNELMASK; 469 471 candidate->socket = rx; 470 472 candidate->conn = conn; 471 473 candidate->cid = sp->hdr.cid; 472 474 candidate->call_id = sp->hdr.callNumber; 473 - candidate->channel = sp->hdr.cid & RXRPC_CHANNELMASK; 475 + candidate->channel = chan; 474 476 candidate->rx_data_post = 0; 475 477 candidate->state = RXRPC_CALL_SERVER_ACCEPTING; 476 478 if (conn->security_ix > 0) 477 479 candidate->state = RXRPC_CALL_SERVER_SECURING; 478 480 479 - write_lock_bh(&conn->lock); 481 + spin_lock(&conn->channel_lock); 480 482 481 483 /* set the channel for this call */ 482 - call = rcu_dereference_protected(conn->channels[candidate->channel], 483 - lockdep_is_held(&conn->lock)); 484 + call = rcu_dereference_protected(conn->channels[chan].call, 485 + lockdep_is_held(&conn->channel_lock)); 486 + 484 487 _debug("channel[%u] is %p", candidate->channel, call); 485 488 if (call && call->call_id == sp->hdr.callNumber) { 486 489 /* already set; must've been a duplicate packet */ ··· 511 510 call->debug_id, rxrpc_call_states[call->state]); 512 511 513 512 if (call->state >= RXRPC_CALL_COMPLETE) { 514 - conn->channels[call->channel] = NULL; 513 + __rxrpc_disconnect_call(call); 515 514 } else { 516 - write_unlock_bh(&conn->lock); 515 + spin_unlock(&conn->channel_lock); 517 516 kmem_cache_free(rxrpc_call_jar, candidate); 518 517 _leave(" = -EBUSY"); 519 518 return ERR_PTR(-EBUSY); ··· 523 522 /* check the call number isn't duplicate */ 524 523 _debug("check dup"); 525 524 call_id = sp->hdr.callNumber; 526 - p = &conn->calls.rb_node; 527 - parent = NULL; 528 - while (*p) { 529 - parent = *p; 530 - call = rb_entry(parent, struct rxrpc_call, conn_node);
531 525 532 - /* The tree is sorted in order of the __be32 value without 533 - * turning it into host order. 534 - */ 535 - if (call_id < call->call_id) 536 - p = &(*p)->rb_left; 537 - else if (call_id > call->call_id) 538 - p = &(*p)->rb_right; 539 - else 540 - goto old_call; 541 - } 526 + /* We just ignore calls prior to the current call ID. Terminated calls 527 + * are handled via the connection. 528 + */ 529 + if (call_id <= conn->channels[chan].call_counter) 530 + goto old_call; /* TODO: Just drop packet */ 542 531 543 532 /* make the call available */ 544 533 _debug("new call"); 545 534 call = candidate; 546 535 candidate = NULL; 547 - rb_link_node(&call->conn_node, parent, p); 548 - rb_insert_color(&call->conn_node, &conn->calls); 549 - rcu_assign_pointer(conn->channels[call->channel], call); 536 + conn->channels[chan].call_counter = call_id; 537 + rcu_assign_pointer(conn->channels[chan].call, call); 550 538 sock_hold(&rx->sk); 551 539 rxrpc_get_connection(conn); 552 - write_unlock_bh(&conn->lock); 540 + spin_unlock(&conn->channel_lock); 553 541 554 542 spin_lock(&conn->params.peer->lock); 555 543 hlist_add_head(&call->error_link, &conn->params.peer->error_targets); ··· 578 588 return call; 579 589 580 590 extant_call: 581 - write_unlock_bh(&conn->lock); 591 + spin_unlock(&conn->channel_lock); 582 592 kmem_cache_free(rxrpc_call_jar, candidate); 583 593 _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
584 594 return call; 585 595 586 596 aborted_call: 587 - write_unlock_bh(&conn->lock); 597 + spin_unlock(&conn->channel_lock); 588 598 kmem_cache_free(rxrpc_call_jar, candidate); 589 599 _leave(" = -ECONNABORTED"); 590 600 return ERR_PTR(-ECONNABORTED); 591 601 592 602 old_call: 593 - write_unlock_bh(&conn->lock); 603 + spin_unlock(&conn->channel_lock); 594 604 kmem_cache_free(rxrpc_call_jar, candidate); 595 605 _leave(" = -ECONNRESET [old]"); 596 606 return ERR_PTR(-ECONNRESET); ··· 638 648 write_unlock_bh(&rx->call_lock); 639 649 640 650 /* free up the channel for reuse */ 641 - write_lock_bh(&conn->lock); 642 - write_lock(&call->state_lock); 651 + write_lock_bh(&call->state_lock); 643 652 644 653 if (call->state < RXRPC_CALL_COMPLETE && 645 654 call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { ··· 646 657 call->state = RXRPC_CALL_LOCALLY_ABORTED; 647 658 call->local_abort = RX_CALL_DEAD; 648 659 } 649 - write_unlock(&call->state_lock); 650 - 651 - rb_erase(&call->conn_node, &conn->calls); 652 - write_unlock_bh(&conn->lock); 660 + write_unlock_bh(&call->state_lock); 653 661 654 662 rxrpc_disconnect_call(call); 655 663
+13 -11
net/rxrpc/conn_event.c
··· 31 31 u32 abort_code) 32 32 { 33 33 struct rxrpc_call *call; 34 - struct rb_node *p; 34 + int i; 35 35 36 36 _enter("{%d},%x", conn->debug_id, abort_code); 37 37 38 - read_lock_bh(&conn->lock); 38 + spin_lock(&conn->channel_lock); 39 39 40 - for (p = rb_first(&conn->calls); p; p = rb_next(p)) { 41 - call = rb_entry(p, struct rxrpc_call, conn_node); 42 - write_lock(&call->state_lock); 40 + for (i = 0; i < RXRPC_MAXCALLS; i++) { 41 + call = rcu_dereference_protected( 42 + conn->channels[i].call, 43 + lockdep_is_held(&conn->channel_lock)); 44 + write_lock_bh(&call->state_lock); 43 45 if (call->state <= RXRPC_CALL_COMPLETE) { 44 46 call->state = state; 45 47 if (state == RXRPC_CALL_LOCALLY_ABORTED) { ··· 53 51 } 54 52 rxrpc_queue_call(call); 55 53 } 56 - write_unlock(&call->state_lock); 54 + write_unlock_bh(&call->state_lock); 57 55 } 58 56 59 - read_unlock_bh(&conn->lock); 57 + spin_unlock(&conn->channel_lock); 60 58 _leave(""); 61 59 } 62 60 ··· 194 192 if (ret < 0) 195 193 return ret; 196 194 197 - read_lock_bh(&conn->lock); 195 + spin_lock(&conn->channel_lock); 198 196 spin_lock(&conn->state_lock); 199 197 200 198 if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { ··· 202 200 for (loop = 0; loop < RXRPC_MAXCALLS; loop++) 203 201 rxrpc_call_is_secure( 204 202 rcu_dereference_protected( 205 - conn->channels[loop], 206 - lockdep_is_held(&conn->lock))); 203 + conn->channels[loop].call, 204 + lockdep_is_held(&conn->channel_lock))); 207 205 } 208 206 209 207 spin_unlock(&conn->state_lock); 210 - read_unlock_bh(&conn->lock); 208 + spin_unlock(&conn->channel_lock); 211 209 return 0; 212 210 213 211 default:
+34 -51
net/rxrpc/conn_object.c
··· 46 46 init_waitqueue_head(&conn->channel_wq); 47 47 INIT_WORK(&conn->processor, &rxrpc_process_connection); 48 48 INIT_LIST_HEAD(&conn->link); 49 - conn->calls = RB_ROOT; 50 49 skb_queue_head_init(&conn->rx_queue); 51 50 conn->security = &rxrpc_no_security; 52 - rwlock_init(&conn->lock); 53 51 spin_lock_init(&conn->state_lock); 54 52 atomic_set(&conn->usage, 1); 55 53 conn->debug_id = atomic_inc_return(&rxrpc_debug_id); ··· 58 60 59 61 _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); 60 62 return conn; 61 - } 62 - 63 - /* 64 - * add a call to a connection's call-by-ID tree 65 - */ 66 - static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn, 67 - struct rxrpc_call *call) 68 - { 69 - struct rxrpc_call *xcall; 70 - struct rb_node *parent, **p; 71 - u32 call_id; 72 - 73 - write_lock_bh(&conn->lock); 74 - 75 - call_id = call->call_id; 76 - p = &conn->calls.rb_node; 77 - parent = NULL; 78 - while (*p) { 79 - parent = *p; 80 - xcall = rb_entry(parent, struct rxrpc_call, conn_node); 81 - 82 - if (call_id < xcall->call_id) 83 - p = &(*p)->rb_left; 84 - else if (call_id > xcall->call_id) 85 - p = &(*p)->rb_right; 86 - else 87 - BUG(); 88 - } 89 - 90 - rb_link_node(&call->conn_node, parent, p); 91 - rb_insert_color(&call->conn_node, &conn->calls); 92 - 93 - write_unlock_bh(&conn->lock); 94 63 } 95 64 96 65 /* ··· 242 277 call->channel = chan; 243 278 call->epoch = conn->proto.epoch; 244 279 call->cid = conn->proto.cid | chan; 245 - call->call_id = ++conn->call_counter; 246 - rcu_assign_pointer(conn->channels[chan], call); 280 + call->call_id = ++conn->channels[chan].call_counter; 281 + conn->channels[chan].call_id = call->call_id; 282 + rcu_assign_pointer(conn->channels[chan].call, call); 247 283 248 284 _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id); 249 285 250 - rxrpc_add_call_ID_to_conn(conn, call); 251 286 spin_unlock(&conn->channel_lock); 252 287 rxrpc_put_peer(cp->peer); 253 288 cp->peer = NULL; ··· 291 326 spin_lock(&conn->channel_lock);
292 327 293 328 for (chan = 0; chan < RXRPC_MAXCALLS; chan++) 294 - if (!conn->channels[chan]) 329 + if (!conn->channels[chan].call) 295 330 goto found_channel; 296 331 BUG(); 297 332 ··· 496 531 497 532 /* 498 533 * Disconnect a call and clear any channel it occupies when that call 534 + * terminates. The caller must hold the channel_lock and must release the 535 + * call's ref on the connection. 536 + */ 537 + void __rxrpc_disconnect_call(struct rxrpc_call *call) 538 + { 539 + struct rxrpc_connection *conn = call->conn; 540 + struct rxrpc_channel *chan = &conn->channels[call->channel]; 541 + 542 + _enter("%d,%d", conn->debug_id, call->channel); 543 + 544 + if (rcu_access_pointer(chan->call) == call) { 545 + /* Save the result of the call so that we can repeat it if necessary 546 + * through the channel, whilst disposing of the actual call record. 547 + */ 548 + chan->last_result = call->local_abort; 549 + smp_wmb(); 550 + chan->last_call = chan->call_id; 551 + chan->call_id = chan->call_counter; 552 + 553 + rcu_assign_pointer(chan->call, NULL); 554 + atomic_inc(&conn->avail_chans); 555 + wake_up(&conn->channel_wq); 556 + } 557 + 558 + _leave(""); 559 + } 560 + 561 + /* 562 + * Disconnect a call and clear any channel it occupies when that call 499 563 * terminates. 
500 564 */ 501 565 void rxrpc_disconnect_call(struct rxrpc_call *call) 502 566 { 503 567 struct rxrpc_connection *conn = call->conn; 504 - unsigned chan = call->channel; 505 - 506 - _enter("%d,%d", conn->debug_id, call->channel); 507 568 508 569 spin_lock(&conn->channel_lock); 509 - 510 - if (rcu_access_pointer(conn->channels[chan]) == call) { 511 - rcu_assign_pointer(conn->channels[chan], NULL); 512 - atomic_inc(&conn->avail_chans); 513 - wake_up(&conn->channel_wq); 514 - } 515 - 570 + __rxrpc_disconnect_call(call); 516 571 spin_unlock(&conn->channel_lock); 517 572 518 573 call->conn = NULL; 519 574 rxrpc_put_connection(conn); 520 - _leave(""); 521 575 } 522 576 523 577 /* ··· 575 591 576 592 _net("DESTROY CONN %d", conn->debug_id); 577 593 578 - ASSERT(RB_EMPTY_ROOT(&conn->calls)); 579 594 rxrpc_purge_queue(&conn->rx_queue); 580 595 581 596 conn->security->clear(conn);
+2 -3
net/rxrpc/proc.c
··· 137 137 if (v == &rxrpc_connections) { 138 138 seq_puts(seq, 139 139 "Proto Local Remote " 140 - " SvID ConnID Calls End Use State Key " 140 + " SvID ConnID End Use State Key " 141 141 " Serial ISerial\n" 142 142 ); 143 143 return 0; ··· 154 154 ntohs(conn->params.peer->srx.transport.sin.sin_port)); 155 155 156 156 seq_printf(seq, 157 - "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u" 157 + "UDP %-22.22s %-22.22s %4x %08x %s %3u" 158 158 " %s %08x %08x %08x\n", 159 159 lbuff, 160 160 rbuff, 161 161 conn->params.service_id, 162 162 conn->proto.cid, 163 - conn->call_counter, 164 163 rxrpc_conn_is_service(conn) ? "Svc" : "Clt", 165 164 atomic_read(&conn->usage), 166 165 rxrpc_conn_states[conn->state],
+27 -14
net/rxrpc/rxkad.c
··· 767 767 resp.kvno = htonl(token->kad->kvno); 768 768 resp.ticket_len = htonl(token->kad->ticket_len); 769 769 770 - resp.encrypted.call_id[0] = 771 - htonl(conn->channels[0] ? conn->channels[0]->call_id : 0); 772 - resp.encrypted.call_id[1] = 773 - htonl(conn->channels[1] ? conn->channels[1]->call_id : 0); 774 - resp.encrypted.call_id[2] = 775 - htonl(conn->channels[2] ? conn->channels[2]->call_id : 0); 776 - resp.encrypted.call_id[3] = 777 - htonl(conn->channels[3] ? conn->channels[3]->call_id : 0); 770 + resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter); 771 + resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter); 772 + resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter); 773 + resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter); 778 774 779 775 /* calculate the response checksum and then do the encryption */ 780 776 rxkad_calc_response_checksum(&resp); ··· 987 991 void *ticket; 988 992 u32 abort_code, version, kvno, ticket_len, level; 989 993 __be32 csum; 990 - int ret; 994 + int ret, i; 991 995 992 996 _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); 993 997 ··· 1050 1054 if (response.encrypted.checksum != csum) 1051 1055 goto protocol_error_free; 1052 1056 1053 - if (ntohl(response.encrypted.call_id[0]) > INT_MAX || 1054 - ntohl(response.encrypted.call_id[1]) > INT_MAX || 1055 - ntohl(response.encrypted.call_id[2]) > INT_MAX || 1056 - ntohl(response.encrypted.call_id[3]) > INT_MAX) 1057 - goto protocol_error_free; 1057 + spin_lock(&conn->channel_lock); 1058 + for (i = 0; i < RXRPC_MAXCALLS; i++) { 1059 + struct rxrpc_call *call; 1060 + u32 call_id = ntohl(response.encrypted.call_id[i]); 1061 + 1062 + if (call_id > INT_MAX) 1063 + goto protocol_error_unlock; 1064 + 1065 + if (call_id < conn->channels[i].call_counter) 1066 + goto protocol_error_unlock; 1067 + if (call_id > conn->channels[i].call_counter) { 1068 + call = rcu_dereference_protected( 1069 + conn->channels[i].call, 1070 + lockdep_is_held(&conn->channel_lock));
1071 + if (call && call->state < RXRPC_CALL_COMPLETE) 1072 + goto protocol_error_unlock; 1073 + conn->channels[i].call_counter = call_id; 1074 + } 1075 + } 1076 + spin_unlock(&conn->channel_lock); 1058 1077 1059 1078 abort_code = RXKADOUTOFSEQUENCE; 1060 1079 if (ntohl(response.encrypted.inc_nonce) != conn->security_nonce + 1) ··· 1094 1083 _leave(" = 0"); 1095 1084 return 0; 1096 1085 1086 + protocol_error_unlock: 1087 + spin_unlock(&conn->channel_lock); 1097 1088 protocol_error_free: 1098 1089 kfree(ticket); 1099 1090 protocol_error: