Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'rxrpc-fixes'

David Howells says:

====================
rxrpc: Miscellaneous fixes

Here are some miscellaneous fixes for AF_RXRPC:

(1) The zero serial number has a special meaning in an ACK packet serial
reference, so skip it when assigning serial numbers to transmitted
packets.

(2) Don't set the reference serial number in a delayed ACK as the ACK
cannot be used for RTT calculation.

(3) Don't emit a DUP ACK response to a PING RESPONSE ACK coming back to a
call that completed in the meantime.

(4) Fix the counting of acks and nacks in an ACK packet to better drive
congestion management. We want to know if there have been new
acks/nacks since the last ACK packet, not that there are still
acks/nacks. This is more complicated as we have to save the old SACK
table and compare it.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+154 -43
+5 -3
include/trace/events/rxrpc.h
··· 128 128 EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ 129 129 EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ 130 130 EM(rxrpc_skb_get_conn_work, "GET conn-work") \ 131 + EM(rxrpc_skb_get_last_nack, "GET last-nack") \ 131 132 EM(rxrpc_skb_get_local_work, "GET locl-work") \ 132 133 EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ 133 134 EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ ··· 142 141 EM(rxrpc_skb_put_error_report, "PUT error-rep") \ 143 142 EM(rxrpc_skb_put_input, "PUT input ") \ 144 143 EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ 144 + EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ 145 145 EM(rxrpc_skb_put_purge, "PUT purge ") \ 146 146 EM(rxrpc_skb_put_rotate, "PUT rotate ") \ 147 147 EM(rxrpc_skb_put_unknown, "PUT unknown ") \ ··· 1554 1552 memcpy(&__entry->sum, summary, sizeof(__entry->sum)); 1555 1553 ), 1556 1554 1557 - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", 1555 + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", 1558 1556 __entry->call, 1559 1557 __entry->ack_serial, 1560 1558 __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), ··· 1562 1560 __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), 1563 1561 __entry->sum.cwnd, 1564 1562 __entry->sum.ssthresh, 1565 - __entry->sum.nr_acks, __entry->sum.saw_nacks, 1563 + __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, 1566 1564 __entry->sum.nr_new_acks, 1567 - __entry->sum.nr_rot_new_acks, 1565 + __entry->sum.nr_new_nacks, 1568 1566 __entry->top - __entry->hard_ack, 1569 1567 __entry->sum.cumulative_acks, 1570 1568 __entry->sum.dup_acks,
+30 -7
net/rxrpc/ar-internal.h
··· 199 199 */ 200 200 struct rxrpc_skb_priv { 201 201 struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ 202 - u16 offset; /* Offset of data */ 203 - u16 len; /* Length of data */ 204 - u8 flags; 202 + union { 203 + struct { 204 + u16 offset; /* Offset of data */ 205 + u16 len; /* Length of data */ 206 + u8 flags; 205 207 #define RXRPC_RX_VERIFIED 0x01 206 - 208 + }; 209 + struct { 210 + rxrpc_seq_t first_ack; /* First packet in acks table */ 211 + u8 nr_acks; /* Number of acks+nacks */ 212 + u8 nr_nacks; /* Number of nacks */ 213 + }; 214 + }; 207 215 struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ 208 216 }; 209 217 ··· 518 510 enum rxrpc_call_completion completion; /* Completion condition */ 519 511 s32 abort_code; /* Abort code of connection abort */ 520 512 int debug_id; /* debug ID for printks */ 521 - atomic_t serial; /* packet serial number counter */ 513 + rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ 522 514 unsigned int hi_serial; /* highest serial number received */ 523 515 u32 service_id; /* Service ID, possibly upgraded */ 524 516 u32 security_level; /* Security level selected */ ··· 700 692 u8 cong_dup_acks; /* Count of ACKs showing missing packets */ 701 693 u8 cong_cumul_acks; /* Cumulative ACK count */ 702 694 ktime_t cong_tstamp; /* Last time cwnd was changed */ 695 + struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ 703 696 704 697 /* Receive-phase ACK management (ACKs we send). 
*/ 705 698 u8 ackr_reason; /* reason to ACK */ 706 699 u16 ackr_sack_base; /* Starting slot in SACK table ring */ 707 - rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ 708 700 rxrpc_seq_t ackr_window; /* Base of SACK window */ 709 701 rxrpc_seq_t ackr_wtop; /* Base of SACK window */ 710 702 unsigned int ackr_nr_unacked; /* Number of unacked packets */ ··· 738 730 struct rxrpc_ack_summary { 739 731 u16 nr_acks; /* Number of ACKs in packet */ 740 732 u16 nr_new_acks; /* Number of new ACKs in packet */ 741 - u16 nr_rot_new_acks; /* Number of rotated new ACKs */ 733 + u16 nr_new_nacks; /* Number of new nacks in packet */ 734 + u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ 742 735 u8 ack_reason; 743 736 bool saw_nacks; /* Saw NACKs in packet */ 744 737 bool new_low_nack; /* T if new low NACK found */ ··· 830 821 } 831 822 832 823 #include <trace/events/rxrpc.h> 824 + 825 + /* 826 + * Allocate the next serial number on a connection. 0 must be skipped. 827 + */ 828 + static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn) 829 + { 830 + rxrpc_serial_t serial; 831 + 832 + serial = conn->tx_serial; 833 + if (serial == 0) 834 + serial = 1; 835 + conn->tx_serial = serial + 1; 836 + return serial; 837 + } 833 838 834 839 /* 835 840 * af_rxrpc.c
+5 -7
net/rxrpc/call_event.c
··· 43 43 unsigned long expiry = rxrpc_soft_ack_delay; 44 44 unsigned long now = jiffies, ack_at; 45 45 46 - call->ackr_serial = serial; 47 - 48 46 if (rxrpc_soft_ack_delay < expiry) 49 47 expiry = rxrpc_soft_ack_delay; 50 48 if (call->peer->srtt_us != 0) ··· 112 114 void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) 113 115 { 114 116 struct rxrpc_ackpacket *ack = NULL; 117 + struct rxrpc_skb_priv *sp; 115 118 struct rxrpc_txbuf *txb; 116 119 unsigned long resend_at; 117 120 rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); ··· 140 141 * explicitly NAK'd packets. 141 142 */ 142 143 if (ack_skb) { 144 + sp = rxrpc_skb(ack_skb); 143 145 ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); 144 146 145 - for (i = 0; i < ack->nAcks; i++) { 147 + for (i = 0; i < sp->nr_acks; i++) { 146 148 rxrpc_seq_t seq; 147 149 148 150 if (ack->acks[i] & 1) 149 151 continue; 150 - seq = ntohl(ack->firstPacket) + i; 152 + seq = sp->first_ack + i; 151 153 if (after(txb->seq, transmitted)) 152 154 break; 153 155 if (after(txb->seq, seq)) ··· 373 373 bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) 374 374 { 375 375 unsigned long now, next, t; 376 - rxrpc_serial_t ackr_serial; 377 376 bool resend = false, expired = false; 378 377 s32 abort_code; 379 378 ··· 422 423 if (time_after_eq(now, t)) { 423 424 trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); 424 425 cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); 425 - ackr_serial = xchg(&call->ackr_serial, 0); 426 - rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial, 426 + rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, 427 427 rxrpc_propose_ack_ping_for_lost_ack); 428 428 } 429 429
+1
net/rxrpc/call_object.c
··· 686 686 687 687 del_timer_sync(&call->timer); 688 688 689 + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); 689 690 rxrpc_cleanup_ring(call); 690 691 while ((txb = list_first_entry_or_null(&call->tx_sendmsg, 691 692 struct rxrpc_txbuf, call_link))) {
+9 -1
net/rxrpc/conn_event.c
··· 95 95 96 96 _enter("%d", conn->debug_id); 97 97 98 + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { 99 + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), 100 + &pkt.ack, sizeof(pkt.ack)) < 0) 101 + return; 102 + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) 103 + return; 104 + } 105 + 98 106 chan = &conn->channels[channel]; 99 107 100 108 /* If the last call got moved on whilst we were waiting to run, just ··· 125 117 iov[2].iov_base = &ack_info; 126 118 iov[2].iov_len = sizeof(ack_info); 127 119 128 - serial = atomic_inc_return(&conn->serial); 120 + serial = rxrpc_get_next_serial(conn); 129 121 130 122 pkt.whdr.epoch = htonl(conn->proto.epoch); 131 123 pkt.whdr.cid = htonl(conn->proto.cid | channel);
+97 -18
net/rxrpc/input.c
··· 45 45 } 46 46 47 47 cumulative_acks += summary->nr_new_acks; 48 - cumulative_acks += summary->nr_rot_new_acks; 49 48 if (cumulative_acks > 255) 50 49 cumulative_acks = 255; 51 50 52 - summary->mode = call->cong_mode; 53 51 summary->cwnd = call->cong_cwnd; 54 52 summary->ssthresh = call->cong_ssthresh; 55 53 summary->cumulative_acks = cumulative_acks; ··· 149 151 cwnd = RXRPC_TX_MAX_WINDOW; 150 152 call->cong_cwnd = cwnd; 151 153 call->cong_cumul_acks = cumulative_acks; 154 + summary->mode = call->cong_mode; 152 155 trace_rxrpc_congest(call, summary, acked_serial, change); 153 156 if (resend) 154 157 rxrpc_resend(call, skb); ··· 212 213 list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { 213 214 if (before_eq(txb->seq, call->acks_hard_ack)) 214 215 continue; 215 - summary->nr_rot_new_acks++; 216 216 if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { 217 217 set_bit(RXRPC_CALL_TX_LAST, &call->flags); 218 218 rot_last = true; ··· 251 253 enum rxrpc_abort_reason abort_why) 252 254 { 253 255 ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); 256 + 257 + if (unlikely(call->cong_last_nack)) { 258 + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); 259 + call->cong_last_nack = NULL; 260 + } 254 261 255 262 switch (__rxrpc_call_state(call)) { 256 263 case RXRPC_CALL_CLIENT_SEND_REQUEST: ··· 706 703 } 707 704 708 705 /* 706 + * Determine how many nacks from the previous ACK have now been satisfied. 
707 + */ 708 + static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, 709 + struct rxrpc_ack_summary *summary, 710 + rxrpc_seq_t seq) 711 + { 712 + struct sk_buff *skb = call->cong_last_nack; 713 + struct rxrpc_ackpacket ack; 714 + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 715 + unsigned int i, new_acks = 0, retained_nacks = 0; 716 + rxrpc_seq_t old_seq = sp->first_ack; 717 + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack); 718 + 719 + if (after_eq(seq, old_seq + sp->nr_acks)) { 720 + summary->nr_new_acks += sp->nr_nacks; 721 + summary->nr_new_acks += seq - (old_seq + sp->nr_acks); 722 + summary->nr_retained_nacks = 0; 723 + } else if (seq == old_seq) { 724 + summary->nr_retained_nacks = sp->nr_nacks; 725 + } else { 726 + for (i = 0; i < sp->nr_acks; i++) { 727 + if (acks[i] == RXRPC_ACK_TYPE_NACK) { 728 + if (before(old_seq + i, seq)) 729 + new_acks++; 730 + else 731 + retained_nacks++; 732 + } 733 + } 734 + 735 + summary->nr_new_acks += new_acks; 736 + summary->nr_retained_nacks = retained_nacks; 737 + } 738 + 739 + return old_seq + sp->nr_acks; 740 + } 741 + 742 + /* 709 743 * Process individual soft ACKs. 710 744 * 711 745 * Each ACK in the array corresponds to one packet and can be either an ACK or ··· 751 711 * the timer on the basis that the peer might just not have processed them at 752 712 * the time the ACK was sent. 
753 713 */ 754 - static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, 755 - rxrpc_seq_t seq, int nr_acks, 756 - struct rxrpc_ack_summary *summary) 714 + static void rxrpc_input_soft_acks(struct rxrpc_call *call, 715 + struct rxrpc_ack_summary *summary, 716 + struct sk_buff *skb, 717 + rxrpc_seq_t seq, 718 + rxrpc_seq_t since) 757 719 { 758 - unsigned int i; 720 + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 721 + unsigned int i, old_nacks = 0; 722 + rxrpc_seq_t lowest_nak = seq + sp->nr_acks; 723 + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); 759 724 760 - for (i = 0; i < nr_acks; i++) { 725 + for (i = 0; i < sp->nr_acks; i++) { 761 726 if (acks[i] == RXRPC_ACK_TYPE_ACK) { 762 727 summary->nr_acks++; 763 - summary->nr_new_acks++; 728 + if (after_eq(seq, since)) 729 + summary->nr_new_acks++; 764 730 } else { 765 - if (!summary->saw_nacks && 766 - call->acks_lowest_nak != seq + i) { 767 - call->acks_lowest_nak = seq + i; 768 - summary->new_low_nack = true; 769 - } 770 731 summary->saw_nacks = true; 732 + if (before(seq, since)) { 733 + /* Overlap with previous ACK */ 734 + old_nacks++; 735 + } else { 736 + summary->nr_new_nacks++; 737 + sp->nr_nacks++; 738 + } 739 + 740 + if (before(seq, lowest_nak)) 741 + lowest_nak = seq; 771 742 } 743 + seq++; 772 744 } 745 + 746 + if (lowest_nak != call->acks_lowest_nak) { 747 + call->acks_lowest_nak = lowest_nak; 748 + summary->new_low_nack = true; 749 + } 750 + 751 + /* We *can* have more nacks than we did - the peer is permitted to drop 752 + * packets it has soft-acked and re-request them. Further, it is 753 + * possible for the nack distribution to change whilst the number of 754 + * nacks stays the same or goes down. 
755 + */ 756 + if (old_nacks < summary->nr_retained_nacks) 757 + summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; 758 + summary->nr_retained_nacks = old_nacks; 773 759 } 774 760 775 761 /* ··· 839 773 struct rxrpc_skb_priv *sp = rxrpc_skb(skb); 840 774 struct rxrpc_ackinfo info; 841 775 rxrpc_serial_t ack_serial, acked_serial; 842 - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; 776 + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; 843 777 int nr_acks, offset, ioffset; 844 778 845 779 _enter(""); ··· 855 789 prev_pkt = ntohl(ack.previousPacket); 856 790 hard_ack = first_soft_ack - 1; 857 791 nr_acks = ack.nAcks; 792 + sp->first_ack = first_soft_ack; 793 + sp->nr_acks = nr_acks; 858 794 summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? 859 795 ack.reason : RXRPC_ACK__INVALID); 860 796 ··· 926 858 if (nr_acks > 0) 927 859 skb_condense(skb); 928 860 861 + if (call->cong_last_nack) { 862 + since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); 863 + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); 864 + call->cong_last_nack = NULL; 865 + } else { 866 + summary.nr_new_acks = first_soft_ack - call->acks_first_seq; 867 + call->acks_lowest_nak = first_soft_ack + nr_acks; 868 + since = first_soft_ack; 869 + } 870 + 929 871 call->acks_latest_ts = skb->tstamp; 930 872 call->acks_first_seq = first_soft_ack; 931 873 call->acks_prev_seq = prev_pkt; ··· 944 866 case RXRPC_ACK_PING: 945 867 break; 946 868 default: 947 - if (after(acked_serial, call->acks_highest_serial)) 869 + if (acked_serial && after(acked_serial, call->acks_highest_serial)) 948 870 call->acks_highest_serial = acked_serial; 949 871 break; 950 872 } ··· 983 905 if (nr_acks > 0) { 984 906 if (offset > (int)skb->len - nr_acks) 985 907 return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); 986 - rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, 987 - nr_acks, &summary); 908 + rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, 
since); 909 + rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); 910 + call->cong_last_nack = skb; 988 911 } 989 912 990 913 if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
+4 -4
net/rxrpc/output.c
··· 216 216 iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; 217 217 len = iov[0].iov_len; 218 218 219 - serial = atomic_inc_return(&conn->serial); 219 + serial = rxrpc_get_next_serial(conn); 220 220 txb->wire.serial = htonl(serial); 221 221 trace_rxrpc_tx_ack(call->debug_id, serial, 222 222 ntohl(txb->ack.firstPacket), ··· 302 302 iov[0].iov_base = &pkt; 303 303 iov[0].iov_len = sizeof(pkt); 304 304 305 - serial = atomic_inc_return(&conn->serial); 305 + serial = rxrpc_get_next_serial(conn); 306 306 pkt.whdr.serial = htonl(serial); 307 307 308 308 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); ··· 334 334 _enter("%x,{%d}", txb->seq, txb->len); 335 335 336 336 /* Each transmission of a Tx packet needs a new serial number */ 337 - serial = atomic_inc_return(&conn->serial); 337 + serial = rxrpc_get_next_serial(conn); 338 338 txb->wire.serial = htonl(serial); 339 339 340 340 if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && ··· 558 558 559 559 len = iov[0].iov_len + iov[1].iov_len; 560 560 561 - serial = atomic_inc_return(&conn->serial); 561 + serial = rxrpc_get_next_serial(conn); 562 562 whdr.serial = htonl(serial); 563 563 564 564 iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+1 -1
net/rxrpc/proc.c
··· 181 181 atomic_read(&conn->active), 182 182 state, 183 183 key_serial(conn->key), 184 - atomic_read(&conn->serial), 184 + conn->tx_serial, 185 185 conn->hi_serial, 186 186 conn->channels[0].call_id, 187 187 conn->channels[1].call_id,
+2 -2
net/rxrpc/rxkad.c
··· 664 664 665 665 len = iov[0].iov_len + iov[1].iov_len; 666 666 667 - serial = atomic_inc_return(&conn->serial); 667 + serial = rxrpc_get_next_serial(conn); 668 668 whdr.serial = htonl(serial); 669 669 670 670 ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); ··· 721 721 722 722 len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; 723 723 724 - serial = atomic_inc_return(&conn->serial); 724 + serial = rxrpc_get_next_serial(conn); 725 725 whdr.serial = htonl(serial); 726 726 727 727 rxrpc_local_dont_fragment(conn->local, false);