Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: fix tcp_ack() performance problem

We worked hard to improve tcp_ack() performance by not accessing
skb_shinfo() in the fast path (commit cd7d8498c9a5 ("tcp: change
tcp_skb_pcount() location")).

We still have one spurious access because of ACK timestamping,
added in commit e1c8a607b281 ("net-timestamp: ACK timestamp for
bytestreams").

By checking if sk_tsflags has SOF_TIMESTAMPING_TX_ACK set,
we can avoid two cache line misses for the common case.

While we are at it, add two prefetchw() calls:

One in tcp_ack() to bring in the skb at the head of the write queue.

One in the tcp_clean_rtx_queue() loop to bring in the following skb,
as we will delete skb from the write queue and dirty skb->next->prev.

Add a couple of [un]likely() clauses.

After this patch, tcp_ack() is no longer the most expensive
function in the TCP stack.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
ad971f61 14cee8e3

+27 -9
+27 -9
net/ipv4/tcp_input.c
··· 68 68 #include <linux/module.h> 69 69 #include <linux/sysctl.h> 70 70 #include <linux/kernel.h> 71 + #include <linux/prefetch.h> 71 72 #include <net/dst.h> 72 73 #include <net/tcp.h> 73 74 #include <net/inet_common.h> ··· 3030 3029 return packets_acked; 3031 3030 } 3032 3031 3032 + static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, 3033 + u32 prior_snd_una) 3034 + { 3035 + const struct skb_shared_info *shinfo; 3036 + 3037 + /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ 3038 + if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) 3039 + return; 3040 + 3041 + shinfo = skb_shinfo(skb); 3042 + if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && 3043 + between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) 3044 + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3045 + } 3046 + 3033 3047 /* Remove acknowledged frames from the retransmission queue. If our packet 3034 3048 * is before the ack sequence we can discard it as it's confirmed to have 3035 3049 * arrived at the other end. 
··· 3068 3052 first_ackt.v64 = 0; 3069 3053 3070 3054 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 3071 - struct skb_shared_info *shinfo = skb_shinfo(skb); 3072 3055 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 3073 3056 u8 sacked = scb->sacked; 3074 3057 u32 acked_pcount; 3075 3058 3076 - if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) && 3077 - between(shinfo->tskey, prior_snd_una, tp->snd_una - 1)) 3078 - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); 3059 + tcp_ack_tstamp(sk, skb, prior_snd_una); 3079 3060 3080 3061 /* Determine how many packets and what bytes were acked, tso and else */ 3081 3062 if (after(scb->end_seq, tp->snd_una)) { ··· 3086 3073 3087 3074 fully_acked = false; 3088 3075 } else { 3076 + /* Speedup tcp_unlink_write_queue() and next loop */ 3077 + prefetchw(skb->next); 3089 3078 acked_pcount = tcp_skb_pcount(skb); 3090 3079 } 3091 3080 3092 - if (sacked & TCPCB_RETRANS) { 3081 + if (unlikely(sacked & TCPCB_RETRANS)) { 3093 3082 if (sacked & TCPCB_SACKED_RETRANS) 3094 3083 tp->retrans_out -= acked_pcount; 3095 3084 flag |= FLAG_RETRANS_DATA_ACKED; ··· 3122 3107 * connection startup slow start one packet too 3123 3108 * quickly. This is severely frowned upon behavior. 
3124 3109 */ 3125 - if (!(scb->tcp_flags & TCPHDR_SYN)) { 3110 + if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { 3126 3111 flag |= FLAG_DATA_ACKED; 3127 3112 } else { 3128 3113 flag |= FLAG_SYN_ACKED; ··· 3134 3119 3135 3120 tcp_unlink_write_queue(skb, sk); 3136 3121 sk_wmem_free_skb(sk, skb); 3137 - if (skb == tp->retransmit_skb_hint) 3122 + if (unlikely(skb == tp->retransmit_skb_hint)) 3138 3123 tp->retransmit_skb_hint = NULL; 3139 - if (skb == tp->lost_skb_hint) 3124 + if (unlikely(skb == tp->lost_skb_hint)) 3140 3125 tp->lost_skb_hint = NULL; 3141 3126 } 3142 3127 ··· 3147 3132 flag |= FLAG_SACK_RENEGING; 3148 3133 3149 3134 skb_mstamp_get(&now); 3150 - if (first_ackt.v64) { 3135 + if (likely(first_ackt.v64)) { 3151 3136 seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); 3152 3137 ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); 3153 3138 } ··· 3408 3393 const int prior_unsacked = tp->packets_out - tp->sacked_out; 3409 3394 int acked = 0; /* Number of packets newly acked */ 3410 3395 long sack_rtt_us = -1L; 3396 + 3397 + /* We very likely will need to access write queue head. */ 3398 + prefetchw(sk->sk_write_queue.next); 3411 3399 3412 3400 /* If the ack is older than previous acks 3413 3401 * then we can probably ignore it.