Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: export count for rehash attempts

Using the IPv6 flow label to swiftly route around congested or
disconnected network paths can greatly improve TCP reliability.

This patch adds SNMP counters and an OPT_STATS counter to track both
host-level and connection-level statistics. Network administrators
can use these counters to better evaluate the impact of this new ability.

Export the count of rehash attempts to:
1) two SNMP counters: TcpTimeoutRehash (rehash due to timeouts),
and TcpDuplicateDataRehash (rehash due to receiving duplicate
packets)
2) the timestamping API SOF_TIMESTAMPING_OPT_STATS, via the new
TCP_NLA_TIMEOUT_REHASH attribute (timeout-triggered rehashes only).

Signed-off-by: Abdul Kabbani <akabbani@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Kevin(Yudong) Yang <yyd@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Abdul Kabbani and committed by
David S. Miller
32efcc06 6efca894

+18 -1
+2
include/linux/tcp.h
··· 386 386 #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 387 387 #endif 388 388 389 + u16 timeout_rehash; /* Timeout-triggered rehash attempts */ 390 + 389 391 u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */ 390 392 391 393 /* Receiver side RTT estimation */
+2
include/uapi/linux/snmp.h
··· 285 285 LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ 286 286 LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */ 287 287 LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ 288 + LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ 289 + LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ 288 290 __LINUX_MIB_MAX 289 291 }; 290 292
+1
include/uapi/linux/tcp.h
··· 311 311 TCP_NLA_DSACK_DUPS, /* DSACK blocks received */ 312 312 TCP_NLA_REORD_SEEN, /* reordering events seen */ 313 313 TCP_NLA_SRTT, /* smoothed RTT in usecs */ 314 + TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ 314 315 }; 315 316 316 317 /* for TCP_MD5SIG socket option */
+2
net/ipv4/proc.c
··· 289 289 SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), 290 290 SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG), 291 291 SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), 292 + SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), 293 + SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), 292 294 SNMP_MIB_SENTINEL 293 295 }; 294 296
+2
net/ipv4/tcp.c
··· 3337 3337 nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */ 3338 3338 nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ 3339 3339 nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ 3340 + nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ 3340 3341 0; 3341 3342 } 3342 3343 ··· 3392 3391 nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); 3393 3392 nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); 3394 3393 nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); 3394 + nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); 3395 3395 3396 3396 return stats; 3397 3397 }
+3 -1
net/ipv4/tcp_input.c
··· 4271 4271 * The receiver remembers and reflects via DSACKs. Leverage the 4272 4272 * DSACK state and change the txhash to re-route speculatively. 4273 4273 */ 4274 - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) 4274 + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) { 4275 4275 sk_rethink_txhash(sk); 4276 + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); 4277 + } 4276 4278 } 4277 4279 4278 4280 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+6
net/ipv4/tcp_timer.c
··· 223 223 dst_negative_advice(sk); 224 224 } else { 225 225 sk_rethink_txhash(sk); 226 + tp->timeout_rehash++; 227 + __NET_INC_STATS(sock_net(sk), 228 + LINUX_MIB_TCPTIMEOUTREHASH); 226 229 } 227 230 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; 228 231 expired = icsk->icsk_retransmits >= retry_until; ··· 237 234 dst_negative_advice(sk); 238 235 } else { 239 236 sk_rethink_txhash(sk); 237 + tp->timeout_rehash++; 238 + __NET_INC_STATS(sock_net(sk), 239 + LINUX_MIB_TCPTIMEOUTREHASH); 240 240 } 241 241 242 242 retry_until = net->ipv4.sysctl_tcp_retries2;