Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: Add stats for socket migration.

This commit adds two stats for the socket migration feature to evaluate the
effectiveness: LINUX_MIB_TCPMIGRATEREQ(SUCCESS|FAILURE).

If the migration fails because of the own_req race between the ACK-receiving
and SYN+ACK-sending paths, we do not increment the failure stat; in that
case, another CPU is responsible for the req.

Link: https://lore.kernel.org/bpf/CAK6E8=cgFKuGecTzSCSQ8z3YJ_163C0uwO9yRvfDSE7vOe9mJA@mail.gmail.com/
Suggested-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Kuniyuki Iwashima and committed by David S. Miller
55d444b3 7525de25

+31 -6
+2
include/uapi/linux/snmp.h
··· 290 290 LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ 291 291 LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ 292 292 LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ 293 + LINUX_MIB_TCPMIGRATEREQSUCCESS, /* TCPMigrateReqSuccess */ 294 + LINUX_MIB_TCPMIGRATEREQFAILURE, /* TCPMigrateReqFailure */ 293 295 __LINUX_MIB_MAX 294 296 }; 295 297
+11 -4
net/core/sock_reuseport.c
··· 6 6 * selecting the socket index from the array of available sockets. 7 7 */ 8 8 9 + #include <net/ip.h> 9 10 #include <net/sock_reuseport.h> 10 11 #include <linux/bpf.h> 11 12 #include <linux/idr.h> ··· 537 536 538 537 socks = READ_ONCE(reuse->num_socks); 539 538 if (unlikely(!socks)) 540 - goto out; 539 + goto failure; 541 540 542 541 /* paired with smp_wmb() in __reuseport_add_sock() */ 543 542 smp_rmb(); ··· 547 546 if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) { 548 547 if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req) 549 548 goto select_by_hash; 550 - goto out; 549 + goto failure; 551 550 } 552 551 553 552 if (!skb) { 554 553 skb = alloc_skb(0, GFP_ATOMIC); 555 554 if (!skb) 556 - goto out; 555 + goto failure; 557 556 allocated = true; 558 557 } 559 558 ··· 566 565 if (!nsk) 567 566 nsk = reuseport_select_sock_by_hash(reuse, hash, socks); 568 567 569 - if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) 568 + if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) { 570 569 nsk = NULL; 570 + goto failure; 571 + } 571 572 572 573 out: 573 574 rcu_read_unlock(); 574 575 return nsk; 576 + 577 + failure: 578 + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 579 + goto out; 575 580 } 576 581 EXPORT_SYMBOL(reuseport_migrate_sock); 577 582
+13 -2
net/ipv4/inet_connection_sock.c
··· 703 703 704 704 nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN); 705 705 if (!nreq) { 706 + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 707 + 706 708 /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */ 707 709 sock_put(sk); 708 710 return NULL; ··· 878 876 if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { 879 877 /* delete timer */ 880 878 inet_csk_reqsk_queue_drop(sk_listener, nreq); 881 - goto drop; 879 + goto no_ownership; 882 880 } 883 881 882 + __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS); 884 883 reqsk_migrate_reset(oreq); 885 884 reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq); 886 885 reqsk_put(oreq); ··· 890 887 return; 891 888 } 892 889 893 - drop: 894 890 /* Even if we can clone the req, we may need not retransmit any more 895 891 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another 896 892 * CPU may win the "own_req" race so that inet_ehash_insert() fails. 
897 893 */ 898 894 if (nreq) { 895 + __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE); 896 + no_ownership: 899 897 reqsk_migrate_reset(nreq); 900 898 reqsk_queue_removed(queue, nreq); 901 899 __reqsk_free(nreq); 902 900 } 903 901 902 + drop: 904 903 inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); 905 904 } 906 905 ··· 1140 1135 1141 1136 refcount_set(&nreq->rsk_refcnt, 1); 1142 1137 if (inet_csk_reqsk_queue_add(sk, nreq, child)) { 1138 + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS); 1143 1139 reqsk_migrate_reset(req); 1144 1140 reqsk_put(req); 1145 1141 return child; 1146 1142 } 1147 1143 1144 + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 1148 1145 reqsk_migrate_reset(nreq); 1149 1146 __reqsk_free(nreq); 1150 1147 } else if (inet_csk_reqsk_queue_add(sk, req, child)) { ··· 1195 1188 refcount_set(&nreq->rsk_refcnt, 1); 1196 1189 1197 1190 if (inet_csk_reqsk_queue_add(nsk, nreq, child)) { 1191 + __NET_INC_STATS(sock_net(nsk), 1192 + LINUX_MIB_TCPMIGRATEREQSUCCESS); 1198 1193 reqsk_migrate_reset(req); 1199 1194 } else { 1195 + __NET_INC_STATS(sock_net(nsk), 1196 + LINUX_MIB_TCPMIGRATEREQFAILURE); 1200 1197 reqsk_migrate_reset(nreq); 1201 1198 __reqsk_free(nreq); 1202 1199 }
+2
net/ipv4/proc.c
··· 295 295 SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), 296 296 SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), 297 297 SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), 298 + SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), 299 + SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), 298 300 SNMP_MIB_SENTINEL 299 301 }; 300 302
+3
net/ipv4/tcp_minisocks.c
··· 786 786 return inet_csk_complete_hashdance(sk, child, req, own_req); 787 787 788 788 listen_overflow: 789 + if (sk != req->rsk_listener) 790 + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE); 791 + 789 792 if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { 790 793 inet_rsk(req)->acked = 1; 791 794 return NULL;