Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: add and use MIB counter infrastructure

Exported via same /proc file as the Linux TCP MIB counters, so "netstat -s"
or "nstat" will show them automatically.

The MPTCP MIB counters are allocated in a distinct pcpu area in order to
avoid bloating/wasting TCP pcpu memory.

Counters are allocated once the first MPTCP socket is created in a
network namespace and freed on exit.

If no sockets have been allocated, all-zero mptcp counters are shown.

The MIB counter list is taken from the multipath-tcp.org kernel, but
only a few counters have been picked up so far. The counter list can
be increased at any time later on.

v2 -> v3:
- remove 'inline' in foo.c files (David S. Miller)

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Florian Westphal; committed by David S. Miller.
fc518953 5147dfb5

+172 -15
+4
include/net/mptcp.h
··· 12 12 #include <linux/tcp.h> 13 13 #include <linux/types.h> 14 14 15 + struct seq_file; 16 + 15 17 /* MPTCP sk_buff extension data */ 16 18 struct mptcp_ext { 17 19 u64 data_ack; ··· 125 123 126 124 bool mptcp_sk_is_subflow(const struct sock *sk); 127 125 126 + void mptcp_seq_show(struct seq_file *seq); 128 127 #else 129 128 130 129 static inline void mptcp_init(void) ··· 197 194 return false; 198 195 } 199 196 197 + static inline void mptcp_seq_show(struct seq_file *seq) { } 200 198 #endif /* CONFIG_MPTCP */ 201 199 202 200 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+3
include/net/netns/mib.h
··· 27 27 #if IS_ENABLED(CONFIG_TLS) 28 28 DEFINE_SNMP_STAT(struct linux_tls_mib, tls_statistics); 29 29 #endif 30 + #ifdef CONFIG_MPTCP 31 + DEFINE_SNMP_STAT(struct mptcp_mib, mptcp_statistics); 32 + #endif 30 33 }; 31 34 32 35 #endif
+4
net/ipv4/af_inet.c
··· 1793 1793 free_percpu(net->mib.net_statistics); 1794 1794 free_percpu(net->mib.ip_statistics); 1795 1795 free_percpu(net->mib.tcp_statistics); 1796 + #ifdef CONFIG_MPTCP 1797 + /* allocated on demand, see mptcp_init_sock() */ 1798 + free_percpu(net->mib.mptcp_statistics); 1799 + #endif 1796 1800 } 1797 1801 1798 1802 static __net_initdata struct pernet_operations ipv4_mib_ops = {
+2
net/ipv4/proc.c
··· 32 32 #include <net/icmp.h> 33 33 #include <net/protocol.h> 34 34 #include <net/tcp.h> 35 + #include <net/mptcp.h> 35 36 #include <net/udp.h> 36 37 #include <net/udplite.h> 37 38 #include <linux/bottom_half.h> ··· 486 485 offsetof(struct ipstats_mib, syncp))); 487 486 488 487 seq_putc(seq, '\n'); 488 + mptcp_seq_show(seq); 489 489 return 0; 490 490 } 491 491
+1 -1
net/mptcp/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_MPTCP) += mptcp.o 3 3 4 - mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o 4 + mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o mib.o
+69
net/mptcp/mib.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + #include <linux/seq_file.h> 4 + #include <net/ip.h> 5 + #include <net/mptcp.h> 6 + #include <net/snmp.h> 7 + #include <net/net_namespace.h> 8 + 9 + #include "mib.h" 10 + 11 + static const struct snmp_mib mptcp_snmp_list[] = { 12 + SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), 13 + SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), 14 + SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), 15 + SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), 16 + SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), 17 + SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), 18 + SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), 19 + SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), 20 + SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), 21 + SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), 22 + SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), 23 + SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), 24 + SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), 25 + SNMP_MIB_SENTINEL 26 + }; 27 + 28 + /* mptcp_mib_alloc - allocate percpu mib counters 29 + * 30 + * These are allocated when the first mptcp socket is created so 31 + * we do not waste percpu memory if mptcp isn't in use. 
32 + */ 33 + bool mptcp_mib_alloc(struct net *net) 34 + { 35 + struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib); 36 + 37 + if (!mib) 38 + return false; 39 + 40 + if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib)) 41 + free_percpu(mib); 42 + 43 + return true; 44 + } 45 + 46 + void mptcp_seq_show(struct seq_file *seq) 47 + { 48 + struct net *net = seq->private; 49 + int i; 50 + 51 + seq_puts(seq, "MPTcpExt:"); 52 + for (i = 0; mptcp_snmp_list[i].name; i++) 53 + seq_printf(seq, " %s", mptcp_snmp_list[i].name); 54 + 55 + seq_puts(seq, "\nMPTcpExt:"); 56 + 57 + if (!net->mib.mptcp_statistics) { 58 + for (i = 0; mptcp_snmp_list[i].name; i++) 59 + seq_puts(seq, " 0"); 60 + 61 + return; 62 + } 63 + 64 + for (i = 0; mptcp_snmp_list[i].name; i++) 65 + seq_printf(seq, " %lu", 66 + snmp_fold_field(net->mib.mptcp_statistics, 67 + mptcp_snmp_list[i].entry)); 68 + seq_putc(seq, '\n'); 69 + }
+40
net/mptcp/mib.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + 3 + enum linux_mptcp_mib_field { 4 + MPTCP_MIB_NUM = 0, 5 + MPTCP_MIB_MPCAPABLEPASSIVE, /* Received SYN with MP_CAPABLE */ 6 + MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */ 7 + MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */ 8 + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */ 9 + MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ 10 + MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ 11 + MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ 12 + MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */ 13 + MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ 14 + MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ 15 + MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ 16 + MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ 17 + MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ 18 + __MPTCP_MIB_MAX 19 + }; 20 + 21 + #define LINUX_MIB_MPTCP_MAX __MPTCP_MIB_MAX 22 + struct mptcp_mib { 23 + unsigned long mibs[LINUX_MIB_MPTCP_MAX]; 24 + }; 25 + 26 + static inline void MPTCP_INC_STATS(struct net *net, 27 + enum linux_mptcp_mib_field field) 28 + { 29 + if (likely(net->mib.mptcp_statistics)) 30 + SNMP_INC_STATS(net->mib.mptcp_statistics, field); 31 + } 32 + 33 + static inline void __MPTCP_INC_STATS(struct net *net, 34 + enum linux_mptcp_mib_field field) 35 + { 36 + if (likely(net->mib.mptcp_statistics)) 37 + __SNMP_INC_STATS(net->mib.mptcp_statistics, field); 38 + } 39 + 40 + bool mptcp_mib_alloc(struct net *net);
+22 -8
net/mptcp/protocol.c
··· 21 21 #endif 22 22 #include <net/mptcp.h> 23 23 #include "protocol.h" 24 + #include "mib.h" 24 25 25 26 #define MPTCP_SAME_STATE TCP_MAX_STATES 26 27 ··· 1033 1032 if (ret < 0) 1034 1033 break; 1035 1034 1035 + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); 1036 1036 copied += ret; 1037 1037 dfrag->data_len -= ret; 1038 1038 dfrag->offset += ret; ··· 1083 1081 1084 1082 static int mptcp_init_sock(struct sock *sk) 1085 1083 { 1086 - int ret = __mptcp_init_sock(sk); 1084 + struct net *net = sock_net(sk); 1085 + int ret; 1087 1086 1087 + if (!mptcp_is_enabled(net)) 1088 + return -ENOPROTOOPT; 1089 + 1090 + if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) 1091 + return -ENOMEM; 1092 + 1093 + ret = __mptcp_init_sock(sk); 1088 1094 if (ret) 1089 1095 return ret; 1090 1096 1091 1097 sk_sockets_allocated_inc(sk); 1092 1098 sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2]; 1093 - 1094 - if (!mptcp_is_enabled(sock_net(sk))) 1095 - return -ENOPROTOOPT; 1096 1099 1097 1100 return 0; 1098 1101 } ··· 1334 1327 list_add(&subflow->node, &msk->conn_list); 1335 1328 1336 1329 bh_unlock_sock(new_mptcp_sock); 1330 + 1331 + __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); 1337 1332 local_bh_enable(); 1333 + } else { 1334 + MPTCP_INC_STATS(sock_net(sk), 1335 + MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); 1338 1336 } 1339 1337 1340 1338 return newsk; ··· 1460 1448 u64 ack_seq; 1461 1449 1462 1450 subflow = mptcp_subflow_ctx(ssk); 1463 - 1464 - if (!subflow->mp_capable) 1465 - return; 1466 - 1467 1451 sk = subflow->conn; 1468 1452 msk = mptcp_sk(sk); 1453 + 1454 + if (!subflow->mp_capable) { 1455 + MPTCP_INC_STATS(sock_net(sk), 1456 + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); 1457 + return; 1458 + } 1469 1459 1470 1460 pr_debug("msk=%p, token=%u", sk, subflow->token); 1471 1461
+27 -6
net/mptcp/subflow.c
··· 20 20 #endif 21 21 #include <net/mptcp.h> 22 22 #include "protocol.h" 23 + #include "mib.h" 24 + 25 + static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, 26 + enum linux_mptcp_mib_field field) 27 + { 28 + MPTCP_INC_STATS(sock_net(req_to_sk(req)), field); 29 + } 23 30 24 31 static int subflow_rebuild_header(struct sock *sk) 25 32 { ··· 95 88 96 89 msk = mptcp_token_get_sock(subflow_req->token); 97 90 if (!msk) { 98 - pr_debug("subflow_req=%p, token=%u - not found\n", 99 - subflow_req, subflow_req->token); 91 + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); 100 92 return false; 101 93 } 102 94 ··· 143 137 return; 144 138 #endif 145 139 146 - if (rx_opt.mptcp.mp_capable && rx_opt.mptcp.mp_join) 147 - return; 140 + if (rx_opt.mptcp.mp_capable) { 141 + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); 142 + 143 + if (rx_opt.mptcp.mp_join) 144 + return; 145 + } else if (rx_opt.mptcp.mp_join) { 146 + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); 147 + } 148 148 149 149 if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { 150 150 int err; ··· 249 237 subflow, subflow->thmac, 250 238 subflow->remote_nonce); 251 239 if (!subflow_thmac_valid(subflow)) { 240 + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); 252 241 subflow->mp_join = 0; 253 242 goto do_reset; 254 243 } ··· 266 253 goto do_reset; 267 254 268 255 subflow->conn_finished = 1; 256 + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); 269 257 } else { 270 258 do_reset: 271 259 tcp_send_active_reset(sk, GFP_ATOMIC); ··· 396 382 opt_rx.mptcp.mp_join = 0; 397 383 mptcp_get_options(skb, &opt_rx); 398 384 if (!opt_rx.mptcp.mp_join || 399 - !subflow_hmac_valid(req, &opt_rx)) 385 + !subflow_hmac_valid(req, &opt_rx)) { 386 + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); 400 387 return NULL; 388 + } 401 389 } 402 390 403 391 create_child: ··· 436 420 ctx->conn = (struct sock *)owner; 437 421 if (!mptcp_finish_join(child)) 438 422 goto close_child; 423 + 424 + 
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); 439 425 } 440 426 } 441 427 ··· 553 535 data_len = mpext->data_len; 554 536 if (data_len == 0) { 555 537 pr_err("Infinite mapping not handled"); 538 + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); 556 539 return MAPPING_INVALID; 557 540 } 558 541 ··· 597 578 /* If this skb data are fully covered by the current mapping, 598 579 * the new map would need caching, which is not supported 599 580 */ 600 - if (skb_is_fully_mapped(ssk, skb)) 581 + if (skb_is_fully_mapped(ssk, skb)) { 582 + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH); 601 583 return MAPPING_INVALID; 584 + } 602 585 603 586 /* will validate the next map after consuming the current one */ 604 587 return MAPPING_OK;