Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_flow_table: count pending offload workqueue tasks

To improve hardware offload debuggability count pending 'add', 'del' and
'stats' flow_table offload workqueue tasks. Counters are incremented before
scheduling new task and decremented when workqueue handler finishes
executing. These counters allow user to diagnose congestion on hardware
offload workqueues that can happen when either CPU is starved and workqueue
jobs are executed at lower rate than new ones are added or when
hardware/driver can't keep up with the rate.

Implement the described counters as percpu counters inside new struct
netns_ft which is stored inside struct net. Expose them via new procfs file
'/proc/net/stats/nf_flowtable' that is similar to existing 'nf_conntrack'
file.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Authored by Vlad Buslov and committed by Pablo Neira Ayuso (commit b0381776, parent fc54d906).

+206 -4
+6
include/net/net_namespace.h
··· 26 26 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 27 27 #include <net/netns/conntrack.h> 28 28 #endif 29 + #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) 30 + #include <net/netns/flow_table.h> 31 + #endif 29 32 #include <net/netns/nftables.h> 30 33 #include <net/netns/xfrm.h> 31 34 #include <net/netns/mpls.h> ··· 144 141 #endif 145 142 #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) 146 143 struct netns_nftables nft; 144 + #endif 145 + #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) 146 + struct netns_ft ft; 147 147 #endif 148 148 #endif 149 149 #ifdef CONFIG_WEXT_CORE
+21
include/net/netfilter/nf_flow_table.h
··· 335 335 return 0; 336 336 } 337 337 338 + #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count) 339 + #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count) 340 + #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count) \ 341 + this_cpu_inc((net)->ft.stat->count) 342 + #define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count) \ 343 + this_cpu_dec((net)->ft.stat->count) 344 + 345 + #ifdef CONFIG_NF_FLOW_TABLE_PROCFS 346 + int nf_flow_table_init_proc(struct net *net); 347 + void nf_flow_table_fini_proc(struct net *net); 348 + #else 349 + static inline int nf_flow_table_init_proc(struct net *net) 350 + { 351 + return 0; 352 + } 353 + 354 + static inline void nf_flow_table_fini_proc(struct net *net) 355 + { 356 + } 357 + #endif /* CONFIG_NF_FLOW_TABLE_PROCFS */ 358 + 338 359 #endif /* _NF_FLOW_TABLE_H */
+14
include/net/netns/flow_table.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __NETNS_FLOW_TABLE_H 3 + #define __NETNS_FLOW_TABLE_H 4 + 5 + struct nf_flow_table_stat { 6 + unsigned int count_wq_add; 7 + unsigned int count_wq_del; 8 + unsigned int count_wq_stats; 9 + }; 10 + 11 + struct netns_ft { 12 + struct nf_flow_table_stat __percpu *stat; 13 + }; 14 + #endif
+9
net/netfilter/Kconfig
··· 734 734 735 735 To compile it as a module, choose M here. 736 736 737 + config NF_FLOW_TABLE_PROCFS 738 + bool "Supply flow table statistics in procfs" 739 + default y 740 + depends on PROC_FS 741 + depends on SYSCTL 742 + help 743 + This option enables for the flow table offload statistics 744 + to be shown in procfs under net/netfilter/nf_flowtable. 745 + 737 746 config NETFILTER_XTABLES 738 747 tristate "Netfilter Xtables support (required for ip_tables)" 739 748 default m if NETFILTER_ADVANCED=n
+1
net/netfilter/Makefile
··· 128 128 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o 129 129 nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ 130 130 nf_flow_table_offload.o 131 + nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o 131 132 132 133 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o 133 134
+61 -1
net/netfilter/nf_flow_table_core.c
··· 614 614 } 615 615 EXPORT_SYMBOL_GPL(nf_flow_table_free); 616 616 617 + static int nf_flow_table_init_net(struct net *net) 618 + { 619 + net->ft.stat = alloc_percpu(struct nf_flow_table_stat); 620 + return net->ft.stat ? 0 : -ENOMEM; 621 + } 622 + 623 + static void nf_flow_table_fini_net(struct net *net) 624 + { 625 + free_percpu(net->ft.stat); 626 + } 627 + 628 + static int nf_flow_table_pernet_init(struct net *net) 629 + { 630 + int ret; 631 + 632 + ret = nf_flow_table_init_net(net); 633 + if (ret < 0) 634 + return ret; 635 + 636 + ret = nf_flow_table_init_proc(net); 637 + if (ret < 0) 638 + goto out_proc; 639 + 640 + return 0; 641 + 642 + out_proc: 643 + nf_flow_table_fini_net(net); 644 + return ret; 645 + } 646 + 647 + static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) 648 + { 649 + struct net *net; 650 + 651 + list_for_each_entry(net, net_exit_list, exit_list) { 652 + nf_flow_table_fini_proc(net); 653 + nf_flow_table_fini_net(net); 654 + } 655 + } 656 + 657 + static struct pernet_operations nf_flow_table_net_ops = { 658 + .init = nf_flow_table_pernet_init, 659 + .exit_batch = nf_flow_table_pernet_exit, 660 + }; 661 + 617 662 static int __init nf_flow_table_module_init(void) 618 663 { 619 - return nf_flow_table_offload_init(); 664 + int ret; 665 + 666 + ret = register_pernet_subsys(&nf_flow_table_net_ops); 667 + if (ret < 0) 668 + return ret; 669 + 670 + ret = nf_flow_table_offload_init(); 671 + if (ret) 672 + goto out_offload; 673 + 674 + return 0; 675 + 676 + out_offload: 677 + unregister_pernet_subsys(&nf_flow_table_net_ops); 678 + return ret; 620 679 } 621 680 622 681 static void __exit nf_flow_table_module_exit(void) 623 682 { 624 683 nf_flow_table_offload_exit(); 684 + unregister_pernet_subsys(&nf_flow_table_net_ops); 625 685 } 626 686 627 687 module_init(nf_flow_table_module_init);
+14 -3
net/netfilter/nf_flow_table_offload.c
··· 967 967 static void flow_offload_work_handler(struct work_struct *work) 968 968 { 969 969 struct flow_offload_work *offload; 970 + struct net *net; 970 971 971 972 offload = container_of(work, struct flow_offload_work, work); 973 + net = read_pnet(&offload->flowtable->net); 972 974 switch (offload->cmd) { 973 975 case FLOW_CLS_REPLACE: 974 976 flow_offload_work_add(offload); 977 + NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_add); 975 978 break; 976 979 case FLOW_CLS_DESTROY: 977 980 flow_offload_work_del(offload); 981 + NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_del); 978 982 break; 979 983 case FLOW_CLS_STATS: 980 984 flow_offload_work_stats(offload); 985 + NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count_wq_stats); 981 986 break; 982 987 default: 983 988 WARN_ON_ONCE(1); ··· 994 989 995 990 static void flow_offload_queue_work(struct flow_offload_work *offload) 996 991 { 997 - if (offload->cmd == FLOW_CLS_REPLACE) 992 + struct net *net = read_pnet(&offload->flowtable->net); 993 + 994 + if (offload->cmd == FLOW_CLS_REPLACE) { 995 + NF_FLOW_TABLE_STAT_INC(net, count_wq_add); 998 996 queue_work(nf_flow_offload_add_wq, &offload->work); 999 - else if (offload->cmd == FLOW_CLS_DESTROY) 997 + } else if (offload->cmd == FLOW_CLS_DESTROY) { 998 + NF_FLOW_TABLE_STAT_INC(net, count_wq_del); 1000 999 queue_work(nf_flow_offload_del_wq, &offload->work); 1001 - else 1000 + } else { 1001 + NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); 1002 1002 queue_work(nf_flow_offload_stats_wq, &offload->work); 1003 + } 1003 1004 } 1004 1005 1005 1006 static struct flow_offload_work *
+80
net/netfilter/nf_flow_table_procfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <linux/kernel.h> 3 + #include <linux/proc_fs.h> 4 + #include <net/netfilter/nf_flow_table.h> 5 + 6 + static void *nf_flow_table_cpu_seq_start(struct seq_file *seq, loff_t *pos) 7 + { 8 + struct net *net = seq_file_net(seq); 9 + int cpu; 10 + 11 + if (*pos == 0) 12 + return SEQ_START_TOKEN; 13 + 14 + for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) { 15 + if (!cpu_possible(cpu)) 16 + continue; 17 + *pos = cpu + 1; 18 + return per_cpu_ptr(net->ft.stat, cpu); 19 + } 20 + 21 + return NULL; 22 + } 23 + 24 + static void *nf_flow_table_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) 25 + { 26 + struct net *net = seq_file_net(seq); 27 + int cpu; 28 + 29 + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { 30 + if (!cpu_possible(cpu)) 31 + continue; 32 + *pos = cpu + 1; 33 + return per_cpu_ptr(net->ft.stat, cpu); 34 + } 35 + (*pos)++; 36 + return NULL; 37 + } 38 + 39 + static void nf_flow_table_cpu_seq_stop(struct seq_file *seq, void *v) 40 + { 41 + } 42 + 43 + static int nf_flow_table_cpu_seq_show(struct seq_file *seq, void *v) 44 + { 45 + const struct nf_flow_table_stat *st = v; 46 + 47 + if (v == SEQ_START_TOKEN) { 48 + seq_puts(seq, "wq_add wq_del wq_stats\n"); 49 + return 0; 50 + } 51 + 52 + seq_printf(seq, "%8d %8d %8d\n", 53 + st->count_wq_add, 54 + st->count_wq_del, 55 + st->count_wq_stats 56 + ); 57 + return 0; 58 + } 59 + 60 + static const struct seq_operations nf_flow_table_cpu_seq_ops = { 61 + .start = nf_flow_table_cpu_seq_start, 62 + .next = nf_flow_table_cpu_seq_next, 63 + .stop = nf_flow_table_cpu_seq_stop, 64 + .show = nf_flow_table_cpu_seq_show, 65 + }; 66 + 67 + int nf_flow_table_init_proc(struct net *net) 68 + { 69 + struct proc_dir_entry *pde; 70 + 71 + pde = proc_create_net("nf_flowtable", 0444, net->proc_net_stat, 72 + &nf_flow_table_cpu_seq_ops, 73 + sizeof(struct seq_net_private)); 74 + return pde ? 0 : -ENOMEM; 75 + } 76 + 77 + void nf_flow_table_fini_proc(struct net *net) 78 + { 79 + remove_proc_entry("nf_flowtable", net->proc_net_stat); 80 + }