Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Clean up and consolidate ct ecache infrastructure by merging ct and
expect notifiers, from Florian Westphal.

2) Missing counters and timestamp in nfnetlink_queue and _log conntrack
information.

3) Missing error check for xt_register_template() in iptables mangle,
as an incremental fix for the previous pull request, also from
Florian Westphal.

4) Add netfilter hooks for the SRv6 lightweight tunnel driver, from
Ryoga Sato. The hooks are enabled via nf_hooks_lwtunnel sysctl
to make sure existing netfilter rulesets do not break. There is
a static key to disable the hooks by default.

The pktgen_bench_xmit_mode_netif_receive.sh shows no noticeable
impact in the seg6_input path for non-netfilter users: similar
numbers with and without this patch.

This is a sample of the perf report output:

11.67% kpktgend_0 [ipv6] [k] ipv6_get_saddr_eval
7.89% kpktgend_0 [ipv6] [k] __ipv6_addr_label
7.52% kpktgend_0 [ipv6] [k] __ipv6_dev_get_saddr
6.63% kpktgend_0 [kernel.vmlinux] [k] asm_exc_nmi
4.74% kpktgend_0 [ipv6] [k] fib6_node_lookup_1
3.48% kpktgend_0 [kernel.vmlinux] [k] pskb_expand_head
3.33% kpktgend_0 [ipv6] [k] ip6_rcv_core.isra.29
3.33% kpktgend_0 [ipv6] [k] seg6_do_srh_encap
2.53% kpktgend_0 [ipv6] [k] ipv6_dev_get_saddr
2.45% kpktgend_0 [ipv6] [k] fib6_table_lookup
2.24% kpktgend_0 [kernel.vmlinux] [k] ___cache_free
2.16% kpktgend_0 [ipv6] [k] ip6_pol_route
2.11% kpktgend_0 [kernel.vmlinux] [k] __ipv6_addr_type
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+362 -251
+7
Documentation/networking/nf_conntrack-sysctl.rst
··· 184 184 This extended timeout will be used in case there is an GRE stream 185 185 detected. 186 186 187 + nf_hooks_lwtunnel - BOOLEAN 188 + - 0 - disabled (default) 189 + - not 0 - enabled 190 + 191 + If this option is enabled, the lightweight tunnel netfilter hooks are 192 + enabled. This option cannot be disabled once it is enabled. 193 + 187 194 nf_flowtable_tcp_timeout - INTEGER (seconds) 188 195 default 30 189 196
+3
include/net/lwtunnel.h
··· 51 51 }; 52 52 53 53 #ifdef CONFIG_LWTUNNEL 54 + 55 + DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 56 + 54 57 void lwtstate_free(struct lwtunnel_state *lws); 55 58 56 59 static inline struct lwtunnel_state *
+12 -22
include/net/netfilter/nf_conntrack_ecache.h
··· 72 72 int report; 73 73 }; 74 74 75 - struct nf_ct_event_notifier { 76 - int (*fcn)(unsigned int events, struct nf_ct_event *item); 75 + struct nf_exp_event { 76 + struct nf_conntrack_expect *exp; 77 + u32 portid; 78 + int report; 77 79 }; 78 80 79 - int nf_conntrack_register_notifier(struct net *net, 80 - struct nf_ct_event_notifier *nb); 81 - void nf_conntrack_unregister_notifier(struct net *net, 82 - struct nf_ct_event_notifier *nb); 81 + struct nf_ct_event_notifier { 82 + int (*ct_event)(unsigned int events, const struct nf_ct_event *item); 83 + int (*exp_event)(unsigned int events, const struct nf_exp_event *item); 84 + }; 85 + 86 + void nf_conntrack_register_notifier(struct net *net, 87 + const struct nf_ct_event_notifier *nb); 88 + void nf_conntrack_unregister_notifier(struct net *net); 83 89 84 90 void nf_ct_deliver_cached_events(struct nf_conn *ct); 85 91 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, ··· 157 151 } 158 152 159 153 #ifdef CONFIG_NF_CONNTRACK_EVENTS 160 - 161 - struct nf_exp_event { 162 - struct nf_conntrack_expect *exp; 163 - u32 portid; 164 - int report; 165 - }; 166 - 167 - struct nf_exp_event_notifier { 168 - int (*fcn)(unsigned int events, struct nf_exp_event *item); 169 - }; 170 - 171 - int nf_ct_expect_register_notifier(struct net *net, 172 - struct nf_exp_event_notifier *nb); 173 - void nf_ct_expect_unregister_notifier(struct net *net, 174 - struct nf_exp_event_notifier *nb); 175 - 176 154 void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 177 155 struct nf_conntrack_expect *exp, 178 156 u32 portid, int report);
+7
include/net/netfilter/nf_hooks_lwtunnel.h
··· 1 + #include <linux/sysctl.h> 2 + #include <linux/types.h> 3 + 4 + #ifdef CONFIG_SYSCTL 5 + int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, 6 + void *buffer, size_t *lenp, loff_t *ppos); 7 + #endif
-1
include/net/netns/conntrack.h
··· 113 113 struct ct_pcpu __percpu *pcpu_lists; 114 114 struct ip_conntrack_stat __percpu *stat; 115 115 struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; 116 - struct nf_exp_event_notifier __rcu *nf_expect_event_cb; 117 116 struct nf_ip_net nf_ct_proto; 118 117 #if defined(CONFIG_NF_CONNTRACK_LABELS) 119 118 unsigned int labels_used;
+3
net/core/lwtunnel.c
··· 23 23 #include <net/ip6_fib.h> 24 24 #include <net/rtnh.h> 25 25 26 + DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27 + EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 + 26 29 #ifdef CONFIG_MODULES 27 30 28 31 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
+2
net/ipv4/netfilter/iptable_mangle.c
··· 112 112 { 113 113 int ret = xt_register_template(&packet_mangler, 114 114 iptable_mangle_table_init); 115 + if (ret < 0) 116 + return ret; 115 117 116 118 mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); 117 119 if (IS_ERR(mangle_ops)) {
+72 -3
net/ipv6/seg6_iptunnel.c
··· 26 26 #ifdef CONFIG_IPV6_SEG6_HMAC 27 27 #include <net/seg6_hmac.h> 28 28 #endif 29 + #include <net/lwtunnel.h> 30 + #include <linux/netfilter.h> 29 31 30 32 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) 31 33 { ··· 297 295 298 296 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 299 297 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 298 + nf_reset_ct(skb); 300 299 301 300 return 0; 302 301 } 303 302 304 - static int seg6_input(struct sk_buff *skb) 303 + static int seg6_input_finish(struct net *net, struct sock *sk, 304 + struct sk_buff *skb) 305 + { 306 + return dst_input(skb); 307 + } 308 + 309 + static int seg6_input_core(struct net *net, struct sock *sk, 310 + struct sk_buff *skb) 305 311 { 306 312 struct dst_entry *orig_dst = skb_dst(skb); 307 313 struct dst_entry *dst = NULL; ··· 347 337 if (unlikely(err)) 348 338 return err; 349 339 350 - return dst_input(skb); 340 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 341 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 342 + dev_net(skb->dev), NULL, skb, NULL, 343 + skb_dst(skb)->dev, seg6_input_finish); 344 + 345 + return seg6_input_finish(dev_net(skb->dev), NULL, skb); 351 346 } 352 347 353 - static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 348 + static int seg6_input_nf(struct sk_buff *skb) 349 + { 350 + struct net_device *dev = skb_dst(skb)->dev; 351 + struct net *net = dev_net(skb->dev); 352 + 353 + switch (skb->protocol) { 354 + case htons(ETH_P_IP): 355 + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, 356 + skb, NULL, dev, seg6_input_core); 357 + case htons(ETH_P_IPV6): 358 + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, 359 + skb, NULL, dev, seg6_input_core); 360 + } 361 + 362 + return -EINVAL; 363 + } 364 + 365 + static int seg6_input(struct sk_buff *skb) 366 + { 367 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 368 + return seg6_input_nf(skb); 369 + 370 + return 
seg6_input_core(dev_net(skb->dev), NULL, skb); 371 + } 372 + 373 + static int seg6_output_core(struct net *net, struct sock *sk, 374 + struct sk_buff *skb) 354 375 { 355 376 struct dst_entry *orig_dst = skb_dst(skb); 356 377 struct dst_entry *dst = NULL; ··· 428 387 if (unlikely(err)) 429 388 goto drop; 430 389 390 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 391 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 392 + NULL, skb_dst(skb)->dev, dst_output); 393 + 431 394 return dst_output(net, sk, skb); 432 395 drop: 433 396 kfree_skb(skb); 434 397 return err; 398 + } 399 + 400 + static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) 401 + { 402 + struct net_device *dev = skb_dst(skb)->dev; 403 + 404 + switch (skb->protocol) { 405 + case htons(ETH_P_IP): 406 + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, 407 + NULL, dev, seg6_output_core); 408 + case htons(ETH_P_IPV6): 409 + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, 410 + NULL, dev, seg6_output_core); 411 + } 412 + 413 + return -EINVAL; 414 + } 415 + 416 + static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 417 + { 418 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 419 + return seg6_output_nf(net, sk, skb); 420 + 421 + return seg6_output_core(net, sk, skb); 435 422 } 436 423 437 424 static int seg6_build_state(struct net *net, struct nlattr *nla,
+78 -33
net/ipv6/seg6_local.c
··· 30 30 #include <net/seg6_local.h> 31 31 #include <linux/etherdevice.h> 32 32 #include <linux/bpf.h> 33 + #include <net/lwtunnel.h> 34 + #include <linux/netfilter.h> 33 35 34 36 #define SEG6_F_ATTR(i) BIT(i) 35 37 ··· 415 413 return -EINVAL; 416 414 } 417 415 416 + static int input_action_end_dx6_finish(struct net *net, struct sock *sk, 417 + struct sk_buff *skb) 418 + { 419 + struct dst_entry *orig_dst = skb_dst(skb); 420 + struct in6_addr *nhaddr = NULL; 421 + struct seg6_local_lwt *slwt; 422 + 423 + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 424 + 425 + /* The inner packet is not associated to any local interface, 426 + * so we do not call netif_rx(). 427 + * 428 + * If slwt->nh6 is set to ::, then lookup the nexthop for the 429 + * inner packet's DA. Otherwise, use the specified nexthop. 430 + */ 431 + if (!ipv6_addr_any(&slwt->nh6)) 432 + nhaddr = &slwt->nh6; 433 + 434 + seg6_lookup_nexthop(skb, nhaddr, 0); 435 + 436 + return dst_input(skb); 437 + } 438 + 418 439 /* decapsulate and forward to specified nexthop */ 419 440 static int input_action_end_dx6(struct sk_buff *skb, 420 441 struct seg6_local_lwt *slwt) 421 442 { 422 - struct in6_addr *nhaddr = NULL; 423 - 424 443 /* this function accepts IPv6 encapsulated packets, with either 425 444 * an SRH with SL=0, or no SRH. 426 445 */ ··· 452 429 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 453 430 goto drop; 454 431 455 - /* The inner packet is not associated to any local interface, 456 - * so we do not call netif_rx(). 457 - * 458 - * If slwt->nh6 is set to ::, then lookup the nexthop for the 459 - * inner packet's DA. Otherwise, use the specified nexthop. 
460 - */ 461 - 462 - if (!ipv6_addr_any(&slwt->nh6)) 463 - nhaddr = &slwt->nh6; 464 - 465 432 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 433 + nf_reset_ct(skb); 466 434 467 - seg6_lookup_nexthop(skb, nhaddr, 0); 435 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 436 + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 437 + dev_net(skb->dev), NULL, skb, NULL, 438 + skb_dst(skb)->dev, input_action_end_dx6_finish); 468 439 469 - return dst_input(skb); 440 + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); 470 441 drop: 471 442 kfree_skb(skb); 472 443 return -EINVAL; 473 444 } 474 445 475 - static int input_action_end_dx4(struct sk_buff *skb, 476 - struct seg6_local_lwt *slwt) 446 + static int input_action_end_dx4_finish(struct net *net, struct sock *sk, 447 + struct sk_buff *skb) 477 448 { 449 + struct dst_entry *orig_dst = skb_dst(skb); 450 + struct seg6_local_lwt *slwt; 478 451 struct iphdr *iph; 479 452 __be32 nhaddr; 480 453 int err; 481 454 482 - if (!decap_and_validate(skb, IPPROTO_IPIP)) 483 - goto drop; 484 - 485 - if (!pskb_may_pull(skb, sizeof(struct iphdr))) 486 - goto drop; 487 - 488 - skb->protocol = htons(ETH_P_IP); 455 + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 489 456 490 457 iph = ip_hdr(skb); 491 458 ··· 483 470 484 471 skb_dst_drop(skb); 485 472 486 - skb_set_transport_header(skb, sizeof(struct iphdr)); 487 - 488 473 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); 489 - if (err) 490 - goto drop; 474 + if (err) { 475 + kfree_skb(skb); 476 + return -EINVAL; 477 + } 491 478 492 479 return dst_input(skb); 480 + } 493 481 482 + static int input_action_end_dx4(struct sk_buff *skb, 483 + struct seg6_local_lwt *slwt) 484 + { 485 + if (!decap_and_validate(skb, IPPROTO_IPIP)) 486 + goto drop; 487 + 488 + if (!pskb_may_pull(skb, sizeof(struct iphdr))) 489 + goto drop; 490 + 491 + skb->protocol = htons(ETH_P_IP); 492 + skb_set_transport_header(skb, sizeof(struct iphdr)); 493 + nf_reset_ct(skb); 494 + 
495 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 496 + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 497 + dev_net(skb->dev), NULL, skb, NULL, 498 + skb_dst(skb)->dev, input_action_end_dx4_finish); 499 + 500 + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); 494 501 drop: 495 502 kfree_skb(skb); 496 503 return -EINVAL; ··· 678 645 skb_dst_drop(skb); 679 646 680 647 skb_set_transport_header(skb, hdrlen); 648 + nf_reset_ct(skb); 681 649 682 650 return end_dt_vrf_rcv(skb, family, vrf); 683 651 ··· 1112 1078 u64_stats_update_end(&pcounters->syncp); 1113 1079 } 1114 1080 1115 - static int seg6_local_input(struct sk_buff *skb) 1081 + static int seg6_local_input_core(struct net *net, struct sock *sk, 1082 + struct sk_buff *skb) 1116 1083 { 1117 1084 struct dst_entry *orig_dst = skb_dst(skb); 1118 1085 struct seg6_action_desc *desc; 1119 1086 struct seg6_local_lwt *slwt; 1120 1087 unsigned int len = skb->len; 1121 1088 int rc; 1122 - 1123 - if (skb->protocol != htons(ETH_P_IPV6)) { 1124 - kfree_skb(skb); 1125 - return -EINVAL; 1126 - } 1127 1089 1128 1090 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 1129 1091 desc = slwt->desc; ··· 1132 1102 seg6_local_update_counters(slwt, len, rc); 1133 1103 1134 1104 return rc; 1105 + } 1106 + 1107 + static int seg6_local_input(struct sk_buff *skb) 1108 + { 1109 + if (skb->protocol != htons(ETH_P_IPV6)) { 1110 + kfree_skb(skb); 1111 + return -EINVAL; 1112 + } 1113 + 1114 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 1115 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, 1116 + dev_net(skb->dev), NULL, skb, skb->dev, NULL, 1117 + seg6_local_input_core); 1118 + 1119 + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); 1135 1120 } 1136 1121 1137 1122 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+3
net/netfilter/Makefile
··· 212 212 213 213 # IPVS 214 214 obj-$(CONFIG_IP_VS) += ipvs/ 215 + 216 + # lwtunnel 217 + obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
+93 -150
net/netfilter/nf_conntrack_ecache.c
··· 130 130 schedule_delayed_work(&cnet->ecache_dwork, delay); 131 131 } 132 132 133 - int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 134 - u32 portid, int report) 133 + static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, 134 + const unsigned int events, 135 + const unsigned long missed, 136 + const struct nf_ct_event *item) 135 137 { 136 - int ret = 0; 137 - struct net *net = nf_ct_net(ct); 138 + struct nf_conn *ct = item->ct; 139 + struct net *net = nf_ct_net(item->ct); 138 140 struct nf_ct_event_notifier *notify; 139 - struct nf_conntrack_ecache *e; 140 - 141 - rcu_read_lock(); 142 - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 143 - if (!notify) 144 - goto out_unlock; 145 - 146 - e = nf_ct_ecache_find(ct); 147 - if (!e) 148 - goto out_unlock; 149 - 150 - if (nf_ct_is_confirmed(ct)) { 151 - struct nf_ct_event item = { 152 - .ct = ct, 153 - .portid = e->portid ? e->portid : portid, 154 - .report = report 155 - }; 156 - /* This is a resent of a destroy event? If so, skip missed */ 157 - unsigned long missed = e->portid ? 0 : e->missed; 158 - 159 - if (!((eventmask | missed) & e->ctmask)) 160 - goto out_unlock; 161 - 162 - ret = notify->fcn(eventmask | missed, &item); 163 - if (unlikely(ret < 0 || missed)) { 164 - spin_lock_bh(&ct->lock); 165 - if (ret < 0) { 166 - /* This is a destroy event that has been 167 - * triggered by a process, we store the PORTID 168 - * to include it in the retransmission. 
169 - */ 170 - if (eventmask & (1 << IPCT_DESTROY)) { 171 - if (e->portid == 0 && portid != 0) 172 - e->portid = portid; 173 - e->state = NFCT_ECACHE_DESTROY_FAIL; 174 - } else { 175 - e->missed |= eventmask; 176 - } 177 - } else { 178 - e->missed &= ~missed; 179 - } 180 - spin_unlock_bh(&ct->lock); 181 - } 182 - } 183 - out_unlock: 184 - rcu_read_unlock(); 185 - return ret; 186 - } 187 - EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 188 - 189 - /* deliver cached events and clear cache entry - must be called with locally 190 - * disabled softirqs */ 191 - void nf_ct_deliver_cached_events(struct nf_conn *ct) 192 - { 193 - struct net *net = nf_ct_net(ct); 194 - unsigned long events, missed; 195 - struct nf_ct_event_notifier *notify; 196 - struct nf_conntrack_ecache *e; 197 - struct nf_ct_event item; 198 141 int ret; 199 142 200 - rcu_read_lock(); 201 - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 202 - if (notify == NULL) 203 - goto out_unlock; 204 - 205 - if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 206 - goto out_unlock; 207 - 208 - e = nf_ct_ecache_find(ct); 209 - if (e == NULL) 210 - goto out_unlock; 211 - 212 - events = xchg(&e->cache, 0); 213 - 214 - /* We make a copy of the missed event cache without taking 215 - * the lock, thus we may send missed events twice. However, 216 - * this does not harm and it happens very rarely. 
*/ 217 - missed = e->missed; 218 - 219 143 if (!((events | missed) & e->ctmask)) 220 - goto out_unlock; 144 + return 0; 221 145 222 - item.ct = ct; 223 - item.portid = 0; 224 - item.report = 0; 146 + rcu_read_lock(); 225 147 226 - ret = notify->fcn(events | missed, &item); 148 + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 149 + if (!notify) { 150 + rcu_read_unlock(); 151 + return 0; 152 + } 227 153 228 - if (likely(ret == 0 && !missed)) 229 - goto out_unlock; 154 + ret = notify->ct_event(events | missed, item); 155 + rcu_read_unlock(); 156 + 157 + if (likely(ret >= 0 && missed == 0)) 158 + return 0; 230 159 231 160 spin_lock_bh(&ct->lock); 232 161 if (ret < 0) ··· 164 235 e->missed &= ~missed; 165 236 spin_unlock_bh(&ct->lock); 166 237 167 - out_unlock: 168 - rcu_read_unlock(); 238 + return ret; 239 + } 240 + 241 + int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, 242 + u32 portid, int report) 243 + { 244 + struct nf_conntrack_ecache *e; 245 + struct nf_ct_event item; 246 + unsigned long missed; 247 + int ret; 248 + 249 + if (!nf_ct_is_confirmed(ct)) 250 + return 0; 251 + 252 + e = nf_ct_ecache_find(ct); 253 + if (!e) 254 + return 0; 255 + 256 + memset(&item, 0, sizeof(item)); 257 + 258 + item.ct = ct; 259 + item.portid = e->portid ? e->portid : portid; 260 + item.report = report; 261 + 262 + /* This is a resent of a destroy event? If so, skip missed */ 263 + missed = e->portid ? 0 : e->missed; 264 + 265 + ret = __nf_conntrack_eventmask_report(e, events, missed, &item); 266 + if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) { 267 + /* This is a destroy event that has been triggered by a process, 268 + * we store the PORTID to include it in the retransmission. 
269 + */ 270 + if (e->portid == 0 && portid != 0) 271 + e->portid = portid; 272 + e->state = NFCT_ECACHE_DESTROY_FAIL; 273 + } 274 + 275 + return ret; 276 + } 277 + EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 278 + 279 + /* deliver cached events and clear cache entry - must be called with locally 280 + * disabled softirqs */ 281 + void nf_ct_deliver_cached_events(struct nf_conn *ct) 282 + { 283 + struct nf_conntrack_ecache *e; 284 + struct nf_ct_event item; 285 + unsigned long events; 286 + 287 + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 288 + return; 289 + 290 + e = nf_ct_ecache_find(ct); 291 + if (e == NULL) 292 + return; 293 + 294 + events = xchg(&e->cache, 0); 295 + 296 + item.ct = ct; 297 + item.portid = 0; 298 + item.report = 0; 299 + 300 + /* We make a copy of the missed event cache without taking 301 + * the lock, thus we may send missed events twice. However, 302 + * this does not harm and it happens very rarely. 303 + */ 304 + __nf_conntrack_eventmask_report(e, events, e->missed, &item); 169 305 } 170 306 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); 171 307 ··· 240 246 241 247 { 242 248 struct net *net = nf_ct_exp_net(exp); 243 - struct nf_exp_event_notifier *notify; 249 + struct nf_ct_event_notifier *notify; 244 250 struct nf_conntrack_ecache *e; 245 251 246 252 rcu_read_lock(); 247 - notify = rcu_dereference(net->ct.nf_expect_event_cb); 253 + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 248 254 if (!notify) 249 255 goto out_unlock; 250 256 ··· 258 264 .portid = portid, 259 265 .report = report 260 266 }; 261 - notify->fcn(1 << event, &item); 267 + notify->exp_event(1 << event, &item); 262 268 } 263 269 out_unlock: 264 270 rcu_read_unlock(); 265 271 } 266 272 267 - int nf_conntrack_register_notifier(struct net *net, 268 - struct nf_ct_event_notifier *new) 273 + void nf_conntrack_register_notifier(struct net *net, 274 + const struct nf_ct_event_notifier *new) 269 275 { 270 - int ret; 271 276 struct nf_ct_event_notifier 
*notify; 272 277 273 278 mutex_lock(&nf_ct_ecache_mutex); 274 279 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 275 280 lockdep_is_held(&nf_ct_ecache_mutex)); 276 - if (notify != NULL) { 277 - ret = -EBUSY; 278 - goto out_unlock; 279 - } 281 + WARN_ON_ONCE(notify); 280 282 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 281 - ret = 0; 282 - 283 - out_unlock: 284 283 mutex_unlock(&nf_ct_ecache_mutex); 285 - return ret; 286 284 } 287 285 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 288 286 289 - void nf_conntrack_unregister_notifier(struct net *net, 290 - struct nf_ct_event_notifier *new) 287 + void nf_conntrack_unregister_notifier(struct net *net) 291 288 { 292 - struct nf_ct_event_notifier *notify; 293 - 294 289 mutex_lock(&nf_ct_ecache_mutex); 295 - notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 296 - lockdep_is_held(&nf_ct_ecache_mutex)); 297 - BUG_ON(notify != new); 298 290 RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); 299 291 mutex_unlock(&nf_ct_ecache_mutex); 300 - /* synchronize_rcu() is called from ctnetlink_exit. 
*/ 292 + /* synchronize_rcu() is called after netns pre_exit */ 301 293 } 302 294 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 303 - 304 - int nf_ct_expect_register_notifier(struct net *net, 305 - struct nf_exp_event_notifier *new) 306 - { 307 - int ret; 308 - struct nf_exp_event_notifier *notify; 309 - 310 - mutex_lock(&nf_ct_ecache_mutex); 311 - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 312 - lockdep_is_held(&nf_ct_ecache_mutex)); 313 - if (notify != NULL) { 314 - ret = -EBUSY; 315 - goto out_unlock; 316 - } 317 - rcu_assign_pointer(net->ct.nf_expect_event_cb, new); 318 - ret = 0; 319 - 320 - out_unlock: 321 - mutex_unlock(&nf_ct_ecache_mutex); 322 - return ret; 323 - } 324 - EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); 325 - 326 - void nf_ct_expect_unregister_notifier(struct net *net, 327 - struct nf_exp_event_notifier *new) 328 - { 329 - struct nf_exp_event_notifier *notify; 330 - 331 - mutex_lock(&nf_ct_ecache_mutex); 332 - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 333 - lockdep_is_held(&nf_ct_ecache_mutex)); 334 - BUG_ON(notify != new); 335 - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); 336 - mutex_unlock(&nf_ct_ecache_mutex); 337 - /* synchronize_rcu() is called from ctnetlink_exit. */ 338 - } 339 - EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 340 295 341 296 void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) 342 297 {
+14 -42
net/netfilter/nf_conntrack_netlink.c
··· 706 706 } 707 707 708 708 static int 709 - ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) 709 + ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) 710 710 { 711 711 const struct nf_conntrack_zone *zone; 712 712 struct net *net; ··· 2669 2669 + nla_total_size(0) /* CTA_HELP */ 2670 2670 + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ 2671 2671 + ctnetlink_secctx_size(ct) 2672 + + ctnetlink_acct_size(ct) 2673 + + ctnetlink_timestamp_size(ct) 2672 2674 #if IS_ENABLED(CONFIG_NF_NAT) 2673 2675 + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ 2674 2676 + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ ··· 2726 2724 goto nla_put_failure; 2727 2725 2728 2726 if (ctnetlink_dump_protoinfo(skb, ct, false) < 0) 2727 + goto nla_put_failure; 2728 + 2729 + if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 || 2730 + ctnetlink_dump_timestamp(skb, ct) < 0) 2729 2731 goto nla_put_failure; 2730 2732 2731 2733 if (ctnetlink_dump_helpinfo(skb, ct) < 0) ··· 3110 3104 3111 3105 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3112 3106 static int 3113 - ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) 3107 + ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) 3114 3108 { 3115 3109 struct nf_conntrack_expect *exp = item->exp; 3116 3110 struct net *net = nf_ct_exp_net(exp); ··· 3761 3755 3762 3756 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3763 3757 static struct nf_ct_event_notifier ctnl_notifier = { 3764 - .fcn = ctnetlink_conntrack_event, 3765 - }; 3766 - 3767 - static struct nf_exp_event_notifier ctnl_notifier_exp = { 3768 - .fcn = ctnetlink_expect_event, 3758 + .ct_event = ctnetlink_conntrack_event, 3759 + .exp_event = ctnetlink_expect_event, 3769 3760 }; 3770 3761 #endif 3771 3762 ··· 3855 3852 static int __net_init ctnetlink_net_init(struct net *net) 3856 3853 { 3857 3854 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3858 - int ret; 3859 - 3860 - ret = 
nf_conntrack_register_notifier(net, &ctnl_notifier); 3861 - if (ret < 0) { 3862 - pr_err("ctnetlink_init: cannot register notifier.\n"); 3863 - goto err_out; 3864 - } 3865 - 3866 - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); 3867 - if (ret < 0) { 3868 - pr_err("ctnetlink_init: cannot expect register notifier.\n"); 3869 - goto err_unreg_notifier; 3870 - } 3855 + nf_conntrack_register_notifier(net, &ctnl_notifier); 3871 3856 #endif 3872 3857 return 0; 3873 - 3874 - #ifdef CONFIG_NF_CONNTRACK_EVENTS 3875 - err_unreg_notifier: 3876 - nf_conntrack_unregister_notifier(net, &ctnl_notifier); 3877 - err_out: 3878 - return ret; 3879 - #endif 3880 3858 } 3881 3859 3882 - static void ctnetlink_net_exit(struct net *net) 3860 + static void ctnetlink_net_pre_exit(struct net *net) 3883 3861 { 3884 3862 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3885 - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); 3886 - nf_conntrack_unregister_notifier(net, &ctnl_notifier); 3863 + nf_conntrack_unregister_notifier(net); 3887 3864 #endif 3888 - } 3889 - 3890 - static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) 3891 - { 3892 - struct net *net; 3893 - 3894 - list_for_each_entry(net, net_exit_list, exit_list) 3895 - ctnetlink_net_exit(net); 3896 - 3897 - /* wait for other cpus until they are done with ctnl_notifiers */ 3898 - synchronize_rcu(); 3899 3865 } 3900 3866 3901 3867 static struct pernet_operations ctnetlink_net_ops = { 3902 3868 .init = ctnetlink_net_init, 3903 - .exit_batch = ctnetlink_net_exit_batch, 3869 + .pre_exit = ctnetlink_net_pre_exit, 3904 3870 }; 3905 3871 3906 3872 static int __init ctnetlink_init(void)
+15
net/netfilter/nf_conntrack_standalone.c
··· 22 22 #include <net/netfilter/nf_conntrack_acct.h> 23 23 #include <net/netfilter/nf_conntrack_zones.h> 24 24 #include <net/netfilter/nf_conntrack_timestamp.h> 25 + #ifdef CONFIG_LWTUNNEL 26 + #include <net/netfilter/nf_hooks_lwtunnel.h> 27 + #endif 25 28 #include <linux/rculist_nulls.h> 26 29 27 30 static bool enable_hooks __read_mostly; ··· 615 612 NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, 616 613 NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, 617 614 #endif 615 + #ifdef CONFIG_LWTUNNEL 616 + NF_SYSCTL_CT_LWTUNNEL, 617 + #endif 618 618 619 619 __NF_SYSCTL_CT_LAST_SYSCTL, 620 620 }; ··· 963 957 .maxlen = sizeof(unsigned int), 964 958 .mode = 0644, 965 959 .proc_handler = proc_dointvec_jiffies, 960 + }, 961 + #endif 962 + #ifdef CONFIG_LWTUNNEL 963 + [NF_SYSCTL_CT_LWTUNNEL] = { 964 + .procname = "nf_hooks_lwtunnel", 965 + .data = NULL, 966 + .maxlen = sizeof(int), 967 + .mode = 0644, 968 + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, 966 969 }, 967 970 #endif 968 971 {}
+53
net/netfilter/nf_hooks_lwtunnel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/sysctl.h> 4 + #include <net/lwtunnel.h> 5 + #include <net/netfilter/nf_hooks_lwtunnel.h> 6 + 7 + static inline int nf_hooks_lwtunnel_get(void) 8 + { 9 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 10 + return 1; 11 + else 12 + return 0; 13 + } 14 + 15 + static inline int nf_hooks_lwtunnel_set(int enable) 16 + { 17 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { 18 + if (!enable) 19 + return -EBUSY; 20 + } else if (enable) { 21 + static_branch_enable(&nf_hooks_lwtunnel_enabled); 22 + } 23 + 24 + return 0; 25 + } 26 + 27 + #ifdef CONFIG_SYSCTL 28 + int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, 29 + void *buffer, size_t *lenp, loff_t *ppos) 30 + { 31 + int proc_nf_hooks_lwtunnel_enabled = 0; 32 + struct ctl_table tmp = { 33 + .procname = table->procname, 34 + .data = &proc_nf_hooks_lwtunnel_enabled, 35 + .maxlen = sizeof(int), 36 + .mode = table->mode, 37 + .extra1 = SYSCTL_ZERO, 38 + .extra2 = SYSCTL_ONE, 39 + }; 40 + int ret; 41 + 42 + if (!write) 43 + proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); 44 + 45 + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 46 + 47 + if (write && ret == 0) 48 + ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); 49 + 50 + return ret; 51 + } 52 + EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); 53 + #endif /* CONFIG_SYSCTL */