Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Clean up and consolidate ct ecache infrastructure by merging ct and
expect notifiers, from Florian Westphal.

2) Missing counters and timestamp in nfnetlink_queue and _log conntrack
information.

3) Missing error check for xt_register_template() in iptables mangle,
as an incremental fix for the previous pull request, also from
Florian Westphal.

4) Add netfilter hooks for the SRv6 lightweight tunnel driver, from
Ryoga Sato. The hooks are enabled via nf_hooks_lwtunnel sysctl
to make sure existing netfilter rulesets do not break. There is
a static key to disable the hooks by default.

The pktgen_bench_xmit_mode_netif_receive.sh shows no noticeable
impact in the seg6_input path for non-netfilter users: similar
numbers with and without this patch.

This is a sample of the perf report output:

11.67% kpktgend_0 [ipv6] [k] ipv6_get_saddr_eval
7.89% kpktgend_0 [ipv6] [k] __ipv6_addr_label
7.52% kpktgend_0 [ipv6] [k] __ipv6_dev_get_saddr
6.63% kpktgend_0 [kernel.vmlinux] [k] asm_exc_nmi
4.74% kpktgend_0 [ipv6] [k] fib6_node_lookup_1
3.48% kpktgend_0 [kernel.vmlinux] [k] pskb_expand_head
3.33% kpktgend_0 [ipv6] [k] ip6_rcv_core.isra.29
3.33% kpktgend_0 [ipv6] [k] seg6_do_srh_encap
2.53% kpktgend_0 [ipv6] [k] ipv6_dev_get_saddr
2.45% kpktgend_0 [ipv6] [k] fib6_table_lookup
2.24% kpktgend_0 [kernel.vmlinux] [k] ___cache_free
2.16% kpktgend_0 [ipv6] [k] ip6_pol_route
2.11% kpktgend_0 [kernel.vmlinux] [k] __ipv6_addr_type
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+362 -251
+7
Documentation/networking/nf_conntrack-sysctl.rst
··· 184 184 This extended timeout will be used in case there is an GRE stream 185 185 detected. 186 186 187 + nf_hooks_lwtunnel - BOOLEAN 188 + - 0 - disabled (default) 189 + - not 0 - enabled 190 + 191 + If this option is enabled, the lightweight tunnel netfilter hooks are 192 + enabled. This option cannot be disabled once it is enabled. 193 + 187 194 nf_flowtable_tcp_timeout - INTEGER (seconds) 188 195 default 30 189 196
+3
include/net/lwtunnel.h
··· 51 51 }; 52 52 53 53 #ifdef CONFIG_LWTUNNEL 54 + 55 + DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 56 + 54 57 void lwtstate_free(struct lwtunnel_state *lws); 55 58 56 59 static inline struct lwtunnel_state *
+12 -22
include/net/netfilter/nf_conntrack_ecache.h
··· 72 72 int report; 73 73 }; 74 74 75 - struct nf_ct_event_notifier { 76 - int (*fcn)(unsigned int events, struct nf_ct_event *item); 75 + struct nf_exp_event { 76 + struct nf_conntrack_expect *exp; 77 + u32 portid; 78 + int report; 77 79 }; 78 80 79 - int nf_conntrack_register_notifier(struct net *net, 80 - struct nf_ct_event_notifier *nb); 81 - void nf_conntrack_unregister_notifier(struct net *net, 82 - struct nf_ct_event_notifier *nb); 81 + struct nf_ct_event_notifier { 82 + int (*ct_event)(unsigned int events, const struct nf_ct_event *item); 83 + int (*exp_event)(unsigned int events, const struct nf_exp_event *item); 84 + }; 85 + 86 + void nf_conntrack_register_notifier(struct net *net, 87 + const struct nf_ct_event_notifier *nb); 88 + void nf_conntrack_unregister_notifier(struct net *net); 83 89 84 90 void nf_ct_deliver_cached_events(struct nf_conn *ct); 85 91 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, ··· 157 151 } 158 152 159 153 #ifdef CONFIG_NF_CONNTRACK_EVENTS 160 - 161 - struct nf_exp_event { 162 - struct nf_conntrack_expect *exp; 163 - u32 portid; 164 - int report; 165 - }; 166 - 167 - struct nf_exp_event_notifier { 168 - int (*fcn)(unsigned int events, struct nf_exp_event *item); 169 - }; 170 - 171 - int nf_ct_expect_register_notifier(struct net *net, 172 - struct nf_exp_event_notifier *nb); 173 - void nf_ct_expect_unregister_notifier(struct net *net, 174 - struct nf_exp_event_notifier *nb); 175 - 176 154 void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 177 155 struct nf_conntrack_expect *exp, 178 156 u32 portid, int report);
+7
include/net/netfilter/nf_hooks_lwtunnel.h
··· 1 + #include <linux/sysctl.h> 2 + #include <linux/types.h> 3 + 4 + #ifdef CONFIG_SYSCTL 5 + int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, 6 + void *buffer, size_t *lenp, loff_t *ppos); 7 + #endif
-1
include/net/netns/conntrack.h
··· 113 113 struct ct_pcpu __percpu *pcpu_lists; 114 114 struct ip_conntrack_stat __percpu *stat; 115 115 struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; 116 - struct nf_exp_event_notifier __rcu *nf_expect_event_cb; 117 116 struct nf_ip_net nf_ct_proto; 118 117 #if defined(CONFIG_NF_CONNTRACK_LABELS) 119 118 unsigned int labels_used;
+3
net/core/lwtunnel.c
··· 23 23 #include <net/ip6_fib.h> 24 24 #include <net/rtnh.h> 25 25 26 + DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); 27 + EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); 28 + 26 29 #ifdef CONFIG_MODULES 27 30 28 31 static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
+2
net/ipv4/netfilter/iptable_mangle.c
··· 112 112 { 113 113 int ret = xt_register_template(&packet_mangler, 114 114 iptable_mangle_table_init); 115 + if (ret < 0) 116 + return ret; 115 117 116 118 mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); 117 119 if (IS_ERR(mangle_ops)) {
+72 -3
net/ipv6/seg6_iptunnel.c
··· 26 26 #ifdef CONFIG_IPV6_SEG6_HMAC 27 27 #include <net/seg6_hmac.h> 28 28 #endif 29 + #include <net/lwtunnel.h> 30 + #include <linux/netfilter.h> 29 31 30 32 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) 31 33 { ··· 297 295 298 296 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 299 297 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 298 + nf_reset_ct(skb); 300 299 301 300 return 0; 302 301 } 303 302 304 - static int seg6_input(struct sk_buff *skb) 303 + static int seg6_input_finish(struct net *net, struct sock *sk, 304 + struct sk_buff *skb) 305 + { 306 + return dst_input(skb); 307 + } 308 + 309 + static int seg6_input_core(struct net *net, struct sock *sk, 310 + struct sk_buff *skb) 305 311 { 306 312 struct dst_entry *orig_dst = skb_dst(skb); 307 313 struct dst_entry *dst = NULL; ··· 347 337 if (unlikely(err)) 348 338 return err; 349 339 350 - return dst_input(skb); 340 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 341 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, 342 + dev_net(skb->dev), NULL, skb, NULL, 343 + skb_dst(skb)->dev, seg6_input_finish); 344 + 345 + return seg6_input_finish(dev_net(skb->dev), NULL, skb); 351 346 } 352 347 353 - static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 348 + static int seg6_input_nf(struct sk_buff *skb) 349 + { 350 + struct net_device *dev = skb_dst(skb)->dev; 351 + struct net *net = dev_net(skb->dev); 352 + 353 + switch (skb->protocol) { 354 + case htons(ETH_P_IP): 355 + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, 356 + skb, NULL, dev, seg6_input_core); 357 + case htons(ETH_P_IPV6): 358 + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, 359 + skb, NULL, dev, seg6_input_core); 360 + } 361 + 362 + return -EINVAL; 363 + } 364 + 365 + static int seg6_input(struct sk_buff *skb) 366 + { 367 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 368 + return seg6_input_nf(skb); 369 + 370 + return 
seg6_input_core(dev_net(skb->dev), NULL, skb); 371 + } 372 + 373 + static int seg6_output_core(struct net *net, struct sock *sk, 374 + struct sk_buff *skb) 354 375 { 355 376 struct dst_entry *orig_dst = skb_dst(skb); 356 377 struct dst_entry *dst = NULL; ··· 428 387 if (unlikely(err)) 429 388 goto drop; 430 389 390 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 391 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 392 + NULL, skb_dst(skb)->dev, dst_output); 393 + 431 394 return dst_output(net, sk, skb); 432 395 drop: 433 396 kfree_skb(skb); 434 397 return err; 398 + } 399 + 400 + static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) 401 + { 402 + struct net_device *dev = skb_dst(skb)->dev; 403 + 404 + switch (skb->protocol) { 405 + case htons(ETH_P_IP): 406 + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, 407 + NULL, dev, seg6_output_core); 408 + case htons(ETH_P_IPV6): 409 + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, 410 + NULL, dev, seg6_output_core); 411 + } 412 + 413 + return -EINVAL; 414 + } 415 + 416 + static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 417 + { 418 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 419 + return seg6_output_nf(net, sk, skb); 420 + 421 + return seg6_output_core(net, sk, skb); 435 422 } 436 423 437 424 static int seg6_build_state(struct net *net, struct nlattr *nla,
+78 -33
net/ipv6/seg6_local.c
··· 30 30 #include <net/seg6_local.h> 31 31 #include <linux/etherdevice.h> 32 32 #include <linux/bpf.h> 33 + #include <net/lwtunnel.h> 34 + #include <linux/netfilter.h> 33 35 34 36 #define SEG6_F_ATTR(i) BIT(i) 35 37 ··· 415 413 return -EINVAL; 416 414 } 417 415 416 + static int input_action_end_dx6_finish(struct net *net, struct sock *sk, 417 + struct sk_buff *skb) 418 + { 419 + struct dst_entry *orig_dst = skb_dst(skb); 420 + struct in6_addr *nhaddr = NULL; 421 + struct seg6_local_lwt *slwt; 422 + 423 + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 424 + 425 + /* The inner packet is not associated to any local interface, 426 + * so we do not call netif_rx(). 427 + * 428 + * If slwt->nh6 is set to ::, then lookup the nexthop for the 429 + * inner packet's DA. Otherwise, use the specified nexthop. 430 + */ 431 + if (!ipv6_addr_any(&slwt->nh6)) 432 + nhaddr = &slwt->nh6; 433 + 434 + seg6_lookup_nexthop(skb, nhaddr, 0); 435 + 436 + return dst_input(skb); 437 + } 438 + 418 439 /* decapsulate and forward to specified nexthop */ 419 440 static int input_action_end_dx6(struct sk_buff *skb, 420 441 struct seg6_local_lwt *slwt) 421 442 { 422 - struct in6_addr *nhaddr = NULL; 423 - 424 443 /* this function accepts IPv6 encapsulated packets, with either 425 444 * an SRH with SL=0, or no SRH. 426 445 */ ··· 452 429 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 453 430 goto drop; 454 431 455 - /* The inner packet is not associated to any local interface, 456 - * so we do not call netif_rx(). 457 - * 458 - * If slwt->nh6 is set to ::, then lookup the nexthop for the 459 - * inner packet's DA. Otherwise, use the specified nexthop. 
460 - */ 461 - 462 - if (!ipv6_addr_any(&slwt->nh6)) 463 - nhaddr = &slwt->nh6; 464 - 465 432 skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 433 + nf_reset_ct(skb); 466 434 467 - seg6_lookup_nexthop(skb, nhaddr, 0); 435 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 436 + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 437 + dev_net(skb->dev), NULL, skb, NULL, 438 + skb_dst(skb)->dev, input_action_end_dx6_finish); 468 439 469 - return dst_input(skb); 440 + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); 470 441 drop: 471 442 kfree_skb(skb); 472 443 return -EINVAL; 473 444 } 474 445 475 - static int input_action_end_dx4(struct sk_buff *skb, 476 - struct seg6_local_lwt *slwt) 446 + static int input_action_end_dx4_finish(struct net *net, struct sock *sk, 447 + struct sk_buff *skb) 477 448 { 449 + struct dst_entry *orig_dst = skb_dst(skb); 450 + struct seg6_local_lwt *slwt; 478 451 struct iphdr *iph; 479 452 __be32 nhaddr; 480 453 int err; 481 454 482 - if (!decap_and_validate(skb, IPPROTO_IPIP)) 483 - goto drop; 484 - 485 - if (!pskb_may_pull(skb, sizeof(struct iphdr))) 486 - goto drop; 487 - 488 - skb->protocol = htons(ETH_P_IP); 455 + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 489 456 490 457 iph = ip_hdr(skb); 491 458 ··· 483 470 484 471 skb_dst_drop(skb); 485 472 486 - skb_set_transport_header(skb, sizeof(struct iphdr)); 487 - 488 473 err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); 489 - if (err) 490 - goto drop; 474 + if (err) { 475 + kfree_skb(skb); 476 + return -EINVAL; 477 + } 491 478 492 479 return dst_input(skb); 480 + } 493 481 482 + static int input_action_end_dx4(struct sk_buff *skb, 483 + struct seg6_local_lwt *slwt) 484 + { 485 + if (!decap_and_validate(skb, IPPROTO_IPIP)) 486 + goto drop; 487 + 488 + if (!pskb_may_pull(skb, sizeof(struct iphdr))) 489 + goto drop; 490 + 491 + skb->protocol = htons(ETH_P_IP); 492 + skb_set_transport_header(skb, sizeof(struct iphdr)); 493 + nf_reset_ct(skb); 494 + 
495 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 496 + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, 497 + dev_net(skb->dev), NULL, skb, NULL, 498 + skb_dst(skb)->dev, input_action_end_dx4_finish); 499 + 500 + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); 494 501 drop: 495 502 kfree_skb(skb); 496 503 return -EINVAL; ··· 678 645 skb_dst_drop(skb); 679 646 680 647 skb_set_transport_header(skb, hdrlen); 648 + nf_reset_ct(skb); 681 649 682 650 return end_dt_vrf_rcv(skb, family, vrf); 683 651 ··· 1112 1078 u64_stats_update_end(&pcounters->syncp); 1113 1079 } 1114 1080 1115 - static int seg6_local_input(struct sk_buff *skb) 1081 + static int seg6_local_input_core(struct net *net, struct sock *sk, 1082 + struct sk_buff *skb) 1116 1083 { 1117 1084 struct dst_entry *orig_dst = skb_dst(skb); 1118 1085 struct seg6_action_desc *desc; 1119 1086 struct seg6_local_lwt *slwt; 1120 1087 unsigned int len = skb->len; 1121 1088 int rc; 1122 - 1123 - if (skb->protocol != htons(ETH_P_IPV6)) { 1124 - kfree_skb(skb); 1125 - return -EINVAL; 1126 - } 1127 1089 1128 1090 slwt = seg6_local_lwtunnel(orig_dst->lwtstate); 1129 1091 desc = slwt->desc; ··· 1132 1102 seg6_local_update_counters(slwt, len, rc); 1133 1103 1134 1104 return rc; 1105 + } 1106 + 1107 + static int seg6_local_input(struct sk_buff *skb) 1108 + { 1109 + if (skb->protocol != htons(ETH_P_IPV6)) { 1110 + kfree_skb(skb); 1111 + return -EINVAL; 1112 + } 1113 + 1114 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 1115 + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, 1116 + dev_net(skb->dev), NULL, skb, skb->dev, NULL, 1117 + seg6_local_input_core); 1118 + 1119 + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); 1135 1120 } 1136 1121 1137 1122 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
+3
net/netfilter/Makefile
··· 212 212 213 213 # IPVS 214 214 obj-$(CONFIG_IP_VS) += ipvs/ 215 + 216 + # lwtunnel 217 + obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
+93 -150
net/netfilter/nf_conntrack_ecache.c
··· 130 130 schedule_delayed_work(&cnet->ecache_dwork, delay); 131 131 } 132 132 133 - int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 134 - u32 portid, int report) 133 + static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, 134 + const unsigned int events, 135 + const unsigned long missed, 136 + const struct nf_ct_event *item) 135 137 { 136 - int ret = 0; 137 - struct net *net = nf_ct_net(ct); 138 + struct nf_conn *ct = item->ct; 139 + struct net *net = nf_ct_net(item->ct); 138 140 struct nf_ct_event_notifier *notify; 139 - struct nf_conntrack_ecache *e; 140 - 141 - rcu_read_lock(); 142 - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 143 - if (!notify) 144 - goto out_unlock; 145 - 146 - e = nf_ct_ecache_find(ct); 147 - if (!e) 148 - goto out_unlock; 149 - 150 - if (nf_ct_is_confirmed(ct)) { 151 - struct nf_ct_event item = { 152 - .ct = ct, 153 - .portid = e->portid ? e->portid : portid, 154 - .report = report 155 - }; 156 - /* This is a resent of a destroy event? If so, skip missed */ 157 - unsigned long missed = e->portid ? 0 : e->missed; 158 - 159 - if (!((eventmask | missed) & e->ctmask)) 160 - goto out_unlock; 161 - 162 - ret = notify->fcn(eventmask | missed, &item); 163 - if (unlikely(ret < 0 || missed)) { 164 - spin_lock_bh(&ct->lock); 165 - if (ret < 0) { 166 - /* This is a destroy event that has been 167 - * triggered by a process, we store the PORTID 168 - * to include it in the retransmission. 
169 - */ 170 - if (eventmask & (1 << IPCT_DESTROY)) { 171 - if (e->portid == 0 && portid != 0) 172 - e->portid = portid; 173 - e->state = NFCT_ECACHE_DESTROY_FAIL; 174 - } else { 175 - e->missed |= eventmask; 176 - } 177 - } else { 178 - e->missed &= ~missed; 179 - } 180 - spin_unlock_bh(&ct->lock); 181 - } 182 - } 183 - out_unlock: 184 - rcu_read_unlock(); 185 - return ret; 186 - } 187 - EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 188 - 189 - /* deliver cached events and clear cache entry - must be called with locally 190 - * disabled softirqs */ 191 - void nf_ct_deliver_cached_events(struct nf_conn *ct) 192 - { 193 - struct net *net = nf_ct_net(ct); 194 - unsigned long events, missed; 195 - struct nf_ct_event_notifier *notify; 196 - struct nf_conntrack_ecache *e; 197 - struct nf_ct_event item; 198 141 int ret; 199 142 200 - rcu_read_lock(); 201 - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 202 - if (notify == NULL) 203 - goto out_unlock; 204 - 205 - if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 206 - goto out_unlock; 207 - 208 - e = nf_ct_ecache_find(ct); 209 - if (e == NULL) 210 - goto out_unlock; 211 - 212 - events = xchg(&e->cache, 0); 213 - 214 - /* We make a copy of the missed event cache without taking 215 - * the lock, thus we may send missed events twice. However, 216 - * this does not harm and it happens very rarely. 
*/ 217 - missed = e->missed; 218 - 219 143 if (!((events | missed) & e->ctmask)) 220 - goto out_unlock; 144 + return 0; 221 145 222 - item.ct = ct; 223 - item.portid = 0; 224 - item.report = 0; 146 + rcu_read_lock(); 225 147 226 - ret = notify->fcn(events | missed, &item); 148 + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 149 + if (!notify) { 150 + rcu_read_unlock(); 151 + return 0; 152 + } 227 153 228 - if (likely(ret == 0 && !missed)) 229 - goto out_unlock; 154 + ret = notify->ct_event(events | missed, item); 155 + rcu_read_unlock(); 156 + 157 + if (likely(ret >= 0 && missed == 0)) 158 + return 0; 230 159 231 160 spin_lock_bh(&ct->lock); 232 161 if (ret < 0) ··· 164 235 e->missed &= ~missed; 165 236 spin_unlock_bh(&ct->lock); 166 237 167 - out_unlock: 168 - rcu_read_unlock(); 238 + return ret; 239 + } 240 + 241 + int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, 242 + u32 portid, int report) 243 + { 244 + struct nf_conntrack_ecache *e; 245 + struct nf_ct_event item; 246 + unsigned long missed; 247 + int ret; 248 + 249 + if (!nf_ct_is_confirmed(ct)) 250 + return 0; 251 + 252 + e = nf_ct_ecache_find(ct); 253 + if (!e) 254 + return 0; 255 + 256 + memset(&item, 0, sizeof(item)); 257 + 258 + item.ct = ct; 259 + item.portid = e->portid ? e->portid : portid; 260 + item.report = report; 261 + 262 + /* This is a resent of a destroy event? If so, skip missed */ 263 + missed = e->portid ? 0 : e->missed; 264 + 265 + ret = __nf_conntrack_eventmask_report(e, events, missed, &item); 266 + if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) { 267 + /* This is a destroy event that has been triggered by a process, 268 + * we store the PORTID to include it in the retransmission. 
269 + */ 270 + if (e->portid == 0 && portid != 0) 271 + e->portid = portid; 272 + e->state = NFCT_ECACHE_DESTROY_FAIL; 273 + } 274 + 275 + return ret; 276 + } 277 + EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 278 + 279 + /* deliver cached events and clear cache entry - must be called with locally 280 + * disabled softirqs */ 281 + void nf_ct_deliver_cached_events(struct nf_conn *ct) 282 + { 283 + struct nf_conntrack_ecache *e; 284 + struct nf_ct_event item; 285 + unsigned long events; 286 + 287 + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) 288 + return; 289 + 290 + e = nf_ct_ecache_find(ct); 291 + if (e == NULL) 292 + return; 293 + 294 + events = xchg(&e->cache, 0); 295 + 296 + item.ct = ct; 297 + item.portid = 0; 298 + item.report = 0; 299 + 300 + /* We make a copy of the missed event cache without taking 301 + * the lock, thus we may send missed events twice. However, 302 + * this does not harm and it happens very rarely. 303 + */ 304 + __nf_conntrack_eventmask_report(e, events, e->missed, &item); 169 305 } 170 306 EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); 171 307 ··· 240 246 241 247 { 242 248 struct net *net = nf_ct_exp_net(exp); 243 - struct nf_exp_event_notifier *notify; 249 + struct nf_ct_event_notifier *notify; 244 250 struct nf_conntrack_ecache *e; 245 251 246 252 rcu_read_lock(); 247 - notify = rcu_dereference(net->ct.nf_expect_event_cb); 253 + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); 248 254 if (!notify) 249 255 goto out_unlock; 250 256 ··· 258 264 .portid = portid, 259 265 .report = report 260 266 }; 261 - notify->fcn(1 << event, &item); 267 + notify->exp_event(1 << event, &item); 262 268 } 263 269 out_unlock: 264 270 rcu_read_unlock(); 265 271 } 266 272 267 - int nf_conntrack_register_notifier(struct net *net, 268 - struct nf_ct_event_notifier *new) 273 + void nf_conntrack_register_notifier(struct net *net, 274 + const struct nf_ct_event_notifier *new) 269 275 { 270 - int ret; 271 276 struct nf_ct_event_notifier 
*notify; 272 277 273 278 mutex_lock(&nf_ct_ecache_mutex); 274 279 notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 275 280 lockdep_is_held(&nf_ct_ecache_mutex)); 276 - if (notify != NULL) { 277 - ret = -EBUSY; 278 - goto out_unlock; 279 - } 281 + WARN_ON_ONCE(notify); 280 282 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 281 - ret = 0; 282 - 283 - out_unlock: 284 283 mutex_unlock(&nf_ct_ecache_mutex); 285 - return ret; 286 284 } 287 285 EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 288 286 289 - void nf_conntrack_unregister_notifier(struct net *net, 290 - struct nf_ct_event_notifier *new) 287 + void nf_conntrack_unregister_notifier(struct net *net) 291 288 { 292 - struct nf_ct_event_notifier *notify; 293 - 294 289 mutex_lock(&nf_ct_ecache_mutex); 295 - notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 296 - lockdep_is_held(&nf_ct_ecache_mutex)); 297 - BUG_ON(notify != new); 298 290 RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); 299 291 mutex_unlock(&nf_ct_ecache_mutex); 300 - /* synchronize_rcu() is called from ctnetlink_exit. 
*/ 292 + /* synchronize_rcu() is called after netns pre_exit */ 301 293 } 302 294 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 303 - 304 - int nf_ct_expect_register_notifier(struct net *net, 305 - struct nf_exp_event_notifier *new) 306 - { 307 - int ret; 308 - struct nf_exp_event_notifier *notify; 309 - 310 - mutex_lock(&nf_ct_ecache_mutex); 311 - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 312 - lockdep_is_held(&nf_ct_ecache_mutex)); 313 - if (notify != NULL) { 314 - ret = -EBUSY; 315 - goto out_unlock; 316 - } 317 - rcu_assign_pointer(net->ct.nf_expect_event_cb, new); 318 - ret = 0; 319 - 320 - out_unlock: 321 - mutex_unlock(&nf_ct_ecache_mutex); 322 - return ret; 323 - } 324 - EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); 325 - 326 - void nf_ct_expect_unregister_notifier(struct net *net, 327 - struct nf_exp_event_notifier *new) 328 - { 329 - struct nf_exp_event_notifier *notify; 330 - 331 - mutex_lock(&nf_ct_ecache_mutex); 332 - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, 333 - lockdep_is_held(&nf_ct_ecache_mutex)); 334 - BUG_ON(notify != new); 335 - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); 336 - mutex_unlock(&nf_ct_ecache_mutex); 337 - /* synchronize_rcu() is called from ctnetlink_exit. */ 338 - } 339 - EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); 340 295 341 296 void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) 342 297 {
+14 -42
net/netfilter/nf_conntrack_netlink.c
··· 706 706 } 707 707 708 708 static int 709 - ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) 709 + ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) 710 710 { 711 711 const struct nf_conntrack_zone *zone; 712 712 struct net *net; ··· 2669 2669 + nla_total_size(0) /* CTA_HELP */ 2670 2670 + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ 2671 2671 + ctnetlink_secctx_size(ct) 2672 + + ctnetlink_acct_size(ct) 2673 + + ctnetlink_timestamp_size(ct) 2672 2674 #if IS_ENABLED(CONFIG_NF_NAT) 2673 2675 + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ 2674 2676 + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ ··· 2726 2724 goto nla_put_failure; 2727 2725 2728 2726 if (ctnetlink_dump_protoinfo(skb, ct, false) < 0) 2727 + goto nla_put_failure; 2728 + 2729 + if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 || 2730 + ctnetlink_dump_timestamp(skb, ct) < 0) 2729 2731 goto nla_put_failure; 2730 2732 2731 2733 if (ctnetlink_dump_helpinfo(skb, ct) < 0) ··· 3110 3104 3111 3105 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3112 3106 static int 3113 - ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) 3107 + ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) 3114 3108 { 3115 3109 struct nf_conntrack_expect *exp = item->exp; 3116 3110 struct net *net = nf_ct_exp_net(exp); ··· 3761 3755 3762 3756 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3763 3757 static struct nf_ct_event_notifier ctnl_notifier = { 3764 - .fcn = ctnetlink_conntrack_event, 3765 - }; 3766 - 3767 - static struct nf_exp_event_notifier ctnl_notifier_exp = { 3768 - .fcn = ctnetlink_expect_event, 3758 + .ct_event = ctnetlink_conntrack_event, 3759 + .exp_event = ctnetlink_expect_event, 3769 3760 }; 3770 3761 #endif 3771 3762 ··· 3855 3852 static int __net_init ctnetlink_net_init(struct net *net) 3856 3853 { 3857 3854 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3858 - int ret; 3859 - 3860 - ret = 
nf_conntrack_register_notifier(net, &ctnl_notifier); 3861 - if (ret < 0) { 3862 - pr_err("ctnetlink_init: cannot register notifier.\n"); 3863 - goto err_out; 3864 - } 3865 - 3866 - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); 3867 - if (ret < 0) { 3868 - pr_err("ctnetlink_init: cannot expect register notifier.\n"); 3869 - goto err_unreg_notifier; 3870 - } 3855 + nf_conntrack_register_notifier(net, &ctnl_notifier); 3871 3856 #endif 3872 3857 return 0; 3873 - 3874 - #ifdef CONFIG_NF_CONNTRACK_EVENTS 3875 - err_unreg_notifier: 3876 - nf_conntrack_unregister_notifier(net, &ctnl_notifier); 3877 - err_out: 3878 - return ret; 3879 - #endif 3880 3858 } 3881 3859 3882 - static void ctnetlink_net_exit(struct net *net) 3860 + static void ctnetlink_net_pre_exit(struct net *net) 3883 3861 { 3884 3862 #ifdef CONFIG_NF_CONNTRACK_EVENTS 3885 - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); 3886 - nf_conntrack_unregister_notifier(net, &ctnl_notifier); 3863 + nf_conntrack_unregister_notifier(net); 3887 3864 #endif 3888 - } 3889 - 3890 - static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) 3891 - { 3892 - struct net *net; 3893 - 3894 - list_for_each_entry(net, net_exit_list, exit_list) 3895 - ctnetlink_net_exit(net); 3896 - 3897 - /* wait for other cpus until they are done with ctnl_notifiers */ 3898 - synchronize_rcu(); 3899 3865 } 3900 3866 3901 3867 static struct pernet_operations ctnetlink_net_ops = { 3902 3868 .init = ctnetlink_net_init, 3903 - .exit_batch = ctnetlink_net_exit_batch, 3869 + .pre_exit = ctnetlink_net_pre_exit, 3904 3870 }; 3905 3871 3906 3872 static int __init ctnetlink_init(void)
+15
net/netfilter/nf_conntrack_standalone.c
··· 22 22 #include <net/netfilter/nf_conntrack_acct.h> 23 23 #include <net/netfilter/nf_conntrack_zones.h> 24 24 #include <net/netfilter/nf_conntrack_timestamp.h> 25 + #ifdef CONFIG_LWTUNNEL 26 + #include <net/netfilter/nf_hooks_lwtunnel.h> 27 + #endif 25 28 #include <linux/rculist_nulls.h> 26 29 27 30 static bool enable_hooks __read_mostly; ··· 615 612 NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, 616 613 NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, 617 614 #endif 615 + #ifdef CONFIG_LWTUNNEL 616 + NF_SYSCTL_CT_LWTUNNEL, 617 + #endif 618 618 619 619 __NF_SYSCTL_CT_LAST_SYSCTL, 620 620 }; ··· 963 957 .maxlen = sizeof(unsigned int), 964 958 .mode = 0644, 965 959 .proc_handler = proc_dointvec_jiffies, 960 + }, 961 + #endif 962 + #ifdef CONFIG_LWTUNNEL 963 + [NF_SYSCTL_CT_LWTUNNEL] = { 964 + .procname = "nf_hooks_lwtunnel", 965 + .data = NULL, 966 + .maxlen = sizeof(int), 967 + .mode = 0644, 968 + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, 966 969 }, 967 970 #endif 968 971 {}
+53
net/netfilter/nf_hooks_lwtunnel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/sysctl.h> 4 + #include <net/lwtunnel.h> 5 + #include <net/netfilter/nf_hooks_lwtunnel.h> 6 + 7 + static inline int nf_hooks_lwtunnel_get(void) 8 + { 9 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) 10 + return 1; 11 + else 12 + return 0; 13 + } 14 + 15 + static inline int nf_hooks_lwtunnel_set(int enable) 16 + { 17 + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { 18 + if (!enable) 19 + return -EBUSY; 20 + } else if (enable) { 21 + static_branch_enable(&nf_hooks_lwtunnel_enabled); 22 + } 23 + 24 + return 0; 25 + } 26 + 27 + #ifdef CONFIG_SYSCTL 28 + int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, 29 + void *buffer, size_t *lenp, loff_t *ppos) 30 + { 31 + int proc_nf_hooks_lwtunnel_enabled = 0; 32 + struct ctl_table tmp = { 33 + .procname = table->procname, 34 + .data = &proc_nf_hooks_lwtunnel_enabled, 35 + .maxlen = sizeof(int), 36 + .mode = table->mode, 37 + .extra1 = SYSCTL_ZERO, 38 + .extra2 = SYSCTL_ONE, 39 + }; 40 + int ret; 41 + 42 + if (!write) 43 + proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); 44 + 45 + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 46 + 47 + if (write && ret == 0) 48 + ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); 49 + 50 + return ret; 51 + } 52 + EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); 53 + #endif /* CONFIG_SYSCTL */