Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nf-next-25-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following batch contains Netfilter updates for net-next:

1) Use kvmalloc in xt_hashlimit, from Denis Kirjanov.

2) Tighten nf_conntrack sysctl accepted values for nf_conntrack_max
and nf_ct_expect_max, from Nicolas Bouchinet.

3) Avoid lookup in nft_fib if socket is available, from Florian Westphal.

4) Initialize struct lsm_context in nfnetlink_queue to avoid
hypothetical ENOMEM errors, from Chenyuan Yang.

5) Use strscpy() instead of strscpy_pad() when initializing the xtables
table name; kzalloc is already used to initialize the table memory area.
From Thorsten Blum.

6) Missing socket lookup by conntrack information for IPv6 traffic
in nft_socket, there is a similar chunk in IPv4, this was never
added when IPv6 NAT was introduced. From Maxim Mikityanskiy.

7) Fix clang issues with nf_tables CONFIG_MITIGATION_RETPOLINE,
from WangYuli.

* tag 'nf-next-25-03-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
netfilter: nf_tables: Only use nf_skip_indirect_calls() when MITIGATION_RETPOLINE
netfilter: socket: Lookup orig tuple for IPv6 SNAT
netfilter: xtables: Use strscpy() instead of strscpy_pad()
netfilter: nfnetlink_queue: Initialize ctx to avoid memory allocation error
netfilter: fib: avoid lookup if socket is available
netfilter: conntrack: Bound nf_conntrack sysctl writes
netfilter: xt_hashlimit: replace vmalloc calls with kvmalloc
====================

Link: https://patch.msgid.link/20250323100922.59983-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+79 -34
+21
include/net/netfilter/nft_fib.h
··· 18 18 return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; 19 19 } 20 20 21 + static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt) 22 + { 23 + const struct net_device *indev = nft_in(pkt); 24 + const struct sock *sk; 25 + 26 + switch (nft_hook(pkt)) { 27 + case NF_INET_PRE_ROUTING: 28 + case NF_INET_INGRESS: 29 + case NF_INET_LOCAL_IN: 30 + break; 31 + default: 32 + return false; 33 + } 34 + 35 + sk = pkt->skb->sk; 36 + if (sk && sk_fullsock(sk)) 37 + return sk->sk_rx_dst_ifindex == indev->ifindex; 38 + 39 + return nft_fib_is_loopback(pkt->skb, indev); 40 + } 41 + 21 42 int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); 22 43 int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, 23 44 const struct nlattr * const tb[]);
+5 -6
net/ipv4/netfilter/nft_fib_ipv4.c
··· 71 71 const struct net_device *oif; 72 72 const struct net_device *found; 73 73 74 + if (nft_fib_can_skip(pkt)) { 75 + nft_fib_store_result(dest, priv, nft_in(pkt)); 76 + return; 77 + } 78 + 74 79 /* 75 80 * Do not set flowi4_oif, it restricts results (for example, asking 76 81 * for oif 3 will get RTN_UNICAST result even if the daddr exits ··· 89 84 oif = nft_in(pkt); 90 85 else 91 86 oif = NULL; 92 - 93 - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 94 - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 95 - nft_fib_store_result(dest, priv, nft_in(pkt)); 96 - return; 97 - } 98 87 99 88 iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); 100 89 if (!iph) {
+23
net/ipv6/netfilter/nf_socket_ipv6.c
··· 103 103 struct sk_buff *data_skb = NULL; 104 104 int doff = 0; 105 105 int thoff = 0, tproto; 106 + #if IS_ENABLED(CONFIG_NF_CONNTRACK) 107 + enum ip_conntrack_info ctinfo; 108 + struct nf_conn const *ct; 109 + #endif 106 110 107 111 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); 108 112 if (tproto < 0) { ··· 139 135 } else { 140 136 return NULL; 141 137 } 138 + 139 + #if IS_ENABLED(CONFIG_NF_CONNTRACK) 140 + /* Do the lookup with the original socket address in 141 + * case this is a reply packet of an established 142 + * SNAT-ted connection. 143 + */ 144 + ct = nf_ct_get(skb, &ctinfo); 145 + if (ct && 146 + ((tproto != IPPROTO_ICMPV6 && 147 + ctinfo == IP_CT_ESTABLISHED_REPLY) || 148 + (tproto == IPPROTO_ICMPV6 && 149 + ctinfo == IP_CT_RELATED_REPLY)) && 150 + (ct->status & IPS_SRC_NAT_DONE)) { 151 + daddr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6; 152 + dport = (tproto == IPPROTO_TCP) ? 153 + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : 154 + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; 155 + } 156 + #endif 142 157 143 158 return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, 144 159 sport, dport, indev);
+10 -9
net/ipv6/netfilter/nft_fib_ipv6.c
··· 170 170 struct rt6_info *rt; 171 171 int lookup_flags; 172 172 173 + if (nft_fib_can_skip(pkt)) { 174 + nft_fib_store_result(dest, priv, nft_in(pkt)); 175 + return; 176 + } 177 + 173 178 if (priv->flags & NFTA_FIB_F_IIF) 174 179 oif = nft_in(pkt); 175 180 else if (priv->flags & NFTA_FIB_F_OIF) ··· 186 181 return; 187 182 } 188 183 189 - lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); 190 - 191 - if (nft_hook(pkt) == NF_INET_PRE_ROUTING || 192 - nft_hook(pkt) == NF_INET_INGRESS) { 193 - if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || 194 - nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { 195 - nft_fib_store_result(dest, priv, nft_in(pkt)); 196 - return; 197 - } 184 + if (nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { 185 + nft_fib_store_result(dest, priv, nft_in(pkt)); 186 + return; 198 187 } 188 + 189 + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); 199 190 200 191 *dest = 0; 201 192 rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+9 -3
net/netfilter/nf_conntrack_standalone.c
··· 618 618 .data = &nf_conntrack_max, 619 619 .maxlen = sizeof(int), 620 620 .mode = 0644, 621 - .proc_handler = proc_dointvec, 621 + .proc_handler = proc_dointvec_minmax, 622 + .extra1 = SYSCTL_ZERO, 623 + .extra2 = SYSCTL_INT_MAX, 622 624 }, 623 625 [NF_SYSCTL_CT_COUNT] = { 624 626 .procname = "nf_conntrack_count", ··· 656 654 .data = &nf_ct_expect_max, 657 655 .maxlen = sizeof(int), 658 656 .mode = 0644, 659 - .proc_handler = proc_dointvec, 657 + .proc_handler = proc_dointvec_minmax, 658 + .extra1 = SYSCTL_ONE, 659 + .extra2 = SYSCTL_INT_MAX, 660 660 }, 661 661 [NF_SYSCTL_CT_ACCT] = { 662 662 .procname = "nf_conntrack_acct", ··· 951 947 .data = &nf_conntrack_max, 952 948 .maxlen = sizeof(int), 953 949 .mode = 0644, 954 - .proc_handler = proc_dointvec, 950 + .proc_handler = proc_dointvec_minmax, 951 + .extra1 = SYSCTL_ZERO, 952 + .extra2 = SYSCTL_INT_MAX, 955 953 }, 956 954 }; 957 955
+4 -7
net/netfilter/nf_tables_core.c
··· 21 21 #include <net/netfilter/nf_log.h> 22 22 #include <net/netfilter/nft_meta.h> 23 23 24 - #if defined(CONFIG_MITIGATION_RETPOLINE) && defined(CONFIG_X86) 25 - 24 + #ifdef CONFIG_MITIGATION_RETPOLINE 26 25 static struct static_key_false nf_tables_skip_direct_calls; 27 26 28 - static bool nf_skip_indirect_calls(void) 27 + static inline bool nf_skip_indirect_calls(void) 29 28 { 30 29 return static_branch_likely(&nf_tables_skip_direct_calls); 31 30 } 32 31 33 - static void __init nf_skip_indirect_calls_enable(void) 32 + static inline void __init nf_skip_indirect_calls_enable(void) 34 33 { 35 34 if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) 36 35 static_branch_enable(&nf_tables_skip_direct_calls); 37 36 } 38 37 #else 39 - static inline bool nf_skip_indirect_calls(void) { return false; } 40 - 41 38 static inline void nf_skip_indirect_calls_enable(void) { } 42 - #endif 39 + #endif /* CONFIG_MITIGATION_RETPOLINE */ 43 40 44 41 static noinline void __nft_trace_packet(const struct nft_pktinfo *pkt, 45 42 const struct nft_verdict *verdict,
+5 -7
net/netfilter/xt_hashlimit.c
··· 15 15 #include <linux/random.h> 16 16 #include <linux/jhash.h> 17 17 #include <linux/slab.h> 18 - #include <linux/vmalloc.h> 19 18 #include <linux/proc_fs.h> 20 19 #include <linux/seq_file.h> 21 20 #include <linux/list.h> ··· 293 294 if (size < 16) 294 295 size = 16; 295 296 } 296 - /* FIXME: don't use vmalloc() here or anywhere else -HW */ 297 - hinfo = vmalloc(struct_size(hinfo, hash, size)); 297 + hinfo = kvmalloc(struct_size(hinfo, hash, size), GFP_KERNEL); 298 298 if (hinfo == NULL) 299 299 return -ENOMEM; 300 300 *out_hinfo = hinfo; ··· 301 303 /* copy match config into hashtable config */ 302 304 ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); 303 305 if (ret) { 304 - vfree(hinfo); 306 + kvfree(hinfo); 305 307 return ret; 306 308 } 307 309 ··· 320 322 hinfo->rnd_initialized = false; 321 323 hinfo->name = kstrdup(name, GFP_KERNEL); 322 324 if (!hinfo->name) { 323 - vfree(hinfo); 325 + kvfree(hinfo); 324 326 return -ENOMEM; 325 327 } 326 328 spin_lock_init(&hinfo->lock); ··· 342 344 ops, hinfo); 343 345 if (hinfo->pde == NULL) { 344 346 kfree(hinfo->name); 345 - vfree(hinfo); 347 + kvfree(hinfo); 346 348 return -ENOMEM; 347 349 } 348 350 hinfo->net = net; ··· 431 433 cancel_delayed_work_sync(&hinfo->gc_work); 432 434 htable_selective_cleanup(hinfo, true); 433 435 kfree(hinfo->name); 434 - vfree(hinfo); 436 + kvfree(hinfo); 435 437 } 436 438 } 437 439
+1 -1
net/netfilter/xt_repldata.h
··· 29 29 if (tbl == NULL) \ 30 30 return NULL; \ 31 31 term = (struct type##_error *)&(((char *)tbl)[term_offset]); \ 32 - strscpy_pad(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ 32 + strscpy(tbl->repl.name, info->name); \ 33 33 *term = (struct type##_error)typ2##_ERROR_INIT; \ 34 34 tbl->repl.valid_hooks = hook_mask; \ 35 35 tbl->repl.num_entries = nhooks + 1; \