Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for your net tree,
they are:

1) There was a race condition between parallel save/swap and delete,
which resulted in a kernel crash due to the ref increase for save/swap
racing with the wrong ref decrease operations. Reported and fixed by
Vishwanath Pai.

2) OVS should call into CT NAT for packets of new expected connections only
when the conntrack state is persisted with the 'commit' option to the
OVS CT action. From Jarno Rajahalme.

3) Resolve kconfig dependencies with new OVS NAT support. From Arnd Bergmann.

4) Early validation of entry->target_offset to make sure it doesn't take us
out from the blob, from Florian Westphal.

5) Again early validation of entry->next_offset to make sure it doesn't take
us out from the blob, also from Florian.

6) Check that entry->target_offset is always sizeof(struct xt_entry)
for unconditional entries, when checking both from check_underflow()
and when checking for loops in mark_source_chains(), again from
Florian.

7) Fix inconsistent behaviour in nfnetlink_queue when
NFQA_CFG_F_FAIL_OPEN is set and netlink_unicast() fails due to buffer
overrun, we have to reinject the packet as the user expects.

8) Enforce nul-terminated table names from getsockopt GET_ENTRIES
requests.

9) Don't assume skb->sk is set from nft_bridge_reject and synproxy,
this fixes a recent update of the code to namespaceify
ip_default_ttl, patch from Liping Zhang.

This batch comes with four patches to validate x_tables blobs coming
from userspace. CONFIG_USERNS exposes the x_tables interface to
unprivileged users and, to be honest, this interface never received the
attention required for this move away from the CAP_NET_ADMIN domain. Florian is
working on another round with more patches with more sanity checks, so
expect a bit more Netfilter fixes in this development cycle than usual.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+170 -122
+4
include/linux/netfilter/ipset/ip_set.h
··· 234 234 spinlock_t lock; 235 235 /* References to the set */ 236 236 u32 ref; 237 + /* References to the set for netlink events like dump, 238 + * ref can be swapped out by ip_set_swap 239 + */ 240 + u32 ref_netlink; 237 241 /* The core set type */ 238 242 struct ip_set_type *type; 239 243 /* The type variant doing the real job */
+4
net/bridge/netfilter/ebtables.c
··· 1521 1521 if (copy_from_user(&tmp, user, sizeof(tmp))) 1522 1522 return -EFAULT; 1523 1523 1524 + tmp.name[sizeof(tmp.name) - 1] = '\0'; 1525 + 1524 1526 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); 1525 1527 if (!t) 1526 1528 return ret; ··· 2333 2331 2334 2332 if (copy_from_user(&tmp, user, sizeof(tmp))) 2335 2333 return -EFAULT; 2334 + 2335 + tmp.name[sizeof(tmp.name) - 1] = '\0'; 2336 2336 2337 2337 t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); 2338 2338 if (!t)
+10 -10
net/bridge/netfilter/nft_reject_bridge.c
··· 40 40 /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) 41 41 * or the bridge port (NF_BRIDGE PREROUTING). 42 42 */ 43 - static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, 43 + static void nft_reject_br_send_v4_tcp_reset(struct net *net, 44 + struct sk_buff *oldskb, 44 45 const struct net_device *dev, 45 46 int hook) 46 47 { ··· 49 48 struct iphdr *niph; 50 49 const struct tcphdr *oth; 51 50 struct tcphdr _oth; 52 - struct net *net = sock_net(oldskb->sk); 53 51 54 52 if (!nft_bridge_iphdr_validate(oldskb)) 55 53 return; ··· 75 75 br_deliver(br_port_get_rcu(dev), nskb); 76 76 } 77 77 78 - static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, 78 + static void nft_reject_br_send_v4_unreach(struct net *net, 79 + struct sk_buff *oldskb, 79 80 const struct net_device *dev, 80 81 int hook, u8 code) 81 82 { ··· 87 86 void *payload; 88 87 __wsum csum; 89 88 u8 proto; 90 - struct net *net = sock_net(oldskb->sk); 91 89 92 90 if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) 93 91 return; ··· 273 273 case htons(ETH_P_IP): 274 274 switch (priv->type) { 275 275 case NFT_REJECT_ICMP_UNREACH: 276 - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 277 - pkt->hook, 276 + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, 277 + pkt->in, pkt->hook, 278 278 priv->icmp_code); 279 279 break; 280 280 case NFT_REJECT_TCP_RST: 281 - nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, 282 - pkt->hook); 281 + nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, 282 + pkt->in, pkt->hook); 283 283 break; 284 284 case NFT_REJECT_ICMPX_UNREACH: 285 - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, 286 - pkt->hook, 285 + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, 286 + pkt->in, pkt->hook, 287 287 nft_reject_icmp_code(priv->icmp_code)); 288 288 break; 289 289 }
+23 -20
net/ipv4/netfilter/arp_tables.c
··· 359 359 } 360 360 361 361 /* All zeroes == unconditional rule. */ 362 - static inline bool unconditional(const struct arpt_arp *arp) 362 + static inline bool unconditional(const struct arpt_entry *e) 363 363 { 364 364 static const struct arpt_arp uncond; 365 365 366 - return memcmp(arp, &uncond, sizeof(uncond)) == 0; 366 + return e->target_offset == sizeof(struct arpt_entry) && 367 + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; 367 368 } 368 369 369 370 /* Figures out from what hook each rule can be called: returns 0 if ··· 403 402 |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); 404 403 405 404 /* Unconditional return/END. */ 406 - if ((e->target_offset == sizeof(struct arpt_entry) && 405 + if ((unconditional(e) && 407 406 (strcmp(t->target.u.user.name, 408 407 XT_STANDARD_TARGET) == 0) && 409 - t->verdict < 0 && unconditional(&e->arp)) || 410 - visited) { 408 + t->verdict < 0) || visited) { 411 409 unsigned int oldpos, size; 412 410 413 411 if ((strcmp(t->target.u.user.name, ··· 474 474 return 1; 475 475 } 476 476 477 - static inline int check_entry(const struct arpt_entry *e, const char *name) 477 + static inline int check_entry(const struct arpt_entry *e) 478 478 { 479 479 const struct xt_entry_target *t; 480 480 481 - if (!arp_checkentry(&e->arp)) { 482 - duprintf("arp_tables: arp check failed %p %s.\n", e, name); 481 + if (!arp_checkentry(&e->arp)) 483 482 return -EINVAL; 484 - } 485 483 486 484 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) 487 485 return -EINVAL; ··· 520 522 struct xt_target *target; 521 523 int ret; 522 524 523 - ret = check_entry(e, name); 524 - if (ret) 525 - return ret; 526 - 527 525 e->counters.pcnt = xt_percpu_counter_alloc(); 528 526 if (IS_ERR_VALUE(e->counters.pcnt)) 529 527 return -ENOMEM; ··· 551 557 const struct xt_entry_target *t; 552 558 unsigned int verdict; 553 559 554 - if (!unconditional(&e->arp)) 560 + if (!unconditional(e)) 555 561 return false; 556 562 t = arpt_get_target_c(e); 557 563 if 
(strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) ··· 570 576 unsigned int valid_hooks) 571 577 { 572 578 unsigned int h; 579 + int err; 573 580 574 581 if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || 575 - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { 582 + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || 583 + (unsigned char *)e + e->next_offset > limit) { 576 584 duprintf("Bad offset %p\n", e); 577 585 return -EINVAL; 578 586 } ··· 586 590 return -EINVAL; 587 591 } 588 592 593 + err = check_entry(e); 594 + if (err) 595 + return err; 596 + 589 597 /* Check hooks & underflows */ 590 598 for (h = 0; h < NF_ARP_NUMHOOKS; h++) { 591 599 if (!(valid_hooks & (1 << h))) ··· 598 598 newinfo->hook_entry[h] = hook_entries[h]; 599 599 if ((unsigned char *)e - base == underflows[h]) { 600 600 if (!check_underflow(e)) { 601 - pr_err("Underflows must be unconditional and " 602 - "use the STANDARD target with " 603 - "ACCEPT/DROP\n"); 601 + pr_debug("Underflows must be unconditional and " 602 + "use the STANDARD target with " 603 + "ACCEPT/DROP\n"); 604 604 return -EINVAL; 605 605 } 606 606 newinfo->underflow[h] = underflows[h]; ··· 969 969 sizeof(struct arpt_get_entries) + get.size); 970 970 return -EINVAL; 971 971 } 972 + get.name[sizeof(get.name) - 1] = '\0'; 972 973 973 974 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 974 975 if (!IS_ERR_OR_NULL(t)) { ··· 1234 1233 1235 1234 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1236 1235 if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || 1237 - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { 1236 + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || 1237 + (unsigned char *)e + e->next_offset > limit) { 1238 1238 duprintf("Bad offset %p, limit = %p\n", e, limit); 1239 1239 return -EINVAL; 1240 1240 } ··· 1248 1246 } 1249 1247 1250 1248 /* For purposes of check_entry casting the compat entry is fine */ 1251 - ret = 
check_entry((struct arpt_entry *)e, name); 1249 + ret = check_entry((struct arpt_entry *)e); 1252 1250 if (ret) 1253 1251 return ret; 1254 1252 ··· 1664 1662 *len, sizeof(get) + get.size); 1665 1663 return -EINVAL; 1666 1664 } 1665 + get.name[sizeof(get.name) - 1] = '\0'; 1667 1666 1668 1667 xt_compat_lock(NFPROTO_ARP); 1669 1668 t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+25 -23
net/ipv4/netfilter/ip_tables.c
··· 168 168 169 169 /* All zeroes == unconditional rule. */ 170 170 /* Mildly perf critical (only if packet tracing is on) */ 171 - static inline bool unconditional(const struct ipt_ip *ip) 171 + static inline bool unconditional(const struct ipt_entry *e) 172 172 { 173 173 static const struct ipt_ip uncond; 174 174 175 - return memcmp(ip, &uncond, sizeof(uncond)) == 0; 175 + return e->target_offset == sizeof(struct ipt_entry) && 176 + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; 176 177 #undef FWINV 177 178 } 178 179 ··· 230 229 } else if (s == e) { 231 230 (*rulenum)++; 232 231 233 - if (s->target_offset == sizeof(struct ipt_entry) && 232 + if (unconditional(s) && 234 233 strcmp(t->target.u.kernel.target->name, 235 234 XT_STANDARD_TARGET) == 0 && 236 - t->verdict < 0 && 237 - unconditional(&s->ip)) { 235 + t->verdict < 0) { 238 236 /* Tail of chains: STANDARD target (return/policy) */ 239 237 *comment = *chainname == hookname 240 238 ? comments[NF_IP_TRACE_COMMENT_POLICY] ··· 476 476 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 477 477 478 478 /* Unconditional return/END. 
*/ 479 - if ((e->target_offset == sizeof(struct ipt_entry) && 479 + if ((unconditional(e) && 480 480 (strcmp(t->target.u.user.name, 481 481 XT_STANDARD_TARGET) == 0) && 482 - t->verdict < 0 && unconditional(&e->ip)) || 483 - visited) { 482 + t->verdict < 0) || visited) { 484 483 unsigned int oldpos, size; 485 484 486 485 if ((strcmp(t->target.u.user.name, ··· 568 569 } 569 570 570 571 static int 571 - check_entry(const struct ipt_entry *e, const char *name) 572 + check_entry(const struct ipt_entry *e) 572 573 { 573 574 const struct xt_entry_target *t; 574 575 575 - if (!ip_checkentry(&e->ip)) { 576 - duprintf("ip check failed %p %s.\n", e, name); 576 + if (!ip_checkentry(&e->ip)) 577 577 return -EINVAL; 578 - } 579 578 580 579 if (e->target_offset + sizeof(struct xt_entry_target) > 581 580 e->next_offset) ··· 663 666 struct xt_mtchk_param mtpar; 664 667 struct xt_entry_match *ematch; 665 668 666 - ret = check_entry(e, name); 667 - if (ret) 668 - return ret; 669 - 670 669 e->counters.pcnt = xt_percpu_counter_alloc(); 671 670 if (IS_ERR_VALUE(e->counters.pcnt)) 672 671 return -ENOMEM; ··· 714 721 const struct xt_entry_target *t; 715 722 unsigned int verdict; 716 723 717 - if (!unconditional(&e->ip)) 724 + if (!unconditional(e)) 718 725 return false; 719 726 t = ipt_get_target_c(e); 720 727 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) ··· 734 741 unsigned int valid_hooks) 735 742 { 736 743 unsigned int h; 744 + int err; 737 745 738 746 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || 739 - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { 747 + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || 748 + (unsigned char *)e + e->next_offset > limit) { 740 749 duprintf("Bad offset %p\n", e); 741 750 return -EINVAL; 742 751 } ··· 750 755 return -EINVAL; 751 756 } 752 757 758 + err = check_entry(e); 759 + if (err) 760 + return err; 761 + 753 762 /* Check hooks & underflows */ 754 763 for (h = 0; h < NF_INET_NUMHOOKS; h++) { 755 764 if 
(!(valid_hooks & (1 << h))) ··· 762 763 newinfo->hook_entry[h] = hook_entries[h]; 763 764 if ((unsigned char *)e - base == underflows[h]) { 764 765 if (!check_underflow(e)) { 765 - pr_err("Underflows must be unconditional and " 766 - "use the STANDARD target with " 767 - "ACCEPT/DROP\n"); 766 + pr_debug("Underflows must be unconditional and " 767 + "use the STANDARD target with " 768 + "ACCEPT/DROP\n"); 768 769 return -EINVAL; 769 770 } 770 771 newinfo->underflow[h] = underflows[h]; ··· 1156 1157 *len, sizeof(get) + get.size); 1157 1158 return -EINVAL; 1158 1159 } 1160 + get.name[sizeof(get.name) - 1] = '\0'; 1159 1161 1160 1162 t = xt_find_table_lock(net, AF_INET, get.name); 1161 1163 if (!IS_ERR_OR_NULL(t)) { ··· 1493 1493 1494 1494 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1495 1495 if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || 1496 - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { 1496 + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || 1497 + (unsigned char *)e + e->next_offset > limit) { 1497 1498 duprintf("Bad offset %p, limit = %p\n", e, limit); 1498 1499 return -EINVAL; 1499 1500 } ··· 1507 1506 } 1508 1507 1509 1508 /* For purposes of check_entry casting the compat entry is fine */ 1510 - ret = check_entry((struct ipt_entry *)e, name); 1509 + ret = check_entry((struct ipt_entry *)e); 1511 1510 if (ret) 1512 1511 return ret; 1513 1512 ··· 1936 1935 *len, sizeof(get) + get.size); 1937 1936 return -EINVAL; 1938 1937 } 1938 + get.name[sizeof(get.name) - 1] = '\0'; 1939 1939 1940 1940 xt_compat_lock(AF_INET); 1941 1941 t = xt_find_table_lock(net, AF_INET, get.name);
+28 -26
net/ipv4/netfilter/ipt_SYNPROXY.c
··· 18 18 #include <net/netfilter/nf_conntrack_synproxy.h> 19 19 20 20 static struct iphdr * 21 - synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) 21 + synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, 22 + __be32 daddr) 22 23 { 23 24 struct iphdr *iph; 24 - struct net *net = sock_net(skb->sk); 25 25 26 26 skb_reset_network_header(skb); 27 27 iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); ··· 40 40 } 41 41 42 42 static void 43 - synproxy_send_tcp(const struct synproxy_net *snet, 43 + synproxy_send_tcp(struct net *net, 44 44 const struct sk_buff *skb, struct sk_buff *nskb, 45 45 struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, 46 46 struct iphdr *niph, struct tcphdr *nth, 47 47 unsigned int tcp_hdr_size) 48 48 { 49 - struct net *net = nf_ct_net(snet->tmpl); 50 - 51 49 nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); 52 50 nskb->ip_summed = CHECKSUM_PARTIAL; 53 51 nskb->csum_start = (unsigned char *)nth - nskb->head; ··· 70 72 } 71 73 72 74 static void 73 - synproxy_send_client_synack(const struct synproxy_net *snet, 75 + synproxy_send_client_synack(struct net *net, 74 76 const struct sk_buff *skb, const struct tcphdr *th, 75 77 const struct synproxy_options *opts) 76 78 { ··· 89 91 return; 90 92 skb_reserve(nskb, MAX_TCP_HEADER); 91 93 92 - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); 94 + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); 93 95 94 96 skb_reset_transport_header(nskb); 95 97 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); ··· 107 109 108 110 synproxy_build_options(nth, opts); 109 111 110 - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 112 + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 111 113 niph, nth, tcp_hdr_size); 112 114 } 113 115 114 116 static void 115 - synproxy_send_server_syn(const struct synproxy_net *snet, 117 + synproxy_send_server_syn(struct net *net, 116 118 const struct sk_buff *skb, const 
struct tcphdr *th, 117 119 const struct synproxy_options *opts, u32 recv_seq) 118 120 { 121 + struct synproxy_net *snet = synproxy_pernet(net); 119 122 struct sk_buff *nskb; 120 123 struct iphdr *iph, *niph; 121 124 struct tcphdr *nth; ··· 131 132 return; 132 133 skb_reserve(nskb, MAX_TCP_HEADER); 133 134 134 - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); 135 + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); 135 136 136 137 skb_reset_transport_header(nskb); 137 138 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); ··· 152 153 153 154 synproxy_build_options(nth, opts); 154 155 155 - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, 156 + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, 156 157 niph, nth, tcp_hdr_size); 157 158 } 158 159 159 160 static void 160 - synproxy_send_server_ack(const struct synproxy_net *snet, 161 + synproxy_send_server_ack(struct net *net, 161 162 const struct ip_ct_tcp *state, 162 163 const struct sk_buff *skb, const struct tcphdr *th, 163 164 const struct synproxy_options *opts) ··· 176 177 return; 177 178 skb_reserve(nskb, MAX_TCP_HEADER); 178 179 179 - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); 180 + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); 180 181 181 182 skb_reset_transport_header(nskb); 182 183 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); ··· 192 193 193 194 synproxy_build_options(nth, opts); 194 195 195 - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); 196 + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); 196 197 } 197 198 198 199 static void 199 - synproxy_send_client_ack(const struct synproxy_net *snet, 200 + synproxy_send_client_ack(struct net *net, 200 201 const struct sk_buff *skb, const struct tcphdr *th, 201 202 const struct synproxy_options *opts) 202 203 { ··· 214 215 return; 215 216 skb_reserve(nskb, MAX_TCP_HEADER); 216 217 217 - niph = synproxy_build_ip(nskb, iph->saddr, 
iph->daddr); 218 + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); 218 219 219 220 skb_reset_transport_header(nskb); 220 221 nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); ··· 230 231 231 232 synproxy_build_options(nth, opts); 232 233 233 - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 234 + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, 234 235 niph, nth, tcp_hdr_size); 235 236 } 236 237 237 238 static bool 238 - synproxy_recv_client_ack(const struct synproxy_net *snet, 239 + synproxy_recv_client_ack(struct net *net, 239 240 const struct sk_buff *skb, const struct tcphdr *th, 240 241 struct synproxy_options *opts, u32 recv_seq) 241 242 { 243 + struct synproxy_net *snet = synproxy_pernet(net); 242 244 int mss; 243 245 244 246 mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); ··· 255 255 if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) 256 256 synproxy_check_timestamp_cookie(opts); 257 257 258 - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); 258 + synproxy_send_server_syn(net, skb, th, opts, recv_seq); 259 259 return true; 260 260 } 261 261 ··· 263 263 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) 264 264 { 265 265 const struct xt_synproxy_info *info = par->targinfo; 266 - struct synproxy_net *snet = synproxy_pernet(par->net); 266 + struct net *net = par->net; 267 + struct synproxy_net *snet = synproxy_pernet(net); 267 268 struct synproxy_options opts = {}; 268 269 struct tcphdr *th, _th; 269 270 ··· 293 292 XT_SYNPROXY_OPT_SACK_PERM | 294 293 XT_SYNPROXY_OPT_ECN); 295 294 296 - synproxy_send_client_synack(snet, skb, th, &opts); 295 + synproxy_send_client_synack(net, skb, th, &opts); 297 296 return NF_DROP; 298 297 299 298 } else if (th->ack && !(th->fin || th->rst || th->syn)) { 300 299 /* ACK from client */ 301 - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); 300 + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); 302 301 
return NF_DROP; 303 302 } 304 303 ··· 309 308 struct sk_buff *skb, 310 309 const struct nf_hook_state *nhs) 311 310 { 312 - struct synproxy_net *snet = synproxy_pernet(nhs->net); 311 + struct net *net = nhs->net; 312 + struct synproxy_net *snet = synproxy_pernet(net); 313 313 enum ip_conntrack_info ctinfo; 314 314 struct nf_conn *ct; 315 315 struct nf_conn_synproxy *synproxy; ··· 367 365 * therefore we need to add 1 to make the SYN sequence 368 366 * number match the one of first SYN. 369 367 */ 370 - if (synproxy_recv_client_ack(snet, skb, th, &opts, 368 + if (synproxy_recv_client_ack(net, skb, th, &opts, 371 369 ntohl(th->seq) + 1)) 372 370 this_cpu_inc(snet->stats->cookie_retrans); 373 371 ··· 393 391 XT_SYNPROXY_OPT_SACK_PERM); 394 392 395 393 swap(opts.tsval, opts.tsecr); 396 - synproxy_send_server_ack(snet, state, skb, th, &opts); 394 + synproxy_send_server_ack(net, state, skb, th, &opts); 397 395 398 396 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); 399 397 400 398 swap(opts.tsval, opts.tsecr); 401 - synproxy_send_client_ack(snet, skb, th, &opts); 399 + synproxy_send_client_ack(net, skb, th, &opts); 402 400 403 401 consume_skb(skb); 404 402 return NF_STOLEN;
+25 -23
net/ipv6/netfilter/ip6_tables.c
··· 198 198 199 199 /* All zeroes == unconditional rule. */ 200 200 /* Mildly perf critical (only if packet tracing is on) */ 201 - static inline bool unconditional(const struct ip6t_ip6 *ipv6) 201 + static inline bool unconditional(const struct ip6t_entry *e) 202 202 { 203 203 static const struct ip6t_ip6 uncond; 204 204 205 - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; 205 + return e->target_offset == sizeof(struct ip6t_entry) && 206 + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; 206 207 } 207 208 208 209 static inline const struct xt_entry_target * ··· 259 258 } else if (s == e) { 260 259 (*rulenum)++; 261 260 262 - if (s->target_offset == sizeof(struct ip6t_entry) && 261 + if (unconditional(s) && 263 262 strcmp(t->target.u.kernel.target->name, 264 263 XT_STANDARD_TARGET) == 0 && 265 - t->verdict < 0 && 266 - unconditional(&s->ipv6)) { 264 + t->verdict < 0) { 267 265 /* Tail of chains: STANDARD target (return/policy) */ 268 266 *comment = *chainname == hookname 269 267 ? comments[NF_IP6_TRACE_COMMENT_POLICY] ··· 488 488 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 489 489 490 490 /* Unconditional return/END. 
*/ 491 - if ((e->target_offset == sizeof(struct ip6t_entry) && 491 + if ((unconditional(e) && 492 492 (strcmp(t->target.u.user.name, 493 493 XT_STANDARD_TARGET) == 0) && 494 - t->verdict < 0 && 495 - unconditional(&e->ipv6)) || visited) { 494 + t->verdict < 0) || visited) { 496 495 unsigned int oldpos, size; 497 496 498 497 if ((strcmp(t->target.u.user.name, ··· 580 581 } 581 582 582 583 static int 583 - check_entry(const struct ip6t_entry *e, const char *name) 584 + check_entry(const struct ip6t_entry *e) 584 585 { 585 586 const struct xt_entry_target *t; 586 587 587 - if (!ip6_checkentry(&e->ipv6)) { 588 - duprintf("ip_tables: ip check failed %p %s.\n", e, name); 588 + if (!ip6_checkentry(&e->ipv6)) 589 589 return -EINVAL; 590 - } 591 590 592 591 if (e->target_offset + sizeof(struct xt_entry_target) > 593 592 e->next_offset) ··· 676 679 struct xt_mtchk_param mtpar; 677 680 struct xt_entry_match *ematch; 678 681 679 - ret = check_entry(e, name); 680 - if (ret) 681 - return ret; 682 - 683 682 e->counters.pcnt = xt_percpu_counter_alloc(); 684 683 if (IS_ERR_VALUE(e->counters.pcnt)) 685 684 return -ENOMEM; ··· 726 733 const struct xt_entry_target *t; 727 734 unsigned int verdict; 728 735 729 - if (!unconditional(&e->ipv6)) 736 + if (!unconditional(e)) 730 737 return false; 731 738 t = ip6t_get_target_c(e); 732 739 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) ··· 746 753 unsigned int valid_hooks) 747 754 { 748 755 unsigned int h; 756 + int err; 749 757 750 758 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || 751 - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { 759 + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || 760 + (unsigned char *)e + e->next_offset > limit) { 752 761 duprintf("Bad offset %p\n", e); 753 762 return -EINVAL; 754 763 } ··· 762 767 return -EINVAL; 763 768 } 764 769 770 + err = check_entry(e); 771 + if (err) 772 + return err; 773 + 765 774 /* Check hooks & underflows */ 766 775 for (h = 0; h < 
NF_INET_NUMHOOKS; h++) { 767 776 if (!(valid_hooks & (1 << h))) ··· 774 775 newinfo->hook_entry[h] = hook_entries[h]; 775 776 if ((unsigned char *)e - base == underflows[h]) { 776 777 if (!check_underflow(e)) { 777 - pr_err("Underflows must be unconditional and " 778 - "use the STANDARD target with " 779 - "ACCEPT/DROP\n"); 778 + pr_debug("Underflows must be unconditional and " 779 + "use the STANDARD target with " 780 + "ACCEPT/DROP\n"); 780 781 return -EINVAL; 781 782 } 782 783 newinfo->underflow[h] = underflows[h]; ··· 1168 1169 *len, sizeof(get) + get.size); 1169 1170 return -EINVAL; 1170 1171 } 1172 + get.name[sizeof(get.name) - 1] = '\0'; 1171 1173 1172 1174 t = xt_find_table_lock(net, AF_INET6, get.name); 1173 1175 if (!IS_ERR_OR_NULL(t)) { ··· 1505 1505 1506 1506 duprintf("check_compat_entry_size_and_hooks %p\n", e); 1507 1507 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || 1508 - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { 1508 + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || 1509 + (unsigned char *)e + e->next_offset > limit) { 1509 1510 duprintf("Bad offset %p, limit = %p\n", e, limit); 1510 1511 return -EINVAL; 1511 1512 } ··· 1519 1518 } 1520 1519 1521 1520 /* For purposes of check_entry casting the compat entry is fine */ 1522 - ret = check_entry((struct ip6t_entry *)e, name); 1521 + ret = check_entry((struct ip6t_entry *)e); 1523 1522 if (ret) 1524 1523 return ret; 1525 1524 ··· 1945 1944 *len, sizeof(get) + get.size); 1946 1945 return -EINVAL; 1947 1946 } 1947 + get.name[sizeof(get.name) - 1] = '\0'; 1948 1948 1949 1949 xt_compat_lock(AF_INET6); 1950 1950 t = xt_find_table_lock(net, AF_INET6, get.name);
+1 -1
net/netfilter/ipset/ip_set_bitmap_gen.h
··· 95 95 if (!nested) 96 96 goto nla_put_failure; 97 97 if (mtype_do_head(skb, map) || 98 - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 98 + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 99 99 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 100 100 goto nla_put_failure; 101 101 if (unlikely(ip_set_put_flags(skb, set)))
+28 -5
net/netfilter/ipset/ip_set_core.c
··· 497 497 write_unlock_bh(&ip_set_ref_lock); 498 498 } 499 499 500 + /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need 501 + * a separate reference counter 502 + */ 503 + static inline void 504 + __ip_set_get_netlink(struct ip_set *set) 505 + { 506 + write_lock_bh(&ip_set_ref_lock); 507 + set->ref_netlink++; 508 + write_unlock_bh(&ip_set_ref_lock); 509 + } 510 + 511 + static inline void 512 + __ip_set_put_netlink(struct ip_set *set) 513 + { 514 + write_lock_bh(&ip_set_ref_lock); 515 + BUG_ON(set->ref_netlink == 0); 516 + set->ref_netlink--; 517 + write_unlock_bh(&ip_set_ref_lock); 518 + } 519 + 500 520 /* Add, del and test set entries from kernel. 501 521 * 502 522 * The set behind the index must exist and must be referenced ··· 1022 1002 if (!attr[IPSET_ATTR_SETNAME]) { 1023 1003 for (i = 0; i < inst->ip_set_max; i++) { 1024 1004 s = ip_set(inst, i); 1025 - if (s && s->ref) { 1005 + if (s && (s->ref || s->ref_netlink)) { 1026 1006 ret = -IPSET_ERR_BUSY; 1027 1007 goto out; 1028 1008 } ··· 1044 1024 if (!s) { 1045 1025 ret = -ENOENT; 1046 1026 goto out; 1047 - } else if (s->ref) { 1027 + } else if (s->ref || s->ref_netlink) { 1048 1028 ret = -IPSET_ERR_BUSY; 1049 1029 goto out; 1050 1030 } ··· 1191 1171 from->family == to->family)) 1192 1172 return -IPSET_ERR_TYPE_MISMATCH; 1193 1173 1174 + if (from->ref_netlink || to->ref_netlink) 1175 + return -EBUSY; 1176 + 1194 1177 strncpy(from_name, from->name, IPSET_MAXNAMELEN); 1195 1178 strncpy(from->name, to->name, IPSET_MAXNAMELEN); 1196 1179 strncpy(to->name, from_name, IPSET_MAXNAMELEN); ··· 1229 1206 if (set->variant->uref) 1230 1207 set->variant->uref(set, cb, false); 1231 1208 pr_debug("release set %s\n", set->name); 1232 - __ip_set_put_byindex(inst, index); 1209 + __ip_set_put_netlink(set); 1233 1210 } 1234 1211 return 0; 1235 1212 } ··· 1351 1328 if (!cb->args[IPSET_CB_ARG0]) { 1352 1329 /* Start listing: make sure set won't be destroyed */ 1353 1330 pr_debug("reference set\n"); 1354 
- set->ref++; 1331 + set->ref_netlink++; 1355 1332 } 1356 1333 write_unlock_bh(&ip_set_ref_lock); 1357 1334 nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, ··· 1419 1396 if (set->variant->uref) 1420 1397 set->variant->uref(set, cb, false); 1421 1398 pr_debug("release set %s\n", set->name); 1422 - __ip_set_put_byindex(inst, index); 1399 + __ip_set_put_netlink(set); 1423 1400 cb->args[IPSET_CB_ARG0] = 0; 1424 1401 } 1425 1402 out:
+1 -1
net/netfilter/ipset/ip_set_hash_gen.h
··· 1082 1082 if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) 1083 1083 goto nla_put_failure; 1084 1084 #endif 1085 - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 1085 + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 1086 1086 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) 1087 1087 goto nla_put_failure; 1088 1088 if (unlikely(ip_set_put_flags(skb, set)))
+1 -1
net/netfilter/ipset/ip_set_list_set.c
··· 458 458 if (!nested) 459 459 goto nla_put_failure; 460 460 if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || 461 - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || 461 + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || 462 462 nla_put_net32(skb, IPSET_ATTR_MEMSIZE, 463 463 htonl(sizeof(*map) + n * set->dsize))) 464 464 goto nla_put_failure;
+3 -1
net/openvswitch/Kconfig
··· 7 7 depends on INET 8 8 depends on !NF_CONNTRACK || \ 9 9 (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ 10 - (!NF_NAT || NF_NAT))) 10 + (!NF_NAT || NF_NAT) && \ 11 + (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ 12 + (!NF_NAT_IPV6 || NF_NAT_IPV6))) 11 13 select LIBCRC32C 12 14 select MPLS 13 15 select NET_MPLS_GSO
+11 -10
net/openvswitch/conntrack.c
··· 535 535 switch (ctinfo) { 536 536 case IP_CT_RELATED: 537 537 case IP_CT_RELATED_REPLY: 538 - if (skb->protocol == htons(ETH_P_IP) && 538 + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 539 + skb->protocol == htons(ETH_P_IP) && 539 540 ip_hdr(skb)->protocol == IPPROTO_ICMP) { 540 541 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 541 542 hooknum)) 542 543 err = NF_DROP; 543 544 goto push; 544 - #if IS_ENABLED(CONFIG_NF_NAT_IPV6) 545 - } else if (skb->protocol == htons(ETH_P_IPV6)) { 545 + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 546 + skb->protocol == htons(ETH_P_IPV6)) { 546 547 __be16 frag_off; 547 548 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 548 549 int hdrlen = ipv6_skip_exthdr(skb, ··· 558 557 err = NF_DROP; 559 558 goto push; 560 559 } 561 - #endif 562 560 } 563 561 /* Non-ICMP, fall thru to initialize if needed. */ 564 562 case IP_CT_NEW: ··· 664 664 665 665 /* Determine NAT type. 666 666 * Check if the NAT type can be deduced from the tracked connection. 667 - * Make sure expected traffic is NATted only when committing. 667 + * Make sure new expected connections (IP_CT_RELATED) are NATted only 668 + * when committing. 668 669 */ 669 670 if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && 670 671 ct->status & IPS_NAT_MASK && 671 - (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { 672 + (ctinfo != IP_CT_RELATED || info->commit)) { 672 673 /* NAT an established or related connection like before. 
*/ 673 674 if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) 674 675 /* This is the REPLY direction for a connection ··· 1240 1239 } 1241 1240 1242 1241 if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { 1243 - if (info->family == NFPROTO_IPV4) { 1242 + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && 1243 + info->family == NFPROTO_IPV4) { 1244 1244 if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, 1245 1245 info->range.min_addr.ip) || 1246 1246 (info->range.max_addr.ip ··· 1249 1247 (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, 1250 1248 info->range.max_addr.ip)))) 1251 1249 return false; 1252 - #if IS_ENABLED(CONFIG_NF_NAT_IPV6) 1253 - } else if (info->family == NFPROTO_IPV6) { 1250 + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && 1251 + info->family == NFPROTO_IPV6) { 1254 1252 if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, 1255 1253 &info->range.min_addr.in6) || 1256 1254 (memcmp(&info->range.max_addr.in6, ··· 1259 1257 (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, 1260 1258 &info->range.max_addr.in6)))) 1261 1259 return false; 1262 - #endif 1263 1260 } else { 1264 1261 return false; 1265 1262 }