Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next.
Basically, more incremental updates for br_netfilter from Florian
Westphal, small nf_tables updates (including one fix for rb-tree
locking) and a small two-liner to add extra validation for the REJECT6
target.

More specifically, they are:

1) Use the conntrack status flags from br_netfilter to know that DNAT is
happening. Patch from Florian Westphal.

2) nf_bridge->physoutdev != NULL already indicates that the traffic is
bridged, so let's get rid of the BRNF_BRIDGED flag. Also from Florian.

3) Another patch to prepare voidization of seq_printf/seq_puts/seq_putc,
from Joe Perches.

4) Consolidation of nf_tables_newtable() error path.

5) Kill nf_bridge_pad used by br_netfilter from ip_fragment(),
from Florian Westphal.

6) Access rb-tree root node inside the lock and remove unnecessary
locking from the get path (we already hold nfnl_lock there), from
Patrick McHardy.

7) You cannot use NFT_SET_ELEM_INTERVAL_END when the set doesn't
support intervals, also from Patrick.

8) Enforce IP6T_F_PROTO from ip6t_REJECT to make sure the core is
actually restricting matches to TCP.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+77 -68
-29
include/linux/netfilter_bridge.h
··· 19 19 20 20 #define BRNF_PKT_TYPE 0x01 21 21 #define BRNF_BRIDGED_DNAT 0x02 22 - #define BRNF_BRIDGED 0x04 23 22 #define BRNF_NF_BRIDGE_PREROUTING 0x08 24 23 #define BRNF_8021Q 0x10 25 24 #define BRNF_PPPoE 0x20 26 - 27 - static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 28 - { 29 - switch (skb->protocol) { 30 - case __cpu_to_be16(ETH_P_8021Q): 31 - return VLAN_HLEN; 32 - case __cpu_to_be16(ETH_P_PPP_SES): 33 - return PPPOE_SES_HLEN; 34 - default: 35 - return 0; 36 - } 37 - } 38 25 39 26 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) 40 27 { ··· 32 45 33 46 int br_handle_frame_finish(struct sk_buff *skb); 34 47 35 - /* This is called by the IP fragmenting code and it ensures there is 36 - * enough room for the encapsulating header (if there is one). */ 37 - static inline unsigned int nf_bridge_pad(const struct sk_buff *skb) 38 - { 39 - if (skb->nf_bridge) 40 - return nf_bridge_encap_header_len(skb); 41 - return 0; 42 - } 43 - 44 - struct bridge_skb_cb { 45 - union { 46 - __be32 ipv4; 47 - } daddr; 48 - }; 49 - 50 48 static inline void br_drop_fake_rtable(struct sk_buff *skb) 51 49 { 52 50 struct dst_entry *dst = skb_dst(skb); ··· 41 69 } 42 70 43 71 #else 44 - #define nf_bridge_pad(skb) (0) 45 72 #define br_drop_fake_rtable(skb) do { } while (0) 46 73 #endif /* CONFIG_BRIDGE_NETFILTER */ 47 74
+39 -9
net/bridge/br_netfilter.c
··· 37 37 #include <net/route.h> 38 38 #include <net/netfilter/br_netfilter.h> 39 39 40 + #if IS_ENABLED(CONFIG_NF_CONNTRACK) 41 + #include <net/netfilter/nf_conntrack.h> 42 + #endif 43 + 40 44 #include <asm/uaccess.h> 41 45 #include "br_private.h" 42 46 #ifdef CONFIG_SYSCTL 43 47 #include <linux/sysctl.h> 44 48 #endif 45 - 46 - #define skb_origaddr(skb) (((struct bridge_skb_cb *) \ 47 - (skb->nf_bridge->data))->daddr.ipv4) 48 - #define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) 49 - #define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) 50 49 51 50 #ifdef CONFIG_SYSCTL 52 51 static struct ctl_table_header *brnf_sysctl_header; ··· 151 152 nf_bridge = tmp; 152 153 } 153 154 return nf_bridge; 155 + } 156 + 157 + static unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb) 158 + { 159 + switch (skb->protocol) { 160 + case __cpu_to_be16(ETH_P_8021Q): 161 + return VLAN_HLEN; 162 + case __cpu_to_be16(ETH_P_PPP_SES): 163 + return PPPOE_SES_HLEN; 164 + default: 165 + return 0; 166 + } 154 167 } 155 168 156 169 static inline void nf_bridge_push_encap_header(struct sk_buff *skb) ··· 331 320 free_skb: 332 321 kfree_skb(skb); 333 322 return 0; 323 + } 324 + 325 + static bool dnat_took_place(const struct sk_buff *skb) 326 + { 327 + #if IS_ENABLED(CONFIG_NF_CONNTRACK) 328 + enum ip_conntrack_info ctinfo; 329 + struct nf_conn *ct; 330 + 331 + ct = nf_ct_get(skb, &ctinfo); 332 + if (!ct || nf_ct_is_untracked(ct)) 333 + return false; 334 + 335 + return test_bit(IPS_DST_NAT_BIT, &ct->status); 336 + #else 337 + return false; 338 + #endif 334 339 } 335 340 336 341 /* This requires some explaining. 
If DNAT has taken place, ··· 652 625 return NF_DROP; 653 626 if (!setup_pre_routing(skb)) 654 627 return NF_DROP; 655 - store_orig_dstaddr(skb); 628 + 656 629 skb->protocol = htons(ETH_P_IP); 657 630 658 631 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, ··· 748 721 if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) 749 722 return NF_DROP; 750 723 751 - /* The physdev module checks on this */ 752 - nf_bridge->mask |= BRNF_BRIDGED; 753 724 nf_bridge->physoutdev = skb->dev; 754 725 if (pf == NFPROTO_IPV4) 755 726 skb->protocol = htons(ETH_P_IP); ··· 867 842 struct net_device *realoutdev = bridge_parent(skb->dev); 868 843 u_int8_t pf; 869 844 870 - if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) 845 + /* if nf_bridge is set, but ->physoutdev is NULL, this packet came in 846 + * on a bridge, but was delivered locally and is now being routed: 847 + * 848 + * POST_ROUTING was already invoked from the ip stack. 849 + */ 850 + if (!nf_bridge || !nf_bridge->physoutdev) 871 851 return NF_ACCEPT; 872 852 873 853 if (!realoutdev)
+1 -4
net/ipv4/ip_output.c
··· 636 636 left = skb->len - hlen; /* Space per frame */ 637 637 ptr = hlen; /* Where to start from */ 638 638 639 - /* for bridged IP traffic encapsulated inside f.e. a vlan header, 640 - * we need to make room for the encapsulating header 641 - */ 642 - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); 639 + ll_rs = LL_RESERVED_SPACE(rt->dst.dev); 643 640 644 641 /* 645 642 * Fragment the datagram.
+3 -1
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
··· 300 300 __nf_ct_l3proto_find(exp->tuple.src.l3num), 301 301 __nf_ct_l4proto_find(exp->tuple.src.l3num, 302 302 exp->tuple.dst.protonum)); 303 - return seq_putc(s, '\n'); 303 + seq_putc(s, '\n'); 304 + 305 + return 0; 304 306 } 305 307 306 308 static const struct seq_operations exp_seq_ops = {
+2 -1
net/ipv6/netfilter/ip6t_REJECT.c
··· 83 83 return -EINVAL; 84 84 } else if (rejinfo->with == IP6T_TCP_RESET) { 85 85 /* Must specify that it's a TCP packet */ 86 - if (e->ipv6.proto != IPPROTO_TCP || 86 + if (!(e->ipv6.flags & IP6T_F_PROTO) || 87 + e->ipv6.proto != IPPROTO_TCP || 87 88 (e->ipv6.invflags & XT_INV_PROTO)) { 88 89 pr_info("TCP_RESET illegal for non-tcp\n"); 89 90 return -EINVAL;
+5 -3
net/netfilter/nf_conntrack_acct.c
··· 47 47 return 0; 48 48 49 49 counter = acct->counter; 50 - return seq_printf(s, "packets=%llu bytes=%llu ", 51 - (unsigned long long)atomic64_read(&counter[dir].packets), 52 - (unsigned long long)atomic64_read(&counter[dir].bytes)); 50 + seq_printf(s, "packets=%llu bytes=%llu ", 51 + (unsigned long long)atomic64_read(&counter[dir].packets), 52 + (unsigned long long)atomic64_read(&counter[dir].bytes)); 53 + 54 + return 0; 53 55 }; 54 56 EXPORT_SYMBOL_GPL(seq_print_acct); 55 57
+3 -1
net/netfilter/nf_conntrack_expect.c
··· 561 561 helper->expect_policy[expect->class].name); 562 562 } 563 563 564 - return seq_putc(s, '\n'); 564 + seq_putc(s, '\n'); 565 + 566 + return 0; 565 567 } 566 568 567 569 static const struct seq_operations exp_seq_ops = {
+14 -9
net/netfilter/nf_tables_api.c
··· 687 687 if (!try_module_get(afi->owner)) 688 688 return -EAFNOSUPPORT; 689 689 690 + err = -ENOMEM; 690 691 table = kzalloc(sizeof(*table), GFP_KERNEL); 691 - if (table == NULL) { 692 - module_put(afi->owner); 693 - return -ENOMEM; 694 - } 692 + if (table == NULL) 693 + goto err1; 695 694 696 695 nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); 697 696 INIT_LIST_HEAD(&table->chains); ··· 699 700 700 701 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); 701 702 err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); 702 - if (err < 0) { 703 - kfree(table); 704 - module_put(afi->owner); 705 - return err; 706 - } 703 + if (err < 0) 704 + goto err2; 705 + 707 706 list_add_tail_rcu(&table->list, &afi->tables); 708 707 return 0; 708 + err2: 709 + kfree(table); 710 + err1: 711 + module_put(afi->owner); 712 + return err; 709 713 } 710 714 711 715 static int nft_flush_table(struct nft_ctx *ctx) ··· 3137 3135 if (nla[NFTA_SET_ELEM_FLAGS] != NULL) { 3138 3136 elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS])); 3139 3137 if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END) 3138 + return -EINVAL; 3139 + if (!(set->flags & NFT_SET_INTERVAL) && 3140 + elem.flags & NFT_SET_ELEM_INTERVAL_END) 3140 3141 return -EINVAL; 3141 3142 } 3142 3143
+2 -4
net/netfilter/nft_rbtree.c
··· 37 37 { 38 38 const struct nft_rbtree *priv = nft_set_priv(set); 39 39 const struct nft_rbtree_elem *rbe, *interval = NULL; 40 - const struct rb_node *parent = priv->root.rb_node; 40 + const struct rb_node *parent; 41 41 int d; 42 42 43 43 spin_lock_bh(&nft_rbtree_lock); 44 + parent = priv->root.rb_node; 44 45 while (parent != NULL) { 45 46 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 46 47 ··· 159 158 struct nft_rbtree_elem *rbe; 160 159 int d; 161 160 162 - spin_lock_bh(&nft_rbtree_lock); 163 161 while (parent != NULL) { 164 162 rbe = rb_entry(parent, struct nft_rbtree_elem, node); 165 163 ··· 173 173 !(rbe->flags & NFT_SET_ELEM_INTERVAL_END)) 174 174 nft_data_copy(&elem->data, rbe->data); 175 175 elem->flags = rbe->flags; 176 - spin_unlock_bh(&nft_rbtree_lock); 177 176 return 0; 178 177 } 179 178 } 180 - spin_unlock_bh(&nft_rbtree_lock); 181 179 return -ENOENT; 182 180 } 183 181
+1 -2
net/netfilter/xt_physdev.c
··· 56 56 57 57 /* This only makes sense in the FORWARD and POSTROUTING chains */ 58 58 if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && 59 - (!!(nf_bridge->mask & BRNF_BRIDGED) ^ 60 - !(info->invert & XT_PHYSDEV_OP_BRIDGED))) 59 + (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) 61 60 return false; 62 61 63 62 if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&