Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_conntrack: add efficient mark to zone mapping

This work adds the possibility of deriving the zone id from the skb->mark
field in a scalable manner. This allows a single template to serve
hundreds or thousands of different zones, for example, instead of needing
one match for each zone as an extra CT jump target.

Note that this information needs to be attached to the template: at the
time we try to look up a possible ct object, we already need to know the
zone information for a possible match when going into
__nf_conntrack_find_get(). This work provides a minimal implementation of
such a mapping.

In order to not add/expose an extra ct->status bit, the zone structure has
been extended to carry a flag for deriving the mark.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Daniel Borkmann and committed by
Pablo Neira Ayuso
5e8018fc deedb590

+70 -42
+42 -3
include/net/netfilter/nf_conntrack_zones.h
··· 10 10 11 11 #define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL) 12 12 13 + #define NF_CT_FLAG_MARK 1 14 + 13 15 struct nf_conntrack_zone { 14 16 u16 id; 15 - u16 dir; 17 + u8 flags; 18 + u8 dir; 16 19 }; 17 20 18 21 extern const struct nf_conntrack_zone nf_ct_zone_dflt; ··· 35 32 } 36 33 37 34 static inline const struct nf_conntrack_zone * 38 - nf_ct_zone_tmpl(const struct nf_conn *tmpl) 35 + nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) 39 36 { 40 - return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt; 37 + zone->id = id; 38 + zone->flags = flags; 39 + zone->dir = dir; 40 + 41 + return zone; 42 + } 43 + 44 + static inline const struct nf_conntrack_zone * 45 + nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, 46 + struct nf_conntrack_zone *tmp) 47 + { 48 + const struct nf_conntrack_zone *zone; 49 + 50 + if (!tmpl) 51 + return &nf_ct_zone_dflt; 52 + 53 + zone = nf_ct_zone(tmpl); 54 + if (zone->flags & NF_CT_FLAG_MARK) 55 + zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); 56 + 57 + return zone; 58 + } 59 + 60 + static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, 61 + const struct nf_conntrack_zone *info) 62 + { 63 + #ifdef CONFIG_NF_CONNTRACK_ZONES 64 + struct nf_conntrack_zone *nf_ct_zone; 65 + 66 + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); 67 + if (!nf_ct_zone) 68 + return -ENOMEM; 69 + 70 + nf_ct_zone_init(nf_ct_zone, info->id, info->dir, 71 + info->flags); 72 + #endif 73 + return 0; 41 74 } 42 75 43 76 static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
+3 -1
include/uapi/linux/netfilter/xt_CT.h
··· 8 8 XT_CT_NOTRACK_ALIAS = 1 << 1, 9 9 XT_CT_ZONE_DIR_ORIG = 1 << 2, 10 10 XT_CT_ZONE_DIR_REPL = 1 << 3, 11 + XT_CT_ZONE_MARK = 1 << 4, 11 12 12 13 XT_CT_MASK = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS | 13 - XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL, 14 + XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL | 15 + XT_CT_ZONE_MARK, 14 16 }; 15 17 16 18 struct xt_ct_target_info {
+2 -1
net/ipv4/netfilter/nf_conntrack_proto_icmp.c
··· 135 135 const struct nf_conntrack_l4proto *innerproto; 136 136 const struct nf_conntrack_tuple_hash *h; 137 137 const struct nf_conntrack_zone *zone; 138 + struct nf_conntrack_zone tmp; 138 139 139 140 NF_CT_ASSERT(skb->nfct == NULL); 140 - zone = nf_ct_zone_tmpl(tmpl); 141 + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 141 142 142 143 /* Are they talking about one of our connections? */ 143 144 if (!nf_ct_get_tuplepr(skb,
+3 -1
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
··· 150 150 struct nf_conntrack_tuple intuple, origtuple; 151 151 const struct nf_conntrack_tuple_hash *h; 152 152 const struct nf_conntrack_l4proto *inproto; 153 + struct nf_conntrack_zone tmp; 153 154 154 155 NF_CT_ASSERT(skb->nfct == NULL); 155 156 ··· 177 176 178 177 *ctinfo = IP_CT_RELATED; 179 178 180 - h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl), &intuple); 179 + h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp), 180 + &intuple); 181 181 if (!h) { 182 182 pr_debug("icmpv6_error: no match\n"); 183 183 return -NF_ACCEPT;
+14 -32
net/netfilter/nf_conntrack_core.c
··· 301 301 tmpl->status = IPS_TEMPLATE; 302 302 write_pnet(&tmpl->ct_net, net); 303 303 304 - #ifdef CONFIG_NF_CONNTRACK_ZONES 305 - if (zone) { 306 - struct nf_conntrack_zone *nf_ct_zone; 304 + if (nf_ct_zone_add(tmpl, flags, zone) < 0) 305 + goto out_free; 307 306 308 - nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); 309 - if (!nf_ct_zone) 310 - goto out_free; 311 - nf_ct_zone->id = zone->id; 312 - nf_ct_zone->dir = zone->dir; 313 - } 314 - #endif 315 307 atomic_set(&tmpl->ct_general.use, 0); 316 308 317 309 return tmpl; 318 - #ifdef CONFIG_NF_CONNTRACK_ZONES 319 310 out_free: 320 311 kfree(tmpl); 321 312 return NULL; 322 - #endif 323 313 } 324 314 EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); 325 315 ··· 840 850 * SLAB_DESTROY_BY_RCU. 841 851 */ 842 852 ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); 843 - if (ct == NULL) { 844 - atomic_dec(&net->ct.count); 845 - return ERR_PTR(-ENOMEM); 846 - } 853 + if (ct == NULL) 854 + goto out; 855 + 847 856 spin_lock_init(&ct->lock); 848 857 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 849 858 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ··· 856 867 memset(&ct->__nfct_init_offset[0], 0, 857 868 offsetof(struct nf_conn, proto) - 858 869 offsetof(struct nf_conn, __nfct_init_offset[0])); 859 - #ifdef CONFIG_NF_CONNTRACK_ZONES 860 - if (zone) { 861 - struct nf_conntrack_zone *nf_ct_zone; 862 870 863 - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); 864 - if (!nf_ct_zone) 865 - goto out_free; 866 - nf_ct_zone->id = zone->id; 867 - nf_ct_zone->dir = zone->dir; 868 - } 869 - #endif 871 + if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) 872 + goto out_free; 873 + 870 874 /* Because we use RCU lookups, we set ct_general.use to zero before 871 875 * this is inserted in any list. 
872 876 */ 873 877 atomic_set(&ct->ct_general.use, 0); 874 878 return ct; 875 - 876 - #ifdef CONFIG_NF_CONNTRACK_ZONES 877 879 out_free: 878 - atomic_dec(&net->ct.count); 879 880 kmem_cache_free(net->ct.nf_conntrack_cachep, ct); 881 + out: 882 + atomic_dec(&net->ct.count); 880 883 return ERR_PTR(-ENOMEM); 881 - #endif 882 884 } 883 885 884 886 struct nf_conn *nf_conntrack_alloc(struct net *net, ··· 917 937 struct nf_conntrack_expect *exp = NULL; 918 938 const struct nf_conntrack_zone *zone; 919 939 struct nf_conn_timeout *timeout_ext; 940 + struct nf_conntrack_zone tmp; 920 941 unsigned int *timeouts; 921 942 922 943 if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { ··· 925 944 return NULL; 926 945 } 927 946 928 - zone = nf_ct_zone_tmpl(tmpl); 947 + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 929 948 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, 930 949 hash); 931 950 if (IS_ERR(ct)) ··· 1023 1042 const struct nf_conntrack_zone *zone; 1024 1043 struct nf_conntrack_tuple tuple; 1025 1044 struct nf_conntrack_tuple_hash *h; 1045 + struct nf_conntrack_zone tmp; 1026 1046 struct nf_conn *ct; 1027 1047 u32 hash; 1028 1048 ··· 1035 1053 } 1036 1054 1037 1055 /* look for tuple match */ 1038 - zone = nf_ct_zone_tmpl(tmpl); 1056 + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 1039 1057 hash = hash_conntrack_raw(&tuple); 1040 1058 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1041 1059 if (!h) {
+2 -3
net/netfilter/nf_conntrack_netlink.c
··· 956 956 ctnetlink_parse_zone(const struct nlattr *attr, 957 957 struct nf_conntrack_zone *zone) 958 958 { 959 - zone->id = NF_CT_DEFAULT_ZONE_ID; 960 - zone->dir = NF_CT_DEFAULT_ZONE_DIR; 961 - 959 + nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID, 960 + NF_CT_DEFAULT_ZONE_DIR, 0); 962 961 #ifdef CONFIG_NF_CONNTRACK_ZONES 963 962 if (attr) 964 963 zone->id = ntohs(nla_get_be16(attr));
+4 -1
net/netfilter/xt_CT.c
··· 208 208 209 209 #ifndef CONFIG_NF_CONNTRACK_ZONES 210 210 if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG | 211 - XT_CT_ZONE_DIR_REPL)) 211 + XT_CT_ZONE_DIR_REPL | 212 + XT_CT_ZONE_MARK)) 212 213 goto err1; 213 214 #endif 214 215 ··· 220 219 memset(&zone, 0, sizeof(zone)); 221 220 zone.id = info->zone; 222 221 zone.dir = xt_ct_flags_to_dir(info); 222 + if (info->flags & XT_CT_ZONE_MARK) 223 + zone.flags |= NF_CT_FLAG_MARK; 223 224 224 225 ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL); 225 226 ret = PTR_ERR(ct);