Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_conntrack: IPS_UNTRACKED bit

NOTRACK makes all CPUs share a cache line on nf_conntrack_untracked
twice per packet. This is bad for performance.
The __read_mostly annotation is also a bad choice for an object that is touched this often.

This patch introduces the IPS_UNTRACKED bit so that we can later switch to a
per-cpu untracked structure more easily.

A new helper, nf_ct_untracked_get(), returns a pointer to
nf_conntrack_untracked.

Another one, nf_ct_untracked_status_or(), is used by nf_nat_init() to add
the IPS_NAT_DONE_MASK bits to the untracked entry's status.

The nf_ct_is_untracked() prototype is changed to take an nf_conn pointer instead of an sk_buff pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>

authored by

Eric Dumazet and committed by
Patrick McHardy
5bfddbd4 339bb99e

+47 -29
+4
include/linux/netfilter/nf_conntrack_common.h
··· 76 76 /* Conntrack is a template */ 77 77 IPS_TEMPLATE_BIT = 11, 78 78 IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), 79 + 80 + /* Conntrack is a fake untracked entry */ 81 + IPS_UNTRACKED_BIT = 12, 82 + IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), 79 83 }; 80 84 81 85 /* Connection tracking event types */
+9 -3
include/net/netfilter/nf_conntrack.h
··· 261 261 u32 seq); 262 262 263 263 /* Fake conntrack entry for untracked connections */ 264 - extern struct nf_conn nf_conntrack_untracked; 264 + static inline struct nf_conn *nf_ct_untracked_get(void) 265 + { 266 + extern struct nf_conn nf_conntrack_untracked; 267 + 268 + return &nf_conntrack_untracked; 269 + } 270 + extern void nf_ct_untracked_status_or(unsigned long bits); 265 271 266 272 /* Iterate over all conntracks: if iter returns true, it's deleted. */ 267 273 extern void ··· 295 289 return test_bit(IPS_DYING_BIT, &ct->status); 296 290 } 297 291 298 - static inline int nf_ct_is_untracked(const struct sk_buff *skb) 292 + static inline int nf_ct_is_untracked(const struct nf_conn *ct) 299 293 { 300 - return (skb->nfct == &nf_conntrack_untracked.ct_general); 294 + return test_bit(IPS_UNTRACKED_BIT, &ct->status); 301 295 } 302 296 303 297 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
+1 -1
include/net/netfilter/nf_conntrack_core.h
··· 60 60 struct nf_conn *ct = (struct nf_conn *)skb->nfct; 61 61 int ret = NF_ACCEPT; 62 62 63 - if (ct && ct != &nf_conntrack_untracked) { 63 + if (ct && !nf_ct_is_untracked(ct)) { 64 64 if (!nf_ct_is_confirmed(ct)) 65 65 ret = __nf_conntrack_confirm(skb); 66 66 if (likely(ret == NF_ACCEPT))
+1 -1
net/ipv4/netfilter/nf_nat_core.c
··· 742 742 spin_unlock_bh(&nf_nat_lock); 743 743 744 744 /* Initialize fake conntrack so that NAT will skip it */ 745 - nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; 745 + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); 746 746 747 747 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); 748 748
+1 -1
net/ipv4/netfilter/nf_nat_standalone.c
··· 98 98 return NF_ACCEPT; 99 99 100 100 /* Don't try to NAT if this packet is not conntracked */ 101 - if (ct == &nf_conntrack_untracked) 101 + if (nf_ct_is_untracked(ct)) 102 102 return NF_ACCEPT; 103 103 104 104 nat = nfct_nat(ct);
+1 -1
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
··· 208 208 type = icmp6h->icmp6_type - 130; 209 209 if (type >= 0 && type < sizeof(noct_valid_new) && 210 210 noct_valid_new[type]) { 211 - skb->nfct = &nf_conntrack_untracked.ct_general; 211 + skb->nfct = &nf_ct_untracked_get()->ct_general; 212 212 skb->nfctinfo = IP_CT_NEW; 213 213 nf_conntrack_get(skb->nfct); 214 214 return NF_ACCEPT;
+8 -3
net/netfilter/nf_conntrack_core.c
··· 62 62 unsigned int nf_conntrack_max __read_mostly; 63 63 EXPORT_SYMBOL_GPL(nf_conntrack_max); 64 64 65 - struct nf_conn nf_conntrack_untracked __read_mostly; 65 + struct nf_conn nf_conntrack_untracked; 66 66 EXPORT_SYMBOL_GPL(nf_conntrack_untracked); 67 67 68 68 static int nf_conntrack_hash_rnd_initted; ··· 1321 1321 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 1322 1322 &nf_conntrack_htable_size, 0600); 1323 1323 1324 + void nf_ct_untracked_status_or(unsigned long bits) 1325 + { 1326 + nf_conntrack_untracked.status |= bits; 1327 + } 1328 + EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or); 1329 + 1324 1330 static int nf_conntrack_init_init_net(void) 1325 1331 { 1326 1332 int max_factor = 8; ··· 1374 1368 #endif 1375 1369 atomic_set(&nf_conntrack_untracked.ct_general.use, 1); 1376 1370 /* - and look it like as a confirmed connection */ 1377 - set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); 1378 - 1371 + nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); 1379 1372 return 0; 1380 1373 1381 1374 #ifdef CONFIG_NF_CONNTRACK_ZONES
+1 -1
net/netfilter/nf_conntrack_netlink.c
··· 480 480 int err; 481 481 482 482 /* ignore our fake conntrack entry */ 483 - if (ct == &nf_conntrack_untracked) 483 + if (nf_ct_is_untracked(ct)) 484 484 return 0; 485 485 486 486 if (events & (1 << IPCT_DESTROY)) {
+2 -2
net/netfilter/xt_CT.c
··· 67 67 return -EINVAL; 68 68 69 69 if (info->flags & XT_CT_NOTRACK) { 70 - ct = &nf_conntrack_untracked; 70 + ct = nf_ct_untracked_get(); 71 71 atomic_inc(&ct->ct_general.use); 72 72 goto out; 73 73 } ··· 132 132 struct nf_conn *ct = info->ct; 133 133 struct nf_conn_help *help; 134 134 135 - if (ct != &nf_conntrack_untracked) { 135 + if (!nf_ct_is_untracked(ct)) { 136 136 help = nfct_help(ct); 137 137 if (help) 138 138 module_put(help->helper->me);
+1 -1
net/netfilter/xt_NOTRACK.c
··· 23 23 If there is a real ct entry correspondig to this packet, 24 24 it'll hang aroun till timing out. We don't deal with it 25 25 for performance reasons. JK */ 26 - skb->nfct = &nf_conntrack_untracked.ct_general; 26 + skb->nfct = &nf_ct_untracked_get()->ct_general; 27 27 skb->nfctinfo = IP_CT_NEW; 28 28 nf_conntrack_get(skb->nfct); 29 29
+2 -2
net/netfilter/xt_TEE.c
··· 104 104 #ifdef WITH_CONNTRACK 105 105 /* Avoid counting cloned packets towards the original connection. */ 106 106 nf_conntrack_put(skb->nfct); 107 - skb->nfct = &nf_conntrack_untracked.ct_general; 107 + skb->nfct = &nf_ct_untracked_get()->ct_general; 108 108 skb->nfctinfo = IP_CT_NEW; 109 109 nf_conntrack_get(skb->nfct); 110 110 #endif ··· 177 177 178 178 #ifdef WITH_CONNTRACK 179 179 nf_conntrack_put(skb->nfct); 180 - skb->nfct = &nf_conntrack_untracked.ct_general; 180 + skb->nfct = &nf_ct_untracked_get()->ct_general; 181 181 skb->nfctinfo = IP_CT_NEW; 182 182 nf_conntrack_get(skb->nfct); 183 183 #endif
+1 -1
net/netfilter/xt_cluster.c
··· 120 120 if (ct == NULL) 121 121 return false; 122 122 123 - if (ct == &nf_conntrack_untracked) 123 + if (nf_ct_is_untracked(ct)) 124 124 return false; 125 125 126 126 if (ct->master)
+6 -5
net/netfilter/xt_conntrack.c
··· 123 123 124 124 ct = nf_ct_get(skb, &ctinfo); 125 125 126 - if (ct == &nf_conntrack_untracked) 127 - statebit = XT_CONNTRACK_STATE_UNTRACKED; 128 - else if (ct != NULL) 129 - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); 130 - else 126 + if (ct) { 127 + if (nf_ct_is_untracked(ct)) 128 + statebit = XT_CONNTRACK_STATE_UNTRACKED; 129 + else 130 + statebit = XT_CONNTRACK_STATE_BIT(ctinfo); 131 + } else 131 132 statebit = XT_CONNTRACK_STATE_INVALID; 132 133 133 134 if (info->match_flags & XT_CONNTRACK_STATE) {
+1 -1
net/netfilter/xt_socket.c
··· 127 127 * reply packet of an established SNAT-ted connection. */ 128 128 129 129 ct = nf_ct_get(skb, &ctinfo); 130 - if (ct && (ct != &nf_conntrack_untracked) && 130 + if (ct && !nf_ct_is_untracked(ct) && 131 131 ((iph->protocol != IPPROTO_ICMP && 132 132 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || 133 133 (iph->protocol == IPPROTO_ICMP &&
+8 -6
net/netfilter/xt_state.c
··· 26 26 const struct xt_state_info *sinfo = par->matchinfo; 27 27 enum ip_conntrack_info ctinfo; 28 28 unsigned int statebit; 29 + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 29 30 30 - if (nf_ct_is_untracked(skb)) 31 - statebit = XT_STATE_UNTRACKED; 32 - else if (!nf_ct_get(skb, &ctinfo)) 31 + if (!ct) 33 32 statebit = XT_STATE_INVALID; 34 - else 35 - statebit = XT_STATE_BIT(ctinfo); 36 - 33 + else { 34 + if (nf_ct_is_untracked(ct)) 35 + statebit = XT_STATE_UNTRACKED; 36 + else 37 + statebit = XT_STATE_BIT(ctinfo); 38 + } 37 39 return (sinfo->statemask & statebit); 38 40 } 39 41