Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: extensions: introduce extension genid count

Multiple netfilter extensions store pointers to external data
in their extension area struct.

Examples:
1. Timeout policies
2. Connection tracking helpers.

No references are taken for these.

When a helper or timeout policy is removed, the conntrack table gets
traversed and affected extensions are cleared.

Conntrack entries not yet in the hashtable are referenced via a special
list, the unconfirmed list.

On removal of a policy or connection tracking helper, the unconfirmed
list gets traversed and all entries are marked as dying; this prevents
them from getting committed to the table at insertion time: core checks
for dying bit, if set, the conntrack entry gets destroyed at confirm
time.

The disadvantage is that each new conntrack has to be added to the percpu
unconfirmed list, and each insertion needs to remove it from this list.
The list is only ever needed when a policy or helper is removed -- a rare
occurrence.

Add a generation ID count: Instead of adding to the list and then
traversing that list on policy/helper removal, increment a counter
that is stored in the extension area.

For unconfirmed conntracks, the extension has the genid valid at ct
allocation time.

Removal of a helper/policy etc. increments the counter.
At confirmation time, validate that ext->genid == global_id.

If the stored number is not the same, do not allow the conntrack
insertion, just as if an unconfirmed-list traversal had flagged
the entry as dying.

After insertion, the genid is no longer relevant (conntrack entries
are now reachable via the conntrack table iterators) and is set to 0.

This allows removal of the percpu unconfirmed list.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Florian Westphal and committed by
Pablo Neira Ayuso
c56716c6 17438b42

+111 -17
+16 -15
include/net/netfilter/nf_conntrack_extend.h
··· 34 34 NF_CT_EXT_NUM, 35 35 }; 36 36 37 - #define NF_CT_EXT_HELPER_TYPE struct nf_conn_help 38 - #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat 39 - #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj 40 - #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct 41 - #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache 42 - #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp 43 - #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout 44 - #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels 45 - #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy 46 - #define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext 47 - 48 37 /* Extensions: optional stuff which isn't permanently in struct. */ 49 38 struct nf_ct_ext { 50 39 u8 offset[NF_CT_EXT_NUM]; 51 40 u8 len; 41 + unsigned int gen_id; 52 42 char data[] __aligned(8); 53 43 }; 54 44 ··· 52 62 return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); 53 63 } 54 64 55 - static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id) 65 + void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id); 66 + 67 + static inline void *nf_ct_ext_find(const struct nf_conn *ct, u8 id) 56 68 { 57 - if (!nf_ct_ext_exist(ct, id)) 69 + struct nf_ct_ext *ext = ct->ext; 70 + 71 + if (!ext || !__nf_ct_ext_exist(ext, id)) 58 72 return NULL; 73 + 74 + if (unlikely(ext->gen_id)) 75 + return __nf_ct_ext_find(ext, id); 59 76 60 77 return (void *)ct->ext + ct->ext->offset[id]; 61 78 } 62 - #define nf_ct_ext_find(ext, id) \ 63 - ((id##_TYPE *)__nf_ct_ext_find((ext), (id))) 64 79 65 80 /* Add this type, returns pointer to data or NULL. */ 66 81 void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); 82 + 83 + /* ext genid. if ext->id != ext_genid, extensions cannot be used 84 + * anymore unless conntrack has CONFIRMED bit set. 85 + */ 86 + extern atomic_t nf_conntrack_ext_genid; 87 + void nf_ct_ext_bump_genid(void); 67 88 68 89 #endif /* _NF_CONNTRACK_EXTEND_H */
+9 -1
include/net/netfilter/nf_conntrack_labels.h
··· 17 17 unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; 18 18 }; 19 19 20 + /* Can't use nf_ct_ext_find(), flow dissector cannot use symbols 21 + * exported by nf_conntrack module. 22 + */ 20 23 static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) 21 24 { 22 25 #ifdef CONFIG_NF_CONNTRACK_LABELS 23 - return nf_ct_ext_find(ct, NF_CT_EXT_LABELS); 26 + struct nf_ct_ext *ext = ct->ext; 27 + 28 + if (!ext || !__nf_ct_ext_exist(ext, NF_CT_EXT_LABELS)) 29 + return NULL; 30 + 31 + return (void *)ct->ext + ct->ext->offset[NF_CT_EXT_LABELS]; 24 32 #else 25 33 return NULL; 26 34 #endif
+55
net/netfilter/nf_conntrack_core.c
··· 876 876 &nf_conntrack_hash[reply_hash]); 877 877 } 878 878 879 + static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext) 880 + { 881 + /* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions 882 + * may contain stale pointers to e.g. helper that has been removed. 883 + * 884 + * The helper can't clear this because the nf_conn object isn't in 885 + * any hash and synchronize_rcu() isn't enough because associated skb 886 + * might sit in a queue. 887 + */ 888 + return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid); 889 + } 890 + 891 + static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext) 892 + { 893 + if (!ext) 894 + return true; 895 + 896 + if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid)) 897 + return false; 898 + 899 + /* inserted into conntrack table, nf_ct_iterate_cleanup() 900 + * will find it. Disable nf_ct_ext_find() id check. 901 + */ 902 + WRITE_ONCE(ext->gen_id, 0); 903 + return true; 904 + } 905 + 879 906 int 880 907 nf_conntrack_hash_check_insert(struct nf_conn *ct) 881 908 { ··· 917 890 int err = -EEXIST; 918 891 919 892 zone = nf_ct_zone(ct); 893 + 894 + if (!nf_ct_ext_valid_pre(ct->ext)) { 895 + NF_CT_STAT_INC(net, insert_failed); 896 + return -ETIMEDOUT; 897 + } 920 898 921 899 local_bh_disable(); 922 900 do { ··· 963 931 nf_conntrack_double_unlock(hash, reply_hash); 964 932 NF_CT_STAT_INC(net, insert); 965 933 local_bh_enable(); 934 + 935 + if (!nf_ct_ext_valid_post(ct->ext)) { 936 + nf_ct_kill(ct); 937 + NF_CT_STAT_INC(net, drop); 938 + return -ETIMEDOUT; 939 + } 940 + 966 941 return 0; 967 942 chaintoolong: 968 943 NF_CT_STAT_INC(net, chaintoolong); ··· 1237 1198 return NF_DROP; 1238 1199 } 1239 1200 1201 + if (!nf_ct_ext_valid_pre(ct->ext)) { 1202 + NF_CT_STAT_INC(net, insert_failed); 1203 + goto dying; 1204 + } 1205 + 1240 1206 pr_debug("Confirming conntrack %p\n", ct); 1241 1207 /* We have to check the DYING flag after unlink to prevent 1242 1208 * a race against nf_ct_get_next_corpse() possibly 
called from ··· 1297 1253 __nf_conntrack_hash_insert(ct, hash, reply_hash); 1298 1254 nf_conntrack_double_unlock(hash, reply_hash); 1299 1255 local_bh_enable(); 1256 + 1257 + /* ext area is still valid (rcu read lock is held, 1258 + * but will go out of scope soon, we need to remove 1259 + * this conntrack again. 1260 + */ 1261 + if (!nf_ct_ext_valid_post(ct->ext)) { 1262 + nf_ct_kill(ct); 1263 + NF_CT_STAT_INC(net, drop); 1264 + return NF_DROP; 1265 + } 1300 1266 1301 1267 help = nfct_help(ct); 1302 1268 if (help && help->helper) ··· 2545 2491 */ 2546 2492 synchronize_net(); 2547 2493 2494 + nf_ct_ext_bump_genid(); 2548 2495 nf_ct_iterate_cleanup(iter, data, 0, 0); 2549 2496 } 2550 2497 EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);
+31 -1
net/netfilter/nf_conntrack_extend.c
··· 27 27 28 28 #define NF_CT_EXT_PREALLOC 128u /* conntrack events are on by default */ 29 29 30 + atomic_t nf_conntrack_ext_genid __read_mostly = ATOMIC_INIT(1); 31 + 30 32 static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = { 31 33 [NF_CT_EXT_HELPER] = sizeof(struct nf_conn_help), 32 34 #if IS_ENABLED(CONFIG_NF_NAT) ··· 118 116 if (!new) 119 117 return NULL; 120 118 121 - if (!ct->ext) 119 + if (!ct->ext) { 122 120 memset(new->offset, 0, sizeof(new->offset)); 121 + new->gen_id = atomic_read(&nf_conntrack_ext_genid); 122 + } 123 123 124 124 new->offset[id] = newoff; 125 125 new->len = newlen; ··· 131 127 return (void *)new + newoff; 132 128 } 133 129 EXPORT_SYMBOL(nf_ct_ext_add); 130 + 131 + /* Use nf_ct_ext_find wrapper. This is only useful for unconfirmed entries. */ 132 + void *__nf_ct_ext_find(const struct nf_ct_ext *ext, u8 id) 133 + { 134 + unsigned int gen_id = atomic_read(&nf_conntrack_ext_genid); 135 + unsigned int this_id = READ_ONCE(ext->gen_id); 136 + 137 + if (!__nf_ct_ext_exist(ext, id)) 138 + return NULL; 139 + 140 + if (this_id == 0 || ext->gen_id == gen_id) 141 + return (void *)ext + ext->offset[id]; 142 + 143 + return NULL; 144 + } 145 + EXPORT_SYMBOL(__nf_ct_ext_find); 146 + 147 + void nf_ct_ext_bump_genid(void) 148 + { 149 + unsigned int value = atomic_inc_return(&nf_conntrack_ext_genid); 150 + 151 + if (value == UINT_MAX) 152 + atomic_set(&nf_conntrack_ext_genid, 1); 153 + 154 + msleep(HZ); 155 + }