Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

netfilter: nf_tables: add rule blob layout

This patch adds a blob layout per chain to represent the ruleset in the
packet datapath.

  size (unsigned long)
    struct nft_rule_dp
      struct nft_expr
      ...
    struct nft_rule_dp
      struct nft_expr
      ...
    struct nft_rule_dp (is_last=1)

The new structure nft_rule_dp represents the rule in a more compact way
(smaller memory footprint) compared to the control-plane nft_rule
structure.

The ruleset blob is a read-only data structure. The first field contains
the blob size, followed by the rules and their expressions. The blob ends
with a trailing rule (is_last=1) that is used by the tracing
infrastructure; it is equivalent to the NULL rule marker in the previous
representation. The blob size field does not include the size of this
trailing rule marker.

The ruleset blob is generated from the commit path.

This patch reuses the infrastructure available since 0cbc06b3faba
("netfilter: nf_tables: remove synchronize_rcu in commit phase") to
build the array of rules per chain.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+147 -67
+18 -4
include/net/netfilter/nf_tables.h
··· 974 974 975 975 #define NFT_CHAIN_POLICY_UNSET U8_MAX 976 976 977 + struct nft_rule_dp { 978 + u64 is_last:1, 979 + dlen:12, 980 + handle:42; /* for tracing */ 981 + unsigned char data[] 982 + __attribute__((aligned(__alignof__(struct nft_expr)))); 983 + }; 984 + 985 + struct nft_rule_blob { 986 + unsigned long size; 987 + unsigned char data[] 988 + __attribute__((aligned(__alignof__(struct nft_rule_dp)))); 989 + }; 990 + 977 991 /** 978 992 * struct nft_chain - nf_tables chain 979 993 * ··· 1001 987 * @name: name of the chain 1002 988 */ 1003 989 struct nft_chain { 1004 - struct nft_rule *__rcu *rules_gen_0; 1005 - struct nft_rule *__rcu *rules_gen_1; 990 + struct nft_rule_blob __rcu *blob_gen_0; 991 + struct nft_rule_blob __rcu *blob_gen_1; 1006 992 struct list_head rules; 1007 993 struct list_head list; 1008 994 struct rhlist_head rhlhead; ··· 1017 1003 u8 *udata; 1018 1004 1019 1005 /* Only used during control plane commit phase: */ 1020 - struct nft_rule **rules_next; 1006 + struct nft_rule_blob *blob_next; 1021 1007 }; 1022 1008 1023 1009 int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); ··· 1335 1321 const struct nft_pktinfo *pkt; 1336 1322 const struct nft_base_chain *basechain; 1337 1323 const struct nft_chain *chain; 1338 - const struct nft_rule *rule; 1324 + const struct nft_rule_dp *rule; 1339 1325 const struct nft_verdict *verdict; 1340 1326 enum nft_trace_types type; 1341 1327 bool packet_dumped;
+101 -48
net/netfilter/nf_tables_api.c
··· 1747 1747 1748 1748 static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) 1749 1749 { 1750 - struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0); 1751 - struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1); 1750 + struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0); 1751 + struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1); 1752 1752 1753 1753 if (g0 != g1) 1754 1754 kvfree(g1); 1755 1755 kvfree(g0); 1756 1756 1757 1757 /* should be NULL either via abort or via successful commit */ 1758 - WARN_ON_ONCE(chain->rules_next); 1759 - kvfree(chain->rules_next); 1758 + WARN_ON_ONCE(chain->blob_next); 1759 + kvfree(chain->blob_next); 1760 1760 } 1761 1761 1762 1762 void nf_tables_chain_destroy(struct nft_ctx *ctx) ··· 2002 2002 2003 2003 struct nft_rules_old { 2004 2004 struct rcu_head h; 2005 - struct nft_rule **start; 2005 + struct nft_rule_blob *blob; 2006 2006 }; 2007 2007 2008 - static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain, 2009 - unsigned int alloc) 2008 + static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) 2010 2009 { 2011 - if (alloc > INT_MAX) 2010 + struct nft_rule_dp *prule; 2011 + 2012 + prule = (struct nft_rule_dp *)ptr; 2013 + prule->is_last = 1; 2014 + ptr += offsetof(struct nft_rule_dp, data); 2015 + /* blob size does not include the trailer rule */ 2016 + } 2017 + 2018 + static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size) 2019 + { 2020 + struct nft_rule_blob *blob; 2021 + 2022 + /* size must include room for the last rule */ 2023 + if (size < offsetof(struct nft_rule_dp, data)) 2012 2024 return NULL; 2013 2025 2014 - alloc += 1; /* NULL, ends rules */ 2015 - if (sizeof(struct nft_rule *) > INT_MAX / alloc) 2026 + size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old); 2027 + if (size > INT_MAX) 2016 2028 return NULL; 2017 2029 2018 - alloc *= sizeof(struct nft_rule *); 2019 - alloc += 
sizeof(struct nft_rules_old); 2030 + blob = kvmalloc(size, GFP_KERNEL); 2031 + if (!blob) 2032 + return NULL; 2020 2033 2021 - return kvmalloc(alloc, GFP_KERNEL); 2034 + blob->size = 0; 2035 + nft_last_rule(blob, blob->data); 2036 + 2037 + return blob; 2022 2038 } 2023 2039 2024 2040 static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family, ··· 2107 2091 struct nft_stats __percpu *stats; 2108 2092 struct net *net = ctx->net; 2109 2093 char name[NFT_NAME_MAXLEN]; 2094 + struct nft_rule_blob *blob; 2110 2095 struct nft_trans *trans; 2111 2096 struct nft_chain *chain; 2112 - struct nft_rule **rules; 2097 + unsigned int data_size; 2113 2098 int err; 2114 2099 2115 2100 if (table->use == UINT_MAX) ··· 2195 2178 chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); 2196 2179 } 2197 2180 2198 - rules = nf_tables_chain_alloc_rules(chain, 0); 2199 - if (!rules) { 2181 + data_size = offsetof(struct nft_rule_dp, data); /* last rule */ 2182 + blob = nf_tables_chain_alloc_rules(data_size); 2183 + if (!blob) { 2200 2184 err = -ENOMEM; 2201 2185 goto err_destroy_chain; 2202 2186 } 2203 2187 2204 - *rules = NULL; 2205 - rcu_assign_pointer(chain->rules_gen_0, rules); 2206 - rcu_assign_pointer(chain->rules_gen_1, rules); 2188 + RCU_INIT_POINTER(chain->blob_gen_0, blob); 2189 + RCU_INIT_POINTER(chain->blob_gen_1, blob); 2207 2190 2208 2191 err = nf_tables_register_hook(net, table, chain); 2209 2192 if (err < 0) ··· 8258 8241 8259 8242 static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) 8260 8243 { 8244 + const struct nft_expr *expr, *last; 8245 + unsigned int size, data_size; 8246 + void *data, *data_boundary; 8247 + struct nft_rule_dp *prule; 8261 8248 struct nft_rule *rule; 8262 - unsigned int alloc = 0; 8263 8249 int i; 8264 8250 8265 8251 /* already handled or inactive chain? 
*/ 8266 - if (chain->rules_next || !nft_is_active_next(net, chain)) 8252 + if (chain->blob_next || !nft_is_active_next(net, chain)) 8267 8253 return 0; 8268 8254 8269 8255 rule = list_entry(&chain->rules, struct nft_rule, list); 8270 8256 i = 0; 8271 8257 8272 8258 list_for_each_entry_continue(rule, &chain->rules, list) { 8273 - if (nft_is_active_next(net, rule)) 8274 - alloc++; 8259 + if (nft_is_active_next(net, rule)) { 8260 + data_size += sizeof(*prule) + rule->dlen; 8261 + if (data_size > INT_MAX) 8262 + return -ENOMEM; 8263 + } 8275 8264 } 8265 + data_size += offsetof(struct nft_rule_dp, data); /* last rule */ 8276 8266 8277 - chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc); 8278 - if (!chain->rules_next) 8267 + chain->blob_next = nf_tables_chain_alloc_rules(data_size); 8268 + if (!chain->blob_next) 8279 8269 return -ENOMEM; 8280 8270 8271 + data = (void *)chain->blob_next->data; 8272 + data_boundary = data + data_size; 8273 + size = 0; 8274 + 8281 8275 list_for_each_entry_continue(rule, &chain->rules, list) { 8282 - if (nft_is_active_next(net, rule)) 8283 - chain->rules_next[i++] = rule; 8276 + if (!nft_is_active_next(net, rule)) 8277 + continue; 8278 + 8279 + prule = (struct nft_rule_dp *)data; 8280 + data += offsetof(struct nft_rule_dp, data); 8281 + if (WARN_ON_ONCE(data > data_boundary)) 8282 + return -ENOMEM; 8283 + 8284 + nft_rule_for_each_expr(expr, last, rule) { 8285 + if (WARN_ON_ONCE(data + expr->ops->size > data_boundary)) 8286 + return -ENOMEM; 8287 + 8288 + memcpy(data + size, expr, expr->ops->size); 8289 + size += expr->ops->size; 8290 + } 8291 + if (WARN_ON_ONCE(size >= 1 << 12)) 8292 + return -ENOMEM; 8293 + 8294 + prule->handle = rule->handle; 8295 + prule->dlen = size; 8296 + prule->is_last = 0; 8297 + 8298 + data += size; 8299 + size = 0; 8300 + chain->blob_next->size += (unsigned long)(data - (void *)prule); 8284 8301 } 8285 8302 8286 - chain->rules_next[i] = NULL; 8303 + prule = (struct nft_rule_dp *)data; 8304 + data += 
offsetof(struct nft_rule_dp, data); 8305 + if (WARN_ON_ONCE(data > data_boundary)) 8306 + return -ENOMEM; 8307 + 8308 + nft_last_rule(chain->blob_next, prule); 8309 + 8287 8310 return 0; 8288 8311 } 8289 8312 ··· 8337 8280 8338 8281 if (trans->msg_type == NFT_MSG_NEWRULE || 8339 8282 trans->msg_type == NFT_MSG_DELRULE) { 8340 - kvfree(chain->rules_next); 8341 - chain->rules_next = NULL; 8283 + kvfree(chain->blob_next); 8284 + chain->blob_next = NULL; 8342 8285 } 8343 8286 } 8344 8287 } ··· 8347 8290 { 8348 8291 struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); 8349 8292 8350 - kvfree(o->start); 8293 + kvfree(o->blob); 8351 8294 } 8352 8295 8353 - static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) 8296 + static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob) 8354 8297 { 8355 - struct nft_rule **r = rules; 8356 8298 struct nft_rules_old *old; 8357 8299 8358 - while (*r) 8359 - r++; 8360 - 8361 - r++; /* rcu_head is after end marker */ 8362 - old = (void *) r; 8363 - old->start = rules; 8300 + /* rcu_head is after end marker */ 8301 + old = (void *)blob + sizeof(*blob) + blob->size; 8302 + old->blob = blob; 8364 8303 8365 8304 call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); 8366 8305 } 8367 8306 8368 8307 static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain) 8369 8308 { 8370 - struct nft_rule **g0, **g1; 8309 + struct nft_rule_blob *g0, *g1; 8371 8310 bool next_genbit; 8372 8311 8373 8312 next_genbit = nft_gencursor_next(net); 8374 8313 8375 - g0 = rcu_dereference_protected(chain->rules_gen_0, 8314 + g0 = rcu_dereference_protected(chain->blob_gen_0, 8376 8315 lockdep_commit_lock_is_held(net)); 8377 - g1 = rcu_dereference_protected(chain->rules_gen_1, 8316 + g1 = rcu_dereference_protected(chain->blob_gen_1, 8378 8317 lockdep_commit_lock_is_held(net)); 8379 8318 8380 8319 /* No changes to this chain? 
*/ 8381 - if (chain->rules_next == NULL) { 8320 + if (chain->blob_next == NULL) { 8382 8321 /* chain had no change in last or next generation */ 8383 8322 if (g0 == g1) 8384 8323 return; ··· 8383 8330 * one uses same rules as current generation. 8384 8331 */ 8385 8332 if (next_genbit) { 8386 - rcu_assign_pointer(chain->rules_gen_1, g0); 8333 + rcu_assign_pointer(chain->blob_gen_1, g0); 8387 8334 nf_tables_commit_chain_free_rules_old(g1); 8388 8335 } else { 8389 - rcu_assign_pointer(chain->rules_gen_0, g1); 8336 + rcu_assign_pointer(chain->blob_gen_0, g1); 8390 8337 nf_tables_commit_chain_free_rules_old(g0); 8391 8338 } 8392 8339 ··· 8394 8341 } 8395 8342 8396 8343 if (next_genbit) 8397 - rcu_assign_pointer(chain->rules_gen_1, chain->rules_next); 8344 + rcu_assign_pointer(chain->blob_gen_1, chain->blob_next); 8398 8345 else 8399 - rcu_assign_pointer(chain->rules_gen_0, chain->rules_next); 8346 + rcu_assign_pointer(chain->blob_gen_0, chain->blob_next); 8400 8347 8401 - chain->rules_next = NULL; 8348 + chain->blob_next = NULL; 8402 8349 8403 8350 if (g0 == g1) 8404 8351 return;
+27 -14
net/netfilter/nf_tables_core.c
··· 38 38 39 39 static inline void nft_trace_packet(struct nft_traceinfo *info, 40 40 const struct nft_chain *chain, 41 - const struct nft_rule *rule, 41 + const struct nft_rule_dp *rule, 42 42 enum nft_trace_types type) 43 43 { 44 44 if (static_branch_unlikely(&nft_trace_enabled)) { ··· 88 88 89 89 static inline void nft_trace_verdict(struct nft_traceinfo *info, 90 90 const struct nft_chain *chain, 91 - const struct nft_rule *rule, 91 + const struct nft_rule_dp *rule, 92 92 const struct nft_regs *regs) 93 93 { 94 94 if (static_branch_unlikely(&nft_trace_enabled)) { ··· 153 153 } 154 154 155 155 struct nft_jumpstack { 156 - const struct nft_chain *chain; 157 - struct nft_rule *const *rules; 156 + const struct nft_chain *chain; 157 + const struct nft_rule_dp *rule; 158 + const struct nft_rule_dp *last_rule; 158 159 }; 159 160 160 161 static void expr_call_ops_eval(const struct nft_expr *expr, ··· 184 183 expr->ops->eval(expr, regs, pkt); 185 184 } 186 185 186 + #define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0] 187 + #define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size 188 + #define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen] 189 + #define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen 190 + 191 + #define nft_rule_dp_for_each_expr(expr, last, rule) \ 192 + for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \ 193 + (expr) != (last); \ 194 + (expr) = nft_rule_expr_next(expr)) 195 + 187 196 unsigned int 188 197 nft_do_chain(struct nft_pktinfo *pkt, void *priv) 189 198 { 190 199 const struct nft_chain *chain = priv, *basechain = chain; 200 + const struct nft_rule_dp *rule, *last_rule; 191 201 const struct net *net = nft_net(pkt); 192 - struct nft_rule *const *rules; 193 - const struct nft_rule *rule; 194 202 const struct nft_expr *expr, *last; 195 203 struct nft_regs regs; 196 204 unsigned int stackptr = 0; 197 205 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; 198 206 
bool genbit = READ_ONCE(net->nft.gencursor); 207 + struct nft_rule_blob *blob; 199 208 struct nft_traceinfo info; 200 209 201 210 info.trace = false; ··· 213 202 nft_trace_init(&info, pkt, &regs.verdict, basechain); 214 203 do_chain: 215 204 if (genbit) 216 - rules = rcu_dereference(chain->rules_gen_1); 205 + blob = rcu_dereference(chain->blob_gen_1); 217 206 else 218 - rules = rcu_dereference(chain->rules_gen_0); 207 + blob = rcu_dereference(chain->blob_gen_0); 219 208 209 + rule = (struct nft_rule_dp *)blob->data; 210 + last_rule = (void *)blob->data + blob->size; 220 211 next_rule: 221 - rule = *rules; 222 212 regs.verdict.code = NFT_CONTINUE; 223 - for (; *rules ; rules++) { 224 - rule = *rules; 225 - nft_rule_for_each_expr(expr, last, rule) { 213 + for (; rule < last_rule; rule = nft_rule_next(rule)) { 214 + nft_rule_dp_for_each_expr(expr, last, rule) { 226 215 if (expr->ops == &nft_cmp_fast_ops) 227 216 nft_cmp_fast_eval(expr, &regs); 228 217 else if (expr->ops == &nft_bitwise_fast_ops) ··· 262 251 if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE)) 263 252 return NF_DROP; 264 253 jumpstack[stackptr].chain = chain; 265 - jumpstack[stackptr].rules = rules + 1; 254 + jumpstack[stackptr].rule = nft_rule_next(rule); 255 + jumpstack[stackptr].last_rule = last_rule; 266 256 stackptr++; 267 257 fallthrough; 268 258 case NFT_GOTO: ··· 279 267 if (stackptr > 0) { 280 268 stackptr--; 281 269 chain = jumpstack[stackptr].chain; 282 - rules = jumpstack[stackptr].rules; 270 + rule = jumpstack[stackptr].rule; 271 + last_rule = jumpstack[stackptr].last_rule; 283 272 goto next_rule; 284 273 } 285 274
+1 -1
net/netfilter/nf_tables_trace.c
··· 142 142 static int nf_trace_fill_rule_info(struct sk_buff *nlskb, 143 143 const struct nft_traceinfo *info) 144 144 { 145 - if (!info->rule) 145 + if (!info->rule || info->rule->is_last) 146 146 return 0; 147 147 148 148 /* a continue verdict with ->type == RETURN means that this is