Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nft_dynset: dynamic stateful expression instantiation

Support instantiating stateful expressions from a template and
associating them with dynamically created set entries. The expressions
are evaluated when adding or updating the set element.

This allows maintaining per-flow state using the existing set
infrastructure and expression types, with arbitrary definitions of
a flow.

Usage is currently restricted to anonymous sets, meaning only a single
binding can exist, since the desired semantics of multiple independent
bindings haven't been defined so far.

Examples (userspace syntax is still WIP):

1. Limit the rate of new SSH connections per host, similar to iptables
hashlimit:

flow ip saddr timeout 60s \
limit 10/second \
accept

2. Account network traffic between each set of /24 networks:

flow ip saddr & 255.255.255.0 . ip daddr & 255.255.255.0 \
counter

3. Account traffic to each host per user:

flow skuid . ip daddr \
counter

4. Account traffic for each combination of source address and TCP flags:

flow ip saddr . tcp flags \
counter

The resulting set content after a Xmas-scan looks like this:

{
192.168.122.1 . fin | psh | urg : counter packets 1001 bytes 40040,
192.168.122.1 . ack : counter packets 74 bytes 3848,
192.168.122.1 . psh | ack : counter packets 35 bytes 3144
}

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Patrick McHardy and committed by
Pablo Neira Ayuso
3e135cd4 7c6c6e95

+52 -4
+2
include/uapi/linux/netfilter/nf_tables.h
··· 567 567 * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) 568 568 * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) 569 569 * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) 570 + * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) 570 571 */ 571 572 enum nft_dynset_attributes { 572 573 NFTA_DYNSET_UNSPEC, ··· 577 576 NFTA_DYNSET_SREG_KEY, 578 577 NFTA_DYNSET_SREG_DATA, 579 578 NFTA_DYNSET_TIMEOUT, 579 + NFTA_DYNSET_EXPR, 580 580 __NFTA_DYNSET_MAX, 581 581 }; 582 582 #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1)
+50 -4
net/netfilter/nft_dynset.c
··· 23 23 enum nft_registers sreg_key:8; 24 24 enum nft_registers sreg_data:8; 25 25 u64 timeout; 26 + struct nft_expr *expr; 26 27 struct nft_set_binding binding; 27 28 }; 28 29 ··· 31 30 struct nft_regs *regs) 32 31 { 33 32 const struct nft_dynset *priv = nft_expr_priv(expr); 33 + struct nft_set_ext *ext; 34 34 u64 timeout; 35 35 void *elem; 36 36 ··· 46 44 if (elem == NULL) { 47 45 if (set->size) 48 46 atomic_dec(&set->nelems); 47 + return NULL; 49 48 } 49 + 50 + ext = nft_set_elem_ext(set, elem); 51 + if (priv->expr != NULL) 52 + nft_expr_clone(nft_set_ext_expr(ext), priv->expr); 53 + 50 54 return elem; 51 55 } 52 56 ··· 63 55 const struct nft_dynset *priv = nft_expr_priv(expr); 64 56 struct nft_set *set = priv->set; 65 57 const struct nft_set_ext *ext; 58 + const struct nft_expr *sexpr; 66 59 u64 timeout; 67 60 68 61 if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new, 69 62 expr, regs, &ext)) { 63 + sexpr = NULL; 64 + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) 65 + sexpr = nft_set_ext_expr(ext); 66 + 70 67 if (priv->op == NFT_DYNSET_OP_UPDATE && 71 68 nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { 72 69 timeout = priv->timeout ? 
: set->timeout; 73 70 *nft_set_ext_expiration(ext) = jiffies + timeout; 74 - return; 75 - } 76 - } 71 + } else if (sexpr == NULL) 72 + goto out; 77 73 74 + if (sexpr != NULL) 75 + sexpr->ops->eval(sexpr, regs, pkt); 76 + return; 77 + } 78 + out: 78 79 regs->verdict.code = NFT_BREAK; 79 80 } 80 81 ··· 94 77 [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, 95 78 [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, 96 79 [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, 80 + [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, 97 81 }; 98 82 99 83 static int nft_dynset_init(const struct nft_ctx *ctx, ··· 160 142 } else if (set->flags & NFT_SET_MAP) 161 143 return -EINVAL; 162 144 145 + if (tb[NFTA_DYNSET_EXPR] != NULL) { 146 + if (!(set->flags & NFT_SET_EVAL)) 147 + return -EINVAL; 148 + if (!(set->flags & NFT_SET_ANONYMOUS)) 149 + return -EOPNOTSUPP; 150 + 151 + priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); 152 + if (IS_ERR(priv->expr)) 153 + return PTR_ERR(priv->expr); 154 + 155 + err = -EOPNOTSUPP; 156 + if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) 157 + goto err1; 158 + } else if (set->flags & NFT_SET_EVAL) 159 + return -EINVAL; 160 + 163 161 nft_set_ext_prepare(&priv->tmpl); 164 162 nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); 165 163 if (set->flags & NFT_SET_MAP) 166 164 nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); 165 + if (priv->expr != NULL) 166 + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR, 167 + priv->expr->ops->size); 167 168 if (set->flags & NFT_SET_TIMEOUT) { 168 169 if (timeout || set->timeout) 169 170 nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); ··· 192 155 193 156 err = nf_tables_bind_set(ctx, set, &priv->binding); 194 157 if (err < 0) 195 - return err; 158 + goto err1; 196 159 197 160 priv->set = set; 198 161 return 0; 162 + 163 + err1: 164 + if (priv->expr != NULL) 165 + nft_expr_destroy(ctx, priv->expr); 166 + return err; 199 167 } 200 168 201 169 static void nft_dynset_destroy(const struct 
nft_ctx *ctx, ··· 209 167 struct nft_dynset *priv = nft_expr_priv(expr); 210 168 211 169 nf_tables_unbind_set(ctx, priv->set, &priv->binding); 170 + if (priv->expr != NULL) 171 + nft_expr_destroy(ctx, priv->expr); 212 172 } 213 173 214 174 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) ··· 227 183 if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) 228 184 goto nla_put_failure; 229 185 if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) 186 + goto nla_put_failure; 187 + if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) 230 188 goto nla_put_failure; 231 189 return 0; 232 190