Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nfnetlink: add batch support and use it from nf_tables

This patch adds batch support to nfnetlink. Basically, it adds
two new control messages:

* NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch,
the nfgenmsg->res_id indicates the nfnetlink subsystem ID.

* NFNL_MSG_BATCH_END, that results in the invocation of the
ss->commit callback function. If not specified or an error
occurred in the batch, the ss->abort function is invoked
instead.

The end message represents the commit operation in nftables; the
lack of an end message results in an abort. This patch also adds the
.call_batch function that is only called from the batch receive
path.

This patch adds atomic rule updates and dumps based on
bitmask generations. This makes it possible to atomically commit a
set of rule-set updates incrementally without altering the internal
state of existing nf_tables expressions/matches/targets.

The idea consists of using a generation cursor of 1 bit and
a bitmask of 2 bits per rule. Assuming the gencursor is 0,
then the genmask (expressed as a bitmask) can be interpreted
as:

00 active in the present, will be active in the next generation.
01 inactive in the present, will be active in the next generation.
10 active in the present, will be deleted in the next generation.
^
gencursor

Once you invoke the transition to the next generation, the global
gencursor is updated:

00 active in the present, will be active in the next generation.
01 active in the present, needs to zero its future, it becomes 00.
10 inactive in the present, delete now.
^
gencursor

If a dump is in progress and nf_tables enters a new generation,
the dump will stop and return -EBUSY to let userspace know that
it has to retry. In order to invalidate dumps, a global
genctr counter is increased every time nf_tables enters a new
generation.

This new operation can be used from the user-space utility
that controls the firewall, eg.

nft -f restore

The rule updates contained in `file' will be applied atomically.

cat file
-----
add filter INPUT ip saddr 1.1.1.1 counter accept #1
del filter INPUT ip daddr 2.2.2.2 counter drop #2
-EOF-

Note that rule 1 will be inactive until the transition to the
next generation; rule 2 will be evicted in the next generation.

There is a penalty during the rule update due to the branch
misprediction in the packet matching framework. But that should be
quickly resolved once the iteration over the commit list that
contains the rules that require updates is finished.

Event notification happens once the rule-set update has been
committed. So we skip notifications in case the rule-set update
is aborted, which can happen when the rule-set is only being tested
to check that it applies correctly.

This patch squashes the following patches from Pablo:

* nf_tables: atomic rule updates and dumps
* nf_tables: get rid of per rule list_head for commits
* nf_tables: use per netns commit list
* nfnetlink: add batch support and use it from nf_tables
* nf_tables: all rule updates are transactional
* nf_tables: attach replacement rule after stale one
* nf_tables: do not allow deletion/replacement of stale rules
* nf_tables: remove unused NFTA_RULE_FLAGS

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+402 -24
+5
include/linux/netfilter/nfnetlink.h
··· 14 14 int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 15 15 const struct nlmsghdr *nlh, 16 16 const struct nlattr * const cda[]); 17 + int (*call_batch)(struct sock *nl, struct sk_buff *skb, 18 + const struct nlmsghdr *nlh, 19 + const struct nlattr * const cda[]); 17 20 const struct nla_policy *policy; /* netlink attribute policy */ 18 21 const u_int16_t attr_count; /* number of nlattr's */ 19 22 }; ··· 26 23 __u8 subsys_id; /* nfnetlink subsystem ID */ 27 24 __u8 cb_count; /* number of callbacks */ 28 25 const struct nfnl_callback *cb; /* callback for individual types */ 26 + int (*commit)(struct sk_buff *skb); 27 + int (*abort)(struct sk_buff *skb); 29 28 }; 30 29 31 30 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
+24 -1
include/net/netfilter/nf_tables.h
··· 323 323 * @list: used internally 324 324 * @rcu_head: used internally for rcu 325 325 * @handle: rule handle 326 + * @genmask: generation mask 326 327 * @dlen: length of expression data 327 328 * @data: expression data 328 329 */ 329 330 struct nft_rule { 330 331 struct list_head list; 331 332 struct rcu_head rcu_head; 332 - u64 handle:48, 333 + u64 handle:46, 334 + genmask:2, 333 335 dlen:16; 334 336 unsigned char data[] 335 337 __attribute__((aligned(__alignof__(struct nft_expr)))); 338 + }; 339 + 340 + /** 341 + * struct nft_rule_trans - nf_tables rule update in transaction 342 + * 343 + * @list: used internally 344 + * @rule: rule that needs to be updated 345 + * @chain: chain that this rule belongs to 346 + * @table: table for which this chain applies 347 + * @nlh: netlink header of the message that contain this update 348 + * @family: family expressesed as AF_* 349 + */ 350 + struct nft_rule_trans { 351 + struct list_head list; 352 + struct nft_rule *rule; 353 + const struct nft_chain *chain; 354 + const struct nft_table *table; 355 + const struct nlmsghdr *nlh; 356 + u8 family; 336 357 }; 337 358 338 359 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) ··· 391 370 * @rules: list of rules in the chain 392 371 * @list: used internally 393 372 * @rcu_head: used internally 373 + * @net: net namespace that this chain belongs to 394 374 * @handle: chain handle 395 375 * @flags: bitmask of enum nft_chain_flags 396 376 * @use: number of jump references to this chain ··· 402 380 struct list_head rules; 403 381 struct list_head list; 404 382 struct rcu_head rcu_head; 383 + struct net *net; 405 384 u64 handle; 406 385 u8 flags; 407 386 u16 use;
+3
include/net/netns/nftables.h
··· 7 7 8 8 struct netns_nftables { 9 9 struct list_head af_info; 10 + struct list_head commit_list; 10 11 struct nft_af_info *ipv4; 11 12 struct nft_af_info *ipv6; 12 13 struct nft_af_info *bridge; 14 + u8 gencursor; 15 + u8 genctr; 13 16 }; 14 17 15 18 #endif
+4
include/uapi/linux/netfilter/nfnetlink.h
··· 57 57 #define NFNL_SUBSYS_NFT_COMPAT 11 58 58 #define NFNL_SUBSYS_COUNT 12 59 59 60 + /* Reserved control nfnetlink messages */ 61 + #define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE 62 + #define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1 63 + 60 64 #endif /* _UAPI_NFNETLINK_H */
+185 -19
net/netfilter/nf_tables_api.c
··· 978 978 979 979 INIT_LIST_HEAD(&chain->rules); 980 980 chain->handle = nf_tables_alloc_handle(table); 981 + chain->net = net; 981 982 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); 982 983 983 984 if (!(table->flags & NFT_TABLE_F_DORMANT) && ··· 1372 1371 return err; 1373 1372 } 1374 1373 1374 + static inline bool 1375 + nft_rule_is_active(struct net *net, const struct nft_rule *rule) 1376 + { 1377 + return (rule->genmask & (1 << net->nft.gencursor)) == 0; 1378 + } 1379 + 1380 + static inline int gencursor_next(struct net *net) 1381 + { 1382 + return net->nft.gencursor+1 == 1 ? 1 : 0; 1383 + } 1384 + 1385 + static inline int 1386 + nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) 1387 + { 1388 + return (rule->genmask & (1 << gencursor_next(net))) == 0; 1389 + } 1390 + 1391 + static inline void 1392 + nft_rule_activate_next(struct net *net, struct nft_rule *rule) 1393 + { 1394 + /* Now inactive, will be active in the future */ 1395 + rule->genmask = (1 << net->nft.gencursor); 1396 + } 1397 + 1398 + static inline void 1399 + nft_rule_disactivate_next(struct net *net, struct nft_rule *rule) 1400 + { 1401 + rule->genmask = (1 << gencursor_next(net)); 1402 + } 1403 + 1404 + static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) 1405 + { 1406 + rule->genmask = 0; 1407 + } 1408 + 1375 1409 static int nf_tables_dump_rules(struct sk_buff *skb, 1376 1410 struct netlink_callback *cb) 1377 1411 { ··· 1418 1382 unsigned int idx = 0, s_idx = cb->args[0]; 1419 1383 struct net *net = sock_net(skb->sk); 1420 1384 int family = nfmsg->nfgen_family; 1385 + u8 genctr = ACCESS_ONCE(net->nft.genctr); 1386 + u8 gencursor = ACCESS_ONCE(net->nft.gencursor); 1421 1387 1422 1388 list_for_each_entry(afi, &net->nft.af_info, list) { 1423 1389 if (family != NFPROTO_UNSPEC && family != afi->family) ··· 1428 1390 list_for_each_entry(table, &afi->tables, list) { 1429 1391 list_for_each_entry(chain, &table->chains, list) { 1430 1392 
list_for_each_entry(rule, &chain->rules, list) { 1393 + if (!nft_rule_is_active(net, rule)) 1394 + goto cont; 1431 1395 if (idx < s_idx) 1432 1396 goto cont; 1433 1397 if (idx > s_idx) ··· 1448 1408 } 1449 1409 } 1450 1410 done: 1411 + /* Invalidate this dump, a transition to the new generation happened */ 1412 + if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) 1413 + return -EBUSY; 1414 + 1451 1415 cb->args[0] = idx; 1452 1416 return skb->len; 1453 1417 } ··· 1536 1492 1537 1493 static struct nft_expr_info *info; 1538 1494 1495 + static struct nft_rule_trans * 1496 + nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx) 1497 + { 1498 + struct nft_rule_trans *rupd; 1499 + 1500 + rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); 1501 + if (rupd == NULL) 1502 + return NULL; 1503 + 1504 + rupd->chain = ctx->chain; 1505 + rupd->table = ctx->table; 1506 + rupd->rule = rule; 1507 + rupd->family = ctx->afi->family; 1508 + rupd->nlh = ctx->nlh; 1509 + list_add_tail(&rupd->list, &ctx->net->nft.commit_list); 1510 + 1511 + return rupd; 1512 + } 1513 + 1539 1514 static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, 1540 1515 const struct nlmsghdr *nlh, 1541 1516 const struct nlattr * const nla[]) ··· 1565 1502 struct nft_table *table; 1566 1503 struct nft_chain *chain; 1567 1504 struct nft_rule *rule, *old_rule = NULL; 1505 + struct nft_rule_trans *repl = NULL; 1568 1506 struct nft_expr *expr; 1569 1507 struct nft_ctx ctx; 1570 1508 struct nlattr *tmp; ··· 1640 1576 if (rule == NULL) 1641 1577 goto err1; 1642 1578 1579 + nft_rule_activate_next(net, rule); 1580 + 1643 1581 rule->handle = handle; 1644 1582 rule->dlen = size; 1645 1583 ··· 1655 1589 } 1656 1590 1657 1591 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 1658 - list_replace_rcu(&old_rule->list, &rule->list); 1659 - nf_tables_rule_destroy(old_rule); 1592 + if (nft_rule_is_active_next(net, old_rule)) { 1593 + repl = nf_tables_trans_add(old_rule, &ctx); 1594 + if (repl 
== NULL) { 1595 + err = -ENOMEM; 1596 + goto err2; 1597 + } 1598 + nft_rule_disactivate_next(net, old_rule); 1599 + list_add_tail(&rule->list, &old_rule->list); 1600 + } else { 1601 + err = -ENOENT; 1602 + goto err2; 1603 + } 1660 1604 } else if (nlh->nlmsg_flags & NLM_F_APPEND) 1661 1605 if (old_rule) 1662 1606 list_add_rcu(&rule->list, &old_rule->list); ··· 1679 1603 list_add_rcu(&rule->list, &chain->rules); 1680 1604 } 1681 1605 1682 - nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, 1683 - nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), 1684 - nfmsg->nfgen_family); 1606 + if (nf_tables_trans_add(rule, &ctx) == NULL) { 1607 + err = -ENOMEM; 1608 + goto err3; 1609 + } 1685 1610 return 0; 1686 1611 1612 + err3: 1613 + list_del_rcu(&rule->list); 1614 + if (repl) { 1615 + list_del_rcu(&repl->rule->list); 1616 + list_del(&repl->list); 1617 + nft_rule_clear(net, repl->rule); 1618 + kfree(repl); 1619 + } 1687 1620 err2: 1688 1621 nf_tables_rule_destroy(rule); 1689 1622 err1: ··· 1701 1616 module_put(info[i].ops->type->owner); 1702 1617 } 1703 1618 return err; 1619 + } 1620 + 1621 + static int 1622 + nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) 1623 + { 1624 + /* You cannot delete the same rule twice */ 1625 + if (nft_rule_is_active_next(ctx->net, rule)) { 1626 + if (nf_tables_trans_add(rule, ctx) == NULL) 1627 + return -ENOMEM; 1628 + nft_rule_disactivate_next(ctx->net, rule); 1629 + return 0; 1630 + } 1631 + return -ENOENT; 1704 1632 } 1705 1633 1706 1634 static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, ··· 1726 1628 const struct nft_table *table; 1727 1629 struct nft_chain *chain; 1728 1630 struct nft_rule *rule, *tmp; 1729 - int family = nfmsg->nfgen_family; 1631 + int family = nfmsg->nfgen_family, err = 0; 1632 + struct nft_ctx ctx; 1730 1633 1731 1634 afi = nf_tables_afinfo_lookup(net, family, false); 1732 1635 if (IS_ERR(afi)) ··· 1741 1642 if (IS_ERR(chain)) 1742 1643 return PTR_ERR(chain); 1743 
1644 1645 + nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); 1646 + 1744 1647 if (nla[NFTA_RULE_HANDLE]) { 1745 1648 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); 1746 1649 if (IS_ERR(rule)) 1747 1650 return PTR_ERR(rule); 1748 1651 1749 - /* List removal must be visible before destroying expressions */ 1750 - list_del_rcu(&rule->list); 1751 - 1752 - nf_tables_rule_notify(skb, nlh, table, chain, rule, 1753 - NFT_MSG_DELRULE, 0, family); 1754 - nf_tables_rule_destroy(rule); 1652 + err = nf_tables_delrule_one(&ctx, rule); 1755 1653 } else { 1756 1654 /* Remove all rules in this chain */ 1757 1655 list_for_each_entry_safe(rule, tmp, &chain->rules, list) { 1758 - list_del_rcu(&rule->list); 1759 - 1760 - nf_tables_rule_notify(skb, nlh, table, chain, rule, 1761 - NFT_MSG_DELRULE, 0, family); 1762 - nf_tables_rule_destroy(rule); 1656 + err = nf_tables_delrule_one(&ctx, rule); 1657 + if (err < 0) 1658 + break; 1763 1659 } 1764 1660 } 1765 1661 1662 + return err; 1663 + } 1664 + 1665 + static int nf_tables_commit(struct sk_buff *skb) 1666 + { 1667 + struct net *net = sock_net(skb->sk); 1668 + struct nft_rule_trans *rupd, *tmp; 1669 + 1670 + /* Bump generation counter, invalidate any dump in progress */ 1671 + net->nft.genctr++; 1672 + 1673 + /* A new generation has just started */ 1674 + net->nft.gencursor = gencursor_next(net); 1675 + 1676 + /* Make sure all packets have left the previous generation before 1677 + * purging old rules. 1678 + */ 1679 + synchronize_rcu(); 1680 + 1681 + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { 1682 + /* Delete this rule from the dirty list */ 1683 + list_del(&rupd->list); 1684 + 1685 + /* This rule was inactive in the past and just became active. 1686 + * Clear the next bit of the genmask since its meaning has 1687 + * changed, now it is the future. 
1688 + */ 1689 + if (nft_rule_is_active(net, rupd->rule)) { 1690 + nft_rule_clear(net, rupd->rule); 1691 + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, 1692 + rupd->chain, rupd->rule, 1693 + NFT_MSG_NEWRULE, 0, 1694 + rupd->family); 1695 + kfree(rupd); 1696 + continue; 1697 + } 1698 + 1699 + /* This rule is in the past, get rid of it */ 1700 + list_del_rcu(&rupd->rule->list); 1701 + nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain, 1702 + rupd->rule, NFT_MSG_DELRULE, 0, 1703 + rupd->family); 1704 + nf_tables_rule_destroy(rupd->rule); 1705 + kfree(rupd); 1706 + } 1707 + 1708 + return 0; 1709 + } 1710 + 1711 + static int nf_tables_abort(struct sk_buff *skb) 1712 + { 1713 + struct net *net = sock_net(skb->sk); 1714 + struct nft_rule_trans *rupd, *tmp; 1715 + 1716 + list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) { 1717 + /* Delete all rules from the dirty list */ 1718 + list_del(&rupd->list); 1719 + 1720 + if (!nft_rule_is_active_next(net, rupd->rule)) { 1721 + nft_rule_clear(net, rupd->rule); 1722 + kfree(rupd); 1723 + continue; 1724 + } 1725 + 1726 + /* This rule is inactive, get rid of it */ 1727 + list_del_rcu(&rupd->rule->list); 1728 + nf_tables_rule_destroy(rupd->rule); 1729 + kfree(rupd); 1730 + } 1766 1731 return 0; 1767 1732 } 1768 1733 ··· 2797 2634 .policy = nft_chain_policy, 2798 2635 }, 2799 2636 [NFT_MSG_NEWRULE] = { 2800 - .call = nf_tables_newrule, 2637 + .call_batch = nf_tables_newrule, 2801 2638 .attr_count = NFTA_RULE_MAX, 2802 2639 .policy = nft_rule_policy, 2803 2640 }, ··· 2807 2644 .policy = nft_rule_policy, 2808 2645 }, 2809 2646 [NFT_MSG_DELRULE] = { 2810 - .call = nf_tables_delrule, 2647 + .call_batch = nf_tables_delrule, 2811 2648 .attr_count = NFTA_RULE_MAX, 2812 2649 .policy = nft_rule_policy, 2813 2650 }, ··· 2848 2685 .subsys_id = NFNL_SUBSYS_NFTABLES, 2849 2686 .cb_count = NFT_MSG_MAX, 2850 2687 .cb = nf_tables_cb, 2688 + .commit = nf_tables_commit, 2689 + .abort = nf_tables_abort, 2851 2690 }; 2852 
2691 2853 2692 /* ··· 3221 3056 static int nf_tables_init_net(struct net *net) 3222 3057 { 3223 3058 INIT_LIST_HEAD(&net->nft.af_info); 3059 + INIT_LIST_HEAD(&net->nft.commit_list); 3224 3060 return 0; 3225 3061 } 3226 3062
+10
net/netfilter/nf_tables_core.c
··· 88 88 struct nft_data data[NFT_REG_MAX + 1]; 89 89 unsigned int stackptr = 0; 90 90 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; 91 + /* 92 + * Cache cursor to avoid problems in case that the cursor is updated 93 + * while traversing the ruleset. 94 + */ 95 + unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); 91 96 92 97 do_chain: 93 98 rule = list_entry(&chain->rules, struct nft_rule, list); 94 99 next_rule: 95 100 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; 96 101 list_for_each_entry_continue_rcu(rule, &chain->rules, list) { 102 + 103 + /* This rule is not active, skip. */ 104 + if (unlikely(rule->genmask & (1 << gencursor))) 105 + continue; 106 + 97 107 nft_rule_for_each_expr(expr, last, rule) { 98 108 if (expr->ops == &nft_cmp_fast_ops) 99 109 nft_cmp_fast_eval(expr, data);
+171 -4
net/netfilter/nfnetlink.c
··· 147 147 const struct nfnetlink_subsystem *ss; 148 148 int type, err; 149 149 150 - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 151 - return -EPERM; 152 - 153 150 /* All the messages must at least contain nfgenmsg */ 154 151 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) 155 152 return 0; ··· 214 217 } 215 218 } 216 219 220 + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, 221 + u_int16_t subsys_id) 222 + { 223 + struct sk_buff *nskb, *oskb = skb; 224 + struct net *net = sock_net(skb->sk); 225 + const struct nfnetlink_subsystem *ss; 226 + const struct nfnl_callback *nc; 227 + bool success = true, done = false; 228 + int err; 229 + 230 + if (subsys_id >= NFNL_SUBSYS_COUNT) 231 + return netlink_ack(skb, nlh, -EINVAL); 232 + replay: 233 + nskb = netlink_skb_clone(oskb, GFP_KERNEL); 234 + if (!nskb) 235 + return netlink_ack(oskb, nlh, -ENOMEM); 236 + 237 + nskb->sk = oskb->sk; 238 + skb = nskb; 239 + 240 + nfnl_lock(subsys_id); 241 + ss = rcu_dereference_protected(table[subsys_id].subsys, 242 + lockdep_is_held(&table[subsys_id].mutex)); 243 + if (!ss) { 244 + #ifdef CONFIG_MODULES 245 + nfnl_unlock(subsys_id); 246 + request_module("nfnetlink-subsys-%d", subsys_id); 247 + nfnl_lock(subsys_id); 248 + ss = rcu_dereference_protected(table[subsys_id].subsys, 249 + lockdep_is_held(&table[subsys_id].mutex)); 250 + if (!ss) 251 + #endif 252 + { 253 + nfnl_unlock(subsys_id); 254 + kfree_skb(nskb); 255 + return netlink_ack(skb, nlh, -EOPNOTSUPP); 256 + } 257 + } 258 + 259 + if (!ss->commit || !ss->abort) { 260 + nfnl_unlock(subsys_id); 261 + kfree_skb(nskb); 262 + return netlink_ack(skb, nlh, -EOPNOTSUPP); 263 + } 264 + 265 + while (skb->len >= nlmsg_total_size(0)) { 266 + int msglen, type; 267 + 268 + nlh = nlmsg_hdr(skb); 269 + err = 0; 270 + 271 + if (nlh->nlmsg_len < NLMSG_HDRLEN) { 272 + err = -EINVAL; 273 + goto ack; 274 + } 275 + 276 + /* Only requests are handled by the kernel */ 277 + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { 278 + err = 
-EINVAL; 279 + goto ack; 280 + } 281 + 282 + type = nlh->nlmsg_type; 283 + if (type == NFNL_MSG_BATCH_BEGIN) { 284 + /* Malformed: Batch begin twice */ 285 + success = false; 286 + goto done; 287 + } else if (type == NFNL_MSG_BATCH_END) { 288 + done = true; 289 + goto done; 290 + } else if (type < NLMSG_MIN_TYPE) { 291 + err = -EINVAL; 292 + goto ack; 293 + } 294 + 295 + /* We only accept a batch with messages for the same 296 + * subsystem. 297 + */ 298 + if (NFNL_SUBSYS_ID(type) != subsys_id) { 299 + err = -EINVAL; 300 + goto ack; 301 + } 302 + 303 + nc = nfnetlink_find_client(type, ss); 304 + if (!nc) { 305 + err = -EINVAL; 306 + goto ack; 307 + } 308 + 309 + { 310 + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 311 + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); 312 + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; 313 + struct nlattr *attr = (void *)nlh + min_len; 314 + int attrlen = nlh->nlmsg_len - min_len; 315 + 316 + err = nla_parse(cda, ss->cb[cb_id].attr_count, 317 + attr, attrlen, ss->cb[cb_id].policy); 318 + if (err < 0) 319 + goto ack; 320 + 321 + if (nc->call_batch) { 322 + err = nc->call_batch(net->nfnl, skb, nlh, 323 + (const struct nlattr **)cda); 324 + } 325 + 326 + /* The lock was released to autoload some module, we 327 + * have to abort and start from scratch using the 328 + * original skb. 329 + */ 330 + if (err == -EAGAIN) { 331 + ss->abort(skb); 332 + nfnl_unlock(subsys_id); 333 + kfree_skb(nskb); 334 + goto replay; 335 + } 336 + } 337 + ack: 338 + if (nlh->nlmsg_flags & NLM_F_ACK || err) { 339 + /* We don't stop processing the batch on errors, thus, 340 + * userspace gets all the errors that the batch 341 + * triggers. 
342 + */ 343 + netlink_ack(skb, nlh, err); 344 + if (err) 345 + success = false; 346 + } 347 + 348 + msglen = NLMSG_ALIGN(nlh->nlmsg_len); 349 + if (msglen > skb->len) 350 + msglen = skb->len; 351 + skb_pull(skb, msglen); 352 + } 353 + done: 354 + if (success && done) 355 + ss->commit(skb); 356 + else 357 + ss->abort(skb); 358 + 359 + nfnl_unlock(subsys_id); 360 + kfree_skb(nskb); 361 + } 362 + 217 363 static void nfnetlink_rcv(struct sk_buff *skb) 218 364 { 219 - netlink_rcv_skb(skb, &nfnetlink_rcv_msg); 365 + struct nlmsghdr *nlh = nlmsg_hdr(skb); 366 + struct net *net = sock_net(skb->sk); 367 + int msglen; 368 + 369 + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 370 + return netlink_ack(skb, nlh, -EPERM); 371 + 372 + if (nlh->nlmsg_len < NLMSG_HDRLEN || 373 + skb->len < nlh->nlmsg_len) 374 + return; 375 + 376 + if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) { 377 + struct nfgenmsg *nfgenmsg; 378 + 379 + msglen = NLMSG_ALIGN(nlh->nlmsg_len); 380 + if (msglen > skb->len) 381 + msglen = skb->len; 382 + 383 + if (nlh->nlmsg_len < NLMSG_HDRLEN || 384 + skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) 385 + return; 386 + 387 + nfgenmsg = nlmsg_data(nlh); 388 + skb_pull(skb, msglen); 389 + nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id); 390 + } else { 391 + netlink_rcv_skb(skb, &nfnetlink_rcv_msg); 392 + } 220 393 } 221 394 222 395 #ifdef CONFIG_MODULES