Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/sched: flower: Fix chain template offload

When a qdisc is deleted from a net device the stack instructs the
underlying driver to remove its flow offload callback from the
associated filter block using the 'FLOW_BLOCK_UNBIND' command. The stack
then continues to replay the removal of the filters in the block for
this driver by iterating over the chains in the block and invoking the
'reoffload' operation of the classifier being used. In turn, the
classifier in its 'reoffload' operation prepares and emits a
'FLOW_CLS_DESTROY' command for each filter.

However, the stack does not do the same for chain templates and the
underlying driver never receives a 'FLOW_CLS_TMPLT_DESTROY' command when
a qdisc is deleted. This results in a memory leak [1] which can be
reproduced using [2].

Fix by introducing a 'tmplt_reoffload' operation and have the stack
invoke it with the appropriate arguments as part of the replay.
Implement the operation in the sole classifier that supports chain
templates (flower) by emitting the 'FLOW_CLS_TMPLT_{CREATE,DESTROY}'
command based on whether a flow offload callback is being bound to a
filter block or being unbound from one.

As far as I can tell, the issue happens since cited commit which
reordered tcf_block_offload_unbind() before tcf_block_flush_all_chains()
in __tcf_block_put(). The order cannot be reversed as the filter block
is expected to be freed after flushing all the chains.

[1]
unreferenced object 0xffff888107e28800 (size 2048):
comm "tc", pid 1079, jiffies 4294958525 (age 3074.287s)
hex dump (first 32 bytes):
b1 a6 7c 11 81 88 ff ff e0 5b b3 10 81 88 ff ff ..|......[......
01 00 00 00 00 00 00 00 e0 aa b0 84 ff ff ff ff ................
backtrace:
[<ffffffff81c06a68>] __kmem_cache_alloc_node+0x1e8/0x320
[<ffffffff81ab374e>] __kmalloc+0x4e/0x90
[<ffffffff832aec6d>] mlxsw_sp_acl_ruleset_get+0x34d/0x7a0
[<ffffffff832bc195>] mlxsw_sp_flower_tmplt_create+0x145/0x180
[<ffffffff832b2e1a>] mlxsw_sp_flow_block_cb+0x1ea/0x280
[<ffffffff83a10613>] tc_setup_cb_call+0x183/0x340
[<ffffffff83a9f85a>] fl_tmplt_create+0x3da/0x4c0
[<ffffffff83a22435>] tc_ctl_chain+0xa15/0x1170
[<ffffffff838a863c>] rtnetlink_rcv_msg+0x3cc/0xed0
[<ffffffff83ac87f0>] netlink_rcv_skb+0x170/0x440
[<ffffffff83ac6270>] netlink_unicast+0x540/0x820
[<ffffffff83ac6e28>] netlink_sendmsg+0x8d8/0xda0
[<ffffffff83793def>] ____sys_sendmsg+0x30f/0xa80
[<ffffffff8379d29a>] ___sys_sendmsg+0x13a/0x1e0
[<ffffffff8379d50c>] __sys_sendmsg+0x11c/0x1f0
[<ffffffff843b9ce0>] do_syscall_64+0x40/0xe0
unreferenced object 0xffff88816d2c0400 (size 1024):
comm "tc", pid 1079, jiffies 4294958525 (age 3074.287s)
hex dump (first 32 bytes):
40 00 00 00 00 00 00 00 57 f6 38 be 00 00 00 00 @.......W.8.....
10 04 2c 6d 81 88 ff ff 10 04 2c 6d 81 88 ff ff ..,m......,m....
backtrace:
[<ffffffff81c06a68>] __kmem_cache_alloc_node+0x1e8/0x320
[<ffffffff81ab36c1>] __kmalloc_node+0x51/0x90
[<ffffffff81a8ed96>] kvmalloc_node+0xa6/0x1f0
[<ffffffff82827d03>] bucket_table_alloc.isra.0+0x83/0x460
[<ffffffff82828d2b>] rhashtable_init+0x43b/0x7c0
[<ffffffff832aed48>] mlxsw_sp_acl_ruleset_get+0x428/0x7a0
[<ffffffff832bc195>] mlxsw_sp_flower_tmplt_create+0x145/0x180
[<ffffffff832b2e1a>] mlxsw_sp_flow_block_cb+0x1ea/0x280
[<ffffffff83a10613>] tc_setup_cb_call+0x183/0x340
[<ffffffff83a9f85a>] fl_tmplt_create+0x3da/0x4c0
[<ffffffff83a22435>] tc_ctl_chain+0xa15/0x1170
[<ffffffff838a863c>] rtnetlink_rcv_msg+0x3cc/0xed0
[<ffffffff83ac87f0>] netlink_rcv_skb+0x170/0x440
[<ffffffff83ac6270>] netlink_unicast+0x540/0x820
[<ffffffff83ac6e28>] netlink_sendmsg+0x8d8/0xda0
[<ffffffff83793def>] ____sys_sendmsg+0x30f/0xa80

[2]
# tc qdisc add dev swp1 clsact
# tc chain add dev swp1 ingress proto ip chain 1 flower dst_ip 0.0.0.0/32
# tc qdisc del dev swp1 clsact
# devlink dev reload pci/0000:06:00.0

Fixes: bbf73830cd48 ("net: sched: traverse chains in block with tcf_get_next_chain()")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Ido Schimmel and committed by
David S. Miller
32f2a0af 04fe7c50

+35 -1
+4
include/net/sch_generic.h
··· 375 375 struct nlattr **tca, 376 376 struct netlink_ext_ack *extack); 377 377 void (*tmplt_destroy)(void *tmplt_priv); 378 + void (*tmplt_reoffload)(struct tcf_chain *chain, 379 + bool add, 380 + flow_setup_cb_t *cb, 381 + void *cb_priv); 378 382 struct tcf_exts * (*get_exts)(const struct tcf_proto *tp, 379 383 u32 handle); 380 384
+8 -1
net/sched/cls_api.c
··· 1560 1560 chain_prev = chain, 1561 1561 chain = __tcf_get_next_chain(block, chain), 1562 1562 tcf_chain_put(chain_prev)) { 1563 + if (chain->tmplt_ops && add) 1564 + chain->tmplt_ops->tmplt_reoffload(chain, true, cb, 1565 + cb_priv); 1563 1566 for (tp = __tcf_get_next_proto(chain, NULL); tp; 1564 1567 tp_prev = tp, 1565 1568 tp = __tcf_get_next_proto(chain, tp), ··· 1578 1575 goto err_playback_remove; 1579 1576 } 1580 1577 } 1578 + if (chain->tmplt_ops && !add) 1579 + chain->tmplt_ops->tmplt_reoffload(chain, false, cb, 1580 + cb_priv); 1581 1581 } 1582 1582 1583 1583 return 0; ··· 3006 3000 ops = tcf_proto_lookup_ops(name, true, extack); 3007 3001 if (IS_ERR(ops)) 3008 3002 return PTR_ERR(ops); 3009 - if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { 3003 + if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump || 3004 + !ops->tmplt_reoffload) { 3010 3005 NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); 3011 3006 module_put(ops->owner); 3012 3007 return -EOPNOTSUPP;
+23
net/sched/cls_flower.c
··· 2721 2721 kfree(tmplt); 2722 2722 } 2723 2723 2724 + static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add, 2725 + flow_setup_cb_t *cb, void *cb_priv) 2726 + { 2727 + struct fl_flow_tmplt *tmplt = chain->tmplt_priv; 2728 + struct flow_cls_offload cls_flower = {}; 2729 + 2730 + cls_flower.rule = flow_rule_alloc(0); 2731 + if (!cls_flower.rule) 2732 + return; 2733 + 2734 + cls_flower.common.chain_index = chain->index; 2735 + cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE : 2736 + FLOW_CLS_TMPLT_DESTROY; 2737 + cls_flower.cookie = (unsigned long) tmplt; 2738 + cls_flower.rule->match.dissector = &tmplt->dissector; 2739 + cls_flower.rule->match.mask = &tmplt->mask; 2740 + cls_flower.rule->match.key = &tmplt->dummy_key; 2741 + 2742 + cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); 2743 + kfree(cls_flower.rule); 2744 + } 2745 + 2724 2746 static int fl_dump_key_val(struct sk_buff *skb, 2725 2747 void *val, int val_type, 2726 2748 void *mask, int mask_type, int len) ··· 3650 3628 .bind_class = fl_bind_class, 3651 3629 .tmplt_create = fl_tmplt_create, 3652 3630 .tmplt_destroy = fl_tmplt_destroy, 3631 + .tmplt_reoffload = fl_tmplt_reoffload, 3653 3632 .tmplt_dump = fl_tmplt_dump, 3654 3633 .get_exts = fl_get_exts, 3655 3634 .owner = THIS_MODULE,