Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-sched-optimizations-around-action-binding-and-init'

Pedro Tammela says:

====================
net/sched: optimizations around action binding and init

Scaling optimizations for action binding in rtnl-less filters.
We saw a noticeable lock contention around idrinfo->lock when
testing in a 56 core system, which disappeared after the patches.
====================

Link: https://lore.kernel.org/r/20231211181807.96028-1-pctammela@mojatatu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+51 -29
+1 -1
include/net/act_api.h
··· 191 191 struct nlattr *est, struct tc_action **a, 192 192 const struct tc_action_ops *ops, int bind, 193 193 u32 flags); 194 - void tcf_idr_insert_many(struct tc_action *actions[]); 194 + void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]); 195 195 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); 196 196 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, 197 197 struct tc_action **a, int bind);
+49 -27
net/sched/act_api.c
··· 816 816 * its reference and bind counters, and return 1. Otherwise insert temporary 817 817 * error pointer (to prevent concurrent users from inserting actions with same 818 818 * index) and return 0. 819 + * 820 + * May return -EAGAIN for binding actions in case of a parallel add/delete on 821 + * the requested index. 819 822 */ 820 823 821 824 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, ··· 827 824 struct tcf_idrinfo *idrinfo = tn->idrinfo; 828 825 struct tc_action *p; 829 826 int ret; 827 + u32 max; 830 828 831 - again: 832 - mutex_lock(&idrinfo->lock); 833 829 if (*index) { 830 + again: 831 + rcu_read_lock(); 834 832 p = idr_find(&idrinfo->action_idr, *index); 833 + 835 834 if (IS_ERR(p)) { 836 835 /* This means that another process allocated 837 836 * index but did not assign the pointer yet. 838 837 */ 839 - mutex_unlock(&idrinfo->lock); 838 + rcu_read_unlock(); 840 839 goto again; 841 840 } 842 841 843 - if (p) { 844 - refcount_inc(&p->tcfa_refcnt); 845 - if (bind) 846 - atomic_inc(&p->tcfa_bindcnt); 847 - *a = p; 848 - ret = 1; 849 - } else { 850 - *a = NULL; 851 - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, 852 - *index, GFP_KERNEL); 853 - if (!ret) 854 - idr_replace(&idrinfo->action_idr, 855 - ERR_PTR(-EBUSY), *index); 842 + if (!p) { 843 + /* Empty slot, try to allocate it */ 844 + max = *index; 845 + rcu_read_unlock(); 846 + goto new; 856 847 } 848 + 849 + if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { 850 + /* Action was deleted in parallel */ 851 + rcu_read_unlock(); 852 + return -EAGAIN; 853 + } 854 + 855 + if (bind) 856 + atomic_inc(&p->tcfa_bindcnt); 857 + *a = p; 858 + 859 + rcu_read_unlock(); 860 + 861 + return 1; 857 862 } else { 863 + /* Find a slot */ 858 864 *index = 1; 859 - *a = NULL; 860 - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, 861 - UINT_MAX, GFP_KERNEL); 862 - if (!ret) 863 - idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), 864 - *index); 865 + max = UINT_MAX; 865 866 } 867 + 868 + new: 869 + *a = NULL; 870 + 871 + mutex_lock(&idrinfo->lock); 872 + ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, 873 + GFP_KERNEL); 866 874 mutex_unlock(&idrinfo->lock); 875 + 876 + /* N binds raced for action allocation, 877 + * retry for all the ones that failed. 878 + */ 879 + if (ret == -ENOSPC && *index == max) 880 + ret = -EAGAIN; 881 + 867 882 return ret; 868 883 } 869 884 EXPORT_SYMBOL(tcf_idr_check_alloc); ··· 1304 1283 [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), 1305 1284 }; 1306 1285 1307 - void tcf_idr_insert_many(struct tc_action *actions[]) 1286 + void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) 1308 1287 { 1309 1288 struct tc_action *a; 1310 1289 int i; ··· 1312 1291 tcf_act_for_each_action(i, a, actions) { 1313 1292 struct tcf_idrinfo *idrinfo; 1314 1293 1294 + if (init_res[i] == 0) /* Bound */ 1295 + continue; 1296 + 1315 1297 idrinfo = a->idrinfo; 1316 1298 mutex_lock(&idrinfo->lock); 1317 - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if 1318 - * it is just created, otherwise this is just a nop. 1319 - */ 1299 + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ 1320 1300 idr_replace(&idrinfo->action_idr, a, a->tcfa_index); 1321 1301 mutex_unlock(&idrinfo->lock); 1322 1302 } ··· 1517 1495 /* We have to commit them all together, because if any error happened in 1518 1496 * between, we could not handle the failure gracefully. 1519 1497 */ 1520 - tcf_idr_insert_many(actions); 1498 + tcf_idr_insert_many(actions, init_res); 1521 1499 1522 1500 *attr_size = tcf_action_full_attrs_size(sz); 1523 1501 err = i - 1;
+1 -1
net/sched/cls_api.c
··· 3313 3313 act->type = exts->type = TCA_OLD_COMPAT; 3314 3314 exts->actions[0] = act; 3315 3315 exts->nr_actions = 1; 3316 - tcf_idr_insert_many(exts->actions); 3316 + tcf_idr_insert_many(exts->actions, init_res); 3317 3317 } else if (exts->action && tb[exts->action]) { 3318 3318 int err; 3319 3319