Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: sched: take reference to psample group in flow_action infra

With recent patch set that removed rtnl lock dependency from cls hardware
offload API rtnl lock is only taken when reading action data and can be
released after action-specific data is parsed into intermediate
representation. However, sample action psample group is passed by pointer
without obtaining reference to it first, which makes it possible to
concurrently overwrite the action and deallocate object pointed by
psample_group pointer after rtnl lock is released but before driver
finished using the pointer.

To prevent such race condition, obtain reference to psample group while it
is used by flow_action infra. Extend psample API with function
psample_group_take() that increments psample group reference counter.
Extend struct tc_action_ops with new get_psample_group() API. Implement the
API for action sample using psample_group_take() and already existing
psample_group_put() as a destructor. Use it in tc_setup_flow_action() to
take reference to psample group pointed to by entry->sample.psample_group
and release it in tc_cleanup_flow_action().

Disable bh when taking psample_groups_lock. The lock is now taken while
holding action tcf_lock that is used by data path and requires bh to be
disabled, so doing the same for psample_groups_lock is necessary to
preserve SOFTIRQ-irq-safety.

Fixes: 918190f50eb6 ("net: sched: flower: don't take rtnl lock for cls hw offloads API")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Vlad Buslov and committed by
David S. Miller
4a5da47d 1158958a

+58 -14
+5
include/net/act_api.h
··· 78 78 #define ACT_P_CREATED 1 79 79 #define ACT_P_DELETED 1 80 80 81 + typedef void (*tc_action_priv_destructor)(void *priv); 82 + 81 83 struct tc_action_ops { 82 84 struct list_head head; 83 85 char kind[IFNAMSIZ]; ··· 103 101 size_t (*get_fill_size)(const struct tc_action *act); 104 102 struct net_device *(*get_dev)(const struct tc_action *a); 105 103 void (*put_dev)(struct net_device *dev); 104 + struct psample_group * 105 + (*get_psample_group)(const struct tc_action *a, 106 + tc_action_priv_destructor *destructor); 106 107 }; 107 108 108 109 struct tc_action_net {
+1
include/net/psample.h
··· 15 15 }; 16 16 17 17 struct psample_group *psample_group_get(struct net *net, u32 group_num); 18 + void psample_group_take(struct psample_group *group); 18 19 void psample_group_put(struct psample_group *group); 19 20 20 21 #if IS_ENABLED(CONFIG_PSAMPLE)
-6
include/net/tc_act/tc_sample.h
··· 41 41 return to_sample(a)->trunc_size; 42 42 } 43 43 44 - static inline struct psample_group * 45 - tcf_sample_psample_group(const struct tc_action *a) 46 - { 47 - return rcu_dereference_rtnl(to_sample(a)->psample_group); 48 - } 49 - 50 44 #endif /* __NET_TC_SAMPLE_H */
+14 -6
net/psample/psample.c
··· 73 73 int idx = 0; 74 74 int err; 75 75 76 - spin_lock(&psample_groups_lock); 76 + spin_lock_bh(&psample_groups_lock); 77 77 list_for_each_entry(group, &psample_groups_list, list) { 78 78 if (!net_eq(group->net, sock_net(msg->sk))) 79 79 continue; ··· 89 89 idx++; 90 90 } 91 91 92 - spin_unlock(&psample_groups_lock); 92 + spin_unlock_bh(&psample_groups_lock); 93 93 cb->args[0] = idx; 94 94 return msg->len; 95 95 } ··· 172 172 { 173 173 struct psample_group *group; 174 174 175 - spin_lock(&psample_groups_lock); 175 + spin_lock_bh(&psample_groups_lock); 176 176 177 177 group = psample_group_lookup(net, group_num); 178 178 if (!group) { ··· 183 183 group->refcount++; 184 184 185 185 out: 186 - spin_unlock(&psample_groups_lock); 186 + spin_unlock_bh(&psample_groups_lock); 187 187 return group; 188 188 } 189 189 EXPORT_SYMBOL_GPL(psample_group_get); 190 190 191 + void psample_group_take(struct psample_group *group) 192 + { 193 + spin_lock_bh(&psample_groups_lock); 194 + group->refcount++; 195 + spin_unlock_bh(&psample_groups_lock); 196 + } 197 + EXPORT_SYMBOL_GPL(psample_group_take); 198 + 191 199 void psample_group_put(struct psample_group *group) 192 200 { 193 - spin_lock(&psample_groups_lock); 201 + spin_lock_bh(&psample_groups_lock); 194 202 195 203 if (--group->refcount == 0) 196 204 psample_group_destroy(group); 197 205 198 - spin_unlock(&psample_groups_lock); 206 + spin_unlock_bh(&psample_groups_lock); 199 207 } 200 208 EXPORT_SYMBOL_GPL(psample_group_put); 201 209
+27
net/sched/act_sample.c
··· 252 252 return tcf_idr_search(tn, a, index); 253 253 } 254 254 255 + static void tcf_psample_group_put(void *priv) 256 + { 257 + struct psample_group *group = priv; 258 + 259 + psample_group_put(group); 260 + } 261 + 262 + static struct psample_group * 263 + tcf_sample_get_group(const struct tc_action *a, 264 + tc_action_priv_destructor *destructor) 265 + { 266 + struct tcf_sample *s = to_sample(a); 267 + struct psample_group *group; 268 + 269 + spin_lock_bh(&s->tcf_lock); 270 + group = rcu_dereference_protected(s->psample_group, 271 + lockdep_is_held(&s->tcf_lock)); 272 + if (group) { 273 + psample_group_take(group); 274 + *destructor = tcf_psample_group_put; 275 + } 276 + spin_unlock_bh(&s->tcf_lock); 277 + 278 + return group; 279 + } 280 + 255 281 static struct tc_action_ops act_sample_ops = { 256 282 .kind = "sample", 257 283 .id = TCA_ID_SAMPLE, ··· 288 262 .cleanup = tcf_sample_cleanup, 289 263 .walk = tcf_sample_walker, 290 264 .lookup = tcf_sample_search, 265 + .get_psample_group = tcf_sample_get_group, 291 266 .size = sizeof(struct tcf_sample), 292 267 }; 293 268
+11 -2
net/sched/cls_api.c
··· 3324 3324 return 0; 3325 3325 } 3326 3326 3327 + static void tcf_sample_get_group(struct flow_action_entry *entry, 3328 + const struct tc_action *act) 3329 + { 3330 + #ifdef CONFIG_NET_CLS_ACT 3331 + entry->sample.psample_group = 3332 + act->ops->get_psample_group(act, &entry->destructor); 3333 + entry->destructor_priv = entry->sample.psample_group; 3334 + #endif 3335 + } 3336 + 3327 3337 int tc_setup_flow_action(struct flow_action *flow_action, 3328 3338 const struct tcf_exts *exts, bool rtnl_held) 3329 3339 { ··· 3427 3417 entry->mark = tcf_skbedit_mark(act); 3428 3418 } else if (is_tcf_sample(act)) { 3429 3419 entry->id = FLOW_ACTION_SAMPLE; 3430 - entry->sample.psample_group = 3431 - tcf_sample_psample_group(act); 3432 3420 entry->sample.trunc_size = tcf_sample_trunc_size(act); 3433 3421 entry->sample.truncate = tcf_sample_truncate(act); 3434 3422 entry->sample.rate = tcf_sample_rate(act); 3423 + tcf_sample_get_group(entry, act); 3435 3424 } else if (is_tcf_police(act)) { 3436 3425 entry->id = FLOW_ACTION_POLICE; 3437 3426 entry->police.burst = tcf_police_tcfp_burst(act);