Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Allow refcounted bpf_rb_node used in bpf_rbtree_{remove,left,right}

The bpf_rbtree_{remove,left,right} requires the root's lock to be held.
They also check the node_internal->owner is still owned by that root
before proceeding, so it is safe to allow refcounted bpf_rb_node
pointer to be used in these kfuncs.

In a bpf fq implementation which is much closer to the kernel fq,
https://lore.kernel.org/bpf/20250418224652.105998-13-martin.lau@linux.dev/,
a networking flow (allocated by bpf_obj_new) can be added to two different
rbtrees. There are cases that the flow is searched from one rbtree,
held the refcount of the flow, and then removed from another rbtree:

struct fq_flow {
struct bpf_rb_node fq_node;
struct bpf_rb_node rate_node;
struct bpf_refcount refcount;
unsigned long sk_long;
};

int bpf_fq_enqueue(...)
{
/* ... */

bpf_spin_lock(&root->lock);
while (can_loop) {
/* ... */
if (!p)
break;
gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
if (gc_f->sk_long == sk_long) {
f = bpf_refcount_acquire(gc_f);
break;
}
/* ... */
}
bpf_spin_unlock(&root->lock);

if (f) {
bpf_spin_lock(&q->lock);
bpf_rbtree_remove(&q->delayed, &f->rate_node);
bpf_spin_unlock(&q->lock);
}
}

bpf_rbtree_{left,right} do not need this change but are relaxed together
with bpf_rbtree_remove instead of adding extra verifier logic
to exclude these kfuncs.

To avoid bi-sect failure, this patch also changes the selftests together.

The "rbtree_api_remove_unadded_node" is not expecting verifier's error.
The test now expects bpf_rbtree_remove(&groot, &m->node) to return NULL.
The test uses __retval(0) to ensure this NULL return value.

Some of the "only take non-owning..." failure messages are changed also.

Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20250506015857.817950-5-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Martin KaFai Lau and committed by
Alexei Starovoitov
2ddef178 9e3e66c5

+17 -16
+2 -2
kernel/bpf/verifier.c
··· 13229 13229 return -EINVAL; 13230 13230 } 13231 13231 } else { 13232 - if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) { 13233 - verbose(env, "%s node input must be non-owning ref\n", func_name); 13232 + if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) { 13233 + verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name); 13234 13234 return -EINVAL; 13235 13235 } 13236 13236 if (in_rbtree_lock_required_cb(env)) {
+15 -14
tools/testing/selftests/bpf/progs/rbtree_fail.c
··· 69 69 } 70 70 71 71 SEC("?tc") 72 - __failure __msg("rbtree_remove node input must be non-owning ref") 72 + __retval(0) 73 73 long rbtree_api_remove_unadded_node(void *ctx) 74 74 { 75 75 struct node_data *n, *m; 76 - struct bpf_rb_node *res; 76 + struct bpf_rb_node *res_n, *res_m; 77 77 78 78 n = bpf_obj_new(typeof(*n)); 79 79 if (!n) ··· 88 88 bpf_spin_lock(&glock); 89 89 bpf_rbtree_add(&groot, &n->node, less); 90 90 91 - /* This remove should pass verifier */ 92 - res = bpf_rbtree_remove(&groot, &n->node); 93 - n = container_of(res, struct node_data, node); 91 + res_n = bpf_rbtree_remove(&groot, &n->node); 94 92 95 - /* This remove shouldn't, m isn't in an rbtree */ 96 - res = bpf_rbtree_remove(&groot, &m->node); 97 - m = container_of(res, struct node_data, node); 93 + res_m = bpf_rbtree_remove(&groot, &m->node); 98 94 bpf_spin_unlock(&glock); 99 95 100 - if (n) 101 - bpf_obj_drop(n); 102 - if (m) 103 - bpf_obj_drop(m); 96 + bpf_obj_drop(m); 97 + if (res_n) 98 + bpf_obj_drop(container_of(res_n, struct node_data, node)); 99 + if (res_m) { 100 + bpf_obj_drop(container_of(res_m, struct node_data, node)); 101 + /* m was not added to the rbtree */ 102 + return 2; 103 + } 104 + 104 105 return 0; 105 106 } 106 107 ··· 179 178 } 180 179 181 180 SEC("?tc") 182 - __failure __msg("rbtree_remove node input must be non-owning ref") 181 + __failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") 183 182 long rbtree_api_add_release_unlock_escape(void *ctx) 184 183 { 185 184 struct node_data *n; ··· 203 202 } 204 203 205 204 SEC("?tc") 206 - __failure __msg("rbtree_remove node input must be non-owning ref") 205 + __failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") 207 206 long rbtree_api_first_release_unlock_escape(void *ctx) 208 207 { 209 208 struct bpf_rb_node *res;