Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Implement verifier support for rqspinlock

Introduce verifier-side support for rqspinlock kfuncs. The first step is
allowing bpf_res_spin_lock type to be defined in map values and
allocated objects, so BTF-side is updated with a new BPF_RES_SPIN_LOCK
field to recognize and validate.

An object cannot contain both bpf_spin_lock and bpf_res_spin_lock; at
most one of the two (and, as before, at most one instance per object)
may be present. The bpf_res_spin_lock can also be used to protect
objects that require lock protection for their kfuncs, like BPF rbtree
and linked list.

The verifier plumbing to simulate success and failure cases when calling
the kfuncs is done by pushing a new verifier state to the verifier state
stack which will verify the failure case upon calling the kfunc. The
path where success is indicated creates all lock reference state and IRQ
state (if necessary for irqsave variants). In the case of failure, the
state clears the registers r0-r5, sets the return value, and skips kfunc
processing, proceeding to the next instruction.

When marking the return value, the success case is marked as 0 and the
failure case as [-MAX_ERRNO, -1]. Then, whenever the program checks the
return value as 'if (ret)' or 'if (ret < 0)', the verifier never
traverses such branches for the success case, and knows that the lock is
not held in those branches.

We push the kfunc state in check_kfunc_call whenever rqspinlock kfuncs
are invoked. We introduce a kfunc_class state to avoid mixing lock
irqrestore kfuncs with IRQ state created by bpf_local_irq_save.

With all this infrastructure, these kfuncs become usable in programs
while satisfying all safety properties required by the kernel.

Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250316040541.108729-24-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Kumar Kartikeya Dwivedi and committed by
Alexei Starovoitov
0de20461 97eb35f3

+231 -45
+9
include/linux/bpf.h
··· 205 205 BPF_REFCOUNT = (1 << 9), 206 206 BPF_WORKQUEUE = (1 << 10), 207 207 BPF_UPTR = (1 << 11), 208 + BPF_RES_SPIN_LOCK = (1 << 12), 208 209 }; 209 210 210 211 typedef void (*btf_dtor_kfunc_t)(void *); ··· 241 240 u32 cnt; 242 241 u32 field_mask; 243 242 int spin_lock_off; 243 + int res_spin_lock_off; 244 244 int timer_off; 245 245 int wq_off; 246 246 int refcount_off; ··· 317 315 switch (type) { 318 316 case BPF_SPIN_LOCK: 319 317 return "bpf_spin_lock"; 318 + case BPF_RES_SPIN_LOCK: 319 + return "bpf_res_spin_lock"; 320 320 case BPF_TIMER: 321 321 return "bpf_timer"; 322 322 case BPF_WORKQUEUE: ··· 351 347 switch (type) { 352 348 case BPF_SPIN_LOCK: 353 349 return sizeof(struct bpf_spin_lock); 350 + case BPF_RES_SPIN_LOCK: 351 + return sizeof(struct bpf_res_spin_lock); 354 352 case BPF_TIMER: 355 353 return sizeof(struct bpf_timer); 356 354 case BPF_WORKQUEUE: ··· 383 377 switch (type) { 384 378 case BPF_SPIN_LOCK: 385 379 return __alignof__(struct bpf_spin_lock); 380 + case BPF_RES_SPIN_LOCK: 381 + return __alignof__(struct bpf_res_spin_lock); 386 382 case BPF_TIMER: 387 383 return __alignof__(struct bpf_timer); 388 384 case BPF_WORKQUEUE: ··· 428 420 case BPF_RB_ROOT: 429 421 /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ 430 422 case BPF_SPIN_LOCK: 423 + case BPF_RES_SPIN_LOCK: 431 424 case BPF_TIMER: 432 425 case BPF_WORKQUEUE: 433 426 case BPF_KPTR_UNREF:
+13 -3
include/linux/bpf_verifier.h
··· 115 115 int depth:30; 116 116 } iter; 117 117 118 + /* For irq stack slots */ 119 + struct { 120 + enum { 121 + IRQ_NATIVE_KFUNC, 122 + IRQ_LOCK_KFUNC, 123 + } kfunc_class; 124 + } irq; 125 + 118 126 /* Max size from any of the above. */ 119 127 struct { 120 128 unsigned long raw1; ··· 263 255 * default to pointer reference on zero initialization of a state. 264 256 */ 265 257 enum ref_state_type { 266 - REF_TYPE_PTR = 1, 267 - REF_TYPE_IRQ = 2, 268 - REF_TYPE_LOCK = 3, 258 + REF_TYPE_PTR = (1 << 1), 259 + REF_TYPE_IRQ = (1 << 2), 260 + REF_TYPE_LOCK = (1 << 3), 261 + REF_TYPE_RES_LOCK = (1 << 4), 262 + REF_TYPE_RES_LOCK_IRQ = (1 << 5), 269 263 } type; 270 264 /* Track each reference created with a unique id, even if the same 271 265 * instruction creates the reference multiple times (eg, via CALL).
+24 -2
kernel/bpf/btf.c
··· 3481 3481 goto end; 3482 3482 } 3483 3483 } 3484 + if (field_mask & BPF_RES_SPIN_LOCK) { 3485 + if (!strcmp(name, "bpf_res_spin_lock")) { 3486 + if (*seen_mask & BPF_RES_SPIN_LOCK) 3487 + return -E2BIG; 3488 + *seen_mask |= BPF_RES_SPIN_LOCK; 3489 + type = BPF_RES_SPIN_LOCK; 3490 + goto end; 3491 + } 3492 + } 3484 3493 if (field_mask & BPF_TIMER) { 3485 3494 if (!strcmp(name, "bpf_timer")) { 3486 3495 if (*seen_mask & BPF_TIMER) ··· 3668 3659 3669 3660 switch (field_type) { 3670 3661 case BPF_SPIN_LOCK: 3662 + case BPF_RES_SPIN_LOCK: 3671 3663 case BPF_TIMER: 3672 3664 case BPF_WORKQUEUE: 3673 3665 case BPF_LIST_NODE: ··· 3962 3952 return ERR_PTR(-ENOMEM); 3963 3953 3964 3954 rec->spin_lock_off = -EINVAL; 3955 + rec->res_spin_lock_off = -EINVAL; 3965 3956 rec->timer_off = -EINVAL; 3966 3957 rec->wq_off = -EINVAL; 3967 3958 rec->refcount_off = -EINVAL; ··· 3989 3978 WARN_ON_ONCE(rec->spin_lock_off >= 0); 3990 3979 /* Cache offset for faster lookup at runtime */ 3991 3980 rec->spin_lock_off = rec->fields[i].offset; 3981 + break; 3982 + case BPF_RES_SPIN_LOCK: 3983 + WARN_ON_ONCE(rec->spin_lock_off >= 0); 3984 + /* Cache offset for faster lookup at runtime */ 3985 + rec->res_spin_lock_off = rec->fields[i].offset; 3992 3986 break; 3993 3987 case BPF_TIMER: 3994 3988 WARN_ON_ONCE(rec->timer_off >= 0); ··· 4038 4022 rec->cnt++; 4039 4023 } 4040 4024 4025 + if (rec->spin_lock_off >= 0 && rec->res_spin_lock_off >= 0) { 4026 + ret = -EINVAL; 4027 + goto end; 4028 + } 4029 + 4041 4030 /* bpf_{list_head, rb_node} require bpf_spin_lock */ 4042 4031 if ((btf_record_has_field(rec, BPF_LIST_HEAD) || 4043 - btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) { 4032 + btf_record_has_field(rec, BPF_RB_ROOT)) && 4033 + (rec->spin_lock_off < 0 && rec->res_spin_lock_off < 0)) { 4044 4034 ret = -EINVAL; 4045 4035 goto end; 4046 4036 } ··· 5659 5637 5660 5638 type = &tab->types[tab->cnt]; 5661 5639 type->btf_id = i; 5662 - record = btf_parse_fields(btf, t, 
BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | 5640 + record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | 5663 5641 BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT | 5664 5642 BPF_KPTR, t->size); 5665 5643 /* The record cannot be unset, treat it as an error if so */
+5 -1
kernel/bpf/syscall.c
··· 648 648 case BPF_RB_ROOT: 649 649 case BPF_RB_NODE: 650 650 case BPF_SPIN_LOCK: 651 + case BPF_RES_SPIN_LOCK: 651 652 case BPF_TIMER: 652 653 case BPF_REFCOUNT: 653 654 case BPF_WORKQUEUE: ··· 701 700 case BPF_RB_ROOT: 702 701 case BPF_RB_NODE: 703 702 case BPF_SPIN_LOCK: 703 + case BPF_RES_SPIN_LOCK: 704 704 case BPF_TIMER: 705 705 case BPF_REFCOUNT: 706 706 case BPF_WORKQUEUE: ··· 779 777 780 778 switch (fields[i].type) { 781 779 case BPF_SPIN_LOCK: 780 + case BPF_RES_SPIN_LOCK: 782 781 break; 783 782 case BPF_TIMER: 784 783 bpf_timer_cancel_and_free(field_ptr); ··· 1215 1212 return -EINVAL; 1216 1213 1217 1214 map->record = btf_parse_fields(btf, value_type, 1218 - BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1215 + BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | 1219 1216 BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR, 1220 1217 map->value_size); 1221 1218 if (!IS_ERR_OR_NULL(map->record)) { ··· 1234 1231 case 0: 1235 1232 continue; 1236 1233 case BPF_SPIN_LOCK: 1234 + case BPF_RES_SPIN_LOCK: 1237 1235 if (map->map_type != BPF_MAP_TYPE_HASH && 1238 1236 map->map_type != BPF_MAP_TYPE_ARRAY && 1239 1237 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
+180 -39
kernel/bpf/verifier.c
··· 456 456 457 457 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) 458 458 { 459 - return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); 459 + return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK); 460 460 } 461 461 462 462 static bool type_is_rdonly_mem(u32 type) ··· 1155 1155 1156 1156 static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env, 1157 1157 struct bpf_kfunc_call_arg_meta *meta, 1158 - struct bpf_reg_state *reg, int insn_idx) 1158 + struct bpf_reg_state *reg, int insn_idx, 1159 + int kfunc_class) 1159 1160 { 1160 1161 struct bpf_func_state *state = func(env, reg); 1161 1162 struct bpf_stack_state *slot; ··· 1178 1177 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ 1179 1178 st->live |= REG_LIVE_WRITTEN; 1180 1179 st->ref_obj_id = id; 1180 + st->irq.kfunc_class = kfunc_class; 1181 1181 1182 1182 for (i = 0; i < BPF_REG_SIZE; i++) 1183 1183 slot->slot_type[i] = STACK_IRQ_FLAG; ··· 1187 1185 return 0; 1188 1186 } 1189 1187 1190 - static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 1188 + static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 1189 + int kfunc_class) 1191 1190 { 1192 1191 struct bpf_func_state *state = func(env, reg); 1193 1192 struct bpf_stack_state *slot; ··· 1201 1198 1202 1199 slot = &state->stack[spi]; 1203 1200 st = &slot->spilled_ptr; 1201 + 1202 + if (st->irq.kfunc_class != kfunc_class) { 1203 + const char *flag_kfunc = st->irq.kfunc_class == IRQ_NATIVE_KFUNC ? "native" : "lock"; 1204 + const char *used_kfunc = kfunc_class == IRQ_NATIVE_KFUNC ? 
"native" : "lock"; 1205 + 1206 + verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n", 1207 + flag_kfunc, used_kfunc); 1208 + return -EINVAL; 1209 + } 1204 1210 1205 1211 err = release_irq_state(env->cur_state, st->ref_obj_id); 1206 1212 WARN_ON_ONCE(err && err != -EACCES); ··· 1621 1609 for (i = 0; i < state->acquired_refs; i++) { 1622 1610 struct bpf_reference_state *s = &state->refs[i]; 1623 1611 1624 - if (s->type != type) 1612 + if (!(s->type & type)) 1625 1613 continue; 1626 1614 1627 1615 if (s->id == id && s->ptr == ptr) ··· 8216 8204 return err; 8217 8205 } 8218 8206 8207 + enum { 8208 + PROCESS_SPIN_LOCK = (1 << 0), 8209 + PROCESS_RES_LOCK = (1 << 1), 8210 + PROCESS_LOCK_IRQ = (1 << 2), 8211 + }; 8212 + 8219 8213 /* Implementation details: 8220 8214 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL. 8221 8215 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL. ··· 8244 8226 * env->cur_state->active_locks remembers which map value element or allocated 8245 8227 * object got locked and clears it after bpf_spin_unlock. 8246 8228 */ 8247 - static int process_spin_lock(struct bpf_verifier_env *env, int regno, 8248 - bool is_lock) 8229 + static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags) 8249 8230 { 8231 + bool is_lock = flags & PROCESS_SPIN_LOCK, is_res_lock = flags & PROCESS_RES_LOCK; 8232 + const char *lock_str = is_res_lock ? "bpf_res_spin" : "bpf_spin"; 8250 8233 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; 8251 8234 struct bpf_verifier_state *cur = env->cur_state; 8252 8235 bool is_const = tnum_is_const(reg->var_off); 8236 + bool is_irq = flags & PROCESS_LOCK_IRQ; 8253 8237 u64 val = reg->var_off.value; 8254 8238 struct bpf_map *map = NULL; 8255 8239 struct btf *btf = NULL; 8256 8240 struct btf_record *rec; 8241 + u32 spin_lock_off; 8257 8242 int err; 8258 8243 8259 8244 if (!is_const) { 8260 8245 verbose(env, 8261 - "R%d doesn't have constant offset. 
bpf_spin_lock has to be at the constant offset\n", 8262 - regno); 8246 + "R%d doesn't have constant offset. %s_lock has to be at the constant offset\n", 8247 + regno, lock_str); 8263 8248 return -EINVAL; 8264 8249 } 8265 8250 if (reg->type == PTR_TO_MAP_VALUE) { 8266 8251 map = reg->map_ptr; 8267 8252 if (!map->btf) { 8268 8253 verbose(env, 8269 - "map '%s' has to have BTF in order to use bpf_spin_lock\n", 8270 - map->name); 8254 + "map '%s' has to have BTF in order to use %s_lock\n", 8255 + map->name, lock_str); 8271 8256 return -EINVAL; 8272 8257 } 8273 8258 } else { ··· 8278 8257 } 8279 8258 8280 8259 rec = reg_btf_record(reg); 8281 - if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) { 8282 - verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local", 8283 - map ? map->name : "kptr"); 8260 + if (!btf_record_has_field(rec, is_res_lock ? BPF_RES_SPIN_LOCK : BPF_SPIN_LOCK)) { 8261 + verbose(env, "%s '%s' has no valid %s_lock\n", map ? "map" : "local", 8262 + map ? map->name : "kptr", lock_str); 8284 8263 return -EINVAL; 8285 8264 } 8286 - if (rec->spin_lock_off != val + reg->off) { 8287 - verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n", 8288 - val + reg->off, rec->spin_lock_off); 8265 + spin_lock_off = is_res_lock ? 
rec->res_spin_lock_off : rec->spin_lock_off; 8266 + if (spin_lock_off != val + reg->off) { 8267 + verbose(env, "off %lld doesn't point to 'struct %s_lock' that is at %d\n", 8268 + val + reg->off, lock_str, spin_lock_off); 8289 8269 return -EINVAL; 8290 8270 } 8291 8271 if (is_lock) { 8292 8272 void *ptr; 8273 + int type; 8293 8274 8294 8275 if (map) 8295 8276 ptr = map; 8296 8277 else 8297 8278 ptr = btf; 8298 8279 8299 - if (cur->active_locks) { 8300 - verbose(env, 8301 - "Locking two bpf_spin_locks are not allowed\n"); 8302 - return -EINVAL; 8280 + if (!is_res_lock && cur->active_locks) { 8281 + if (find_lock_state(env->cur_state, REF_TYPE_LOCK, 0, NULL)) { 8282 + verbose(env, 8283 + "Locking two bpf_spin_locks are not allowed\n"); 8284 + return -EINVAL; 8285 + } 8286 + } else if (is_res_lock && cur->active_locks) { 8287 + if (find_lock_state(env->cur_state, REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, reg->id, ptr)) { 8288 + verbose(env, "Acquiring the same lock again, AA deadlock detected\n"); 8289 + return -EINVAL; 8290 + } 8303 8291 } 8304 - err = acquire_lock_state(env, env->insn_idx, REF_TYPE_LOCK, reg->id, ptr); 8292 + 8293 + if (is_res_lock && is_irq) 8294 + type = REF_TYPE_RES_LOCK_IRQ; 8295 + else if (is_res_lock) 8296 + type = REF_TYPE_RES_LOCK; 8297 + else 8298 + type = REF_TYPE_LOCK; 8299 + err = acquire_lock_state(env, env->insn_idx, type, reg->id, ptr); 8305 8300 if (err < 0) { 8306 8301 verbose(env, "Failed to acquire lock state\n"); 8307 8302 return err; 8308 8303 } 8309 8304 } else { 8310 8305 void *ptr; 8306 + int type; 8311 8307 8312 8308 if (map) 8313 8309 ptr = map; ··· 8332 8294 ptr = btf; 8333 8295 8334 8296 if (!cur->active_locks) { 8335 - verbose(env, "bpf_spin_unlock without taking a lock\n"); 8297 + verbose(env, "%s_unlock without taking a lock\n", lock_str); 8336 8298 return -EINVAL; 8337 8299 } 8338 8300 8339 - if (release_lock_state(env->cur_state, REF_TYPE_LOCK, reg->id, ptr)) { 8340 - verbose(env, "bpf_spin_unlock of different 
lock\n"); 8301 + if (is_res_lock && is_irq) 8302 + type = REF_TYPE_RES_LOCK_IRQ; 8303 + else if (is_res_lock) 8304 + type = REF_TYPE_RES_LOCK; 8305 + else 8306 + type = REF_TYPE_LOCK; 8307 + if (release_lock_state(cur, type, reg->id, ptr)) { 8308 + verbose(env, "%s_unlock of different lock\n", lock_str); 8341 8309 return -EINVAL; 8342 8310 } 8343 8311 ··· 9669 9625 return -EACCES; 9670 9626 } 9671 9627 if (meta->func_id == BPF_FUNC_spin_lock) { 9672 - err = process_spin_lock(env, regno, true); 9628 + err = process_spin_lock(env, regno, PROCESS_SPIN_LOCK); 9673 9629 if (err) 9674 9630 return err; 9675 9631 } else if (meta->func_id == BPF_FUNC_spin_unlock) { 9676 - err = process_spin_lock(env, regno, false); 9632 + err = process_spin_lock(env, regno, 0); 9677 9633 if (err) 9678 9634 return err; 9679 9635 } else { ··· 11555 11511 regs[BPF_REG_0].map_uid = meta.map_uid; 11556 11512 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; 11557 11513 if (!type_may_be_null(ret_flag) && 11558 - btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) { 11514 + btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) { 11559 11515 regs[BPF_REG_0].id = ++env->id_gen; 11560 11516 } 11561 11517 break; ··· 11727 11683 /* mark_btf_func_reg_size() is used when the reg size is determined by 11728 11684 * the BTF func_proto's return value size and argument. 
11729 11685 */ 11730 - static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, 11731 - size_t reg_size) 11686 + static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_reg_state *regs, 11687 + u32 regno, size_t reg_size) 11732 11688 { 11733 - struct bpf_reg_state *reg = &cur_regs(env)[regno]; 11689 + struct bpf_reg_state *reg = &regs[regno]; 11734 11690 11735 11691 if (regno == BPF_REG_0) { 11736 11692 /* Function return value */ ··· 11746 11702 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32); 11747 11703 } 11748 11704 } 11705 + } 11706 + 11707 + static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, 11708 + size_t reg_size) 11709 + { 11710 + return __mark_btf_func_reg_size(env, cur_regs(env), regno, reg_size); 11749 11711 } 11750 11712 11751 11713 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) ··· 11891 11841 KF_ARG_RB_ROOT_ID, 11892 11842 KF_ARG_RB_NODE_ID, 11893 11843 KF_ARG_WORKQUEUE_ID, 11844 + KF_ARG_RES_SPIN_LOCK_ID, 11894 11845 }; 11895 11846 11896 11847 BTF_ID_LIST(kf_arg_btf_ids) ··· 11901 11850 BTF_ID(struct, bpf_rb_root) 11902 11851 BTF_ID(struct, bpf_rb_node) 11903 11852 BTF_ID(struct, bpf_wq) 11853 + BTF_ID(struct, bpf_res_spin_lock) 11904 11854 11905 11855 static bool __is_kfunc_ptr_arg_type(const struct btf *btf, 11906 11856 const struct btf_param *arg, int type) ··· 11948 11896 static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg) 11949 11897 { 11950 11898 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID); 11899 + } 11900 + 11901 + static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg) 11902 + { 11903 + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID); 11951 11904 } 11952 11905 11953 11906 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, ··· 12026 11969 KF_ARG_PTR_TO_MAP, 12027 11970 KF_ARG_PTR_TO_WORKQUEUE, 12028 11971 
KF_ARG_PTR_TO_IRQ_FLAG, 11972 + KF_ARG_PTR_TO_RES_SPIN_LOCK, 12029 11973 }; 12030 11974 12031 11975 enum special_kfunc_type { ··· 12065 12007 KF_bpf_iter_num_destroy, 12066 12008 KF_bpf_set_dentry_xattr, 12067 12009 KF_bpf_remove_dentry_xattr, 12010 + KF_bpf_res_spin_lock, 12011 + KF_bpf_res_spin_unlock, 12012 + KF_bpf_res_spin_lock_irqsave, 12013 + KF_bpf_res_spin_unlock_irqrestore, 12068 12014 }; 12069 12015 12070 12016 BTF_SET_START(special_kfunc_set) ··· 12158 12096 BTF_ID_UNUSED 12159 12097 BTF_ID_UNUSED 12160 12098 #endif 12099 + BTF_ID(func, bpf_res_spin_lock) 12100 + BTF_ID(func, bpf_res_spin_unlock) 12101 + BTF_ID(func, bpf_res_spin_lock_irqsave) 12102 + BTF_ID(func, bpf_res_spin_unlock_irqrestore) 12161 12103 12162 12104 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) 12163 12105 { ··· 12254 12188 12255 12189 if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) 12256 12190 return KF_ARG_PTR_TO_IRQ_FLAG; 12191 + 12192 + if (is_kfunc_arg_res_spin_lock(meta->btf, &args[argno])) 12193 + return KF_ARG_PTR_TO_RES_SPIN_LOCK; 12257 12194 12258 12195 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { 12259 12196 if (!btf_type_is_struct(ref_t)) { ··· 12365 12296 struct bpf_kfunc_call_arg_meta *meta) 12366 12297 { 12367 12298 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; 12299 + int err, kfunc_class = IRQ_NATIVE_KFUNC; 12368 12300 bool irq_save; 12369 - int err; 12370 12301 12371 - if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save]) { 12302 + if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save] || 12303 + meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) { 12372 12304 irq_save = true; 12373 - } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore]) { 12305 + if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 12306 + kfunc_class = IRQ_LOCK_KFUNC; 12307 + } else if (meta->func_id == 
special_kfunc_list[KF_bpf_local_irq_restore] || 12308 + meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) { 12374 12309 irq_save = false; 12310 + if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 12311 + kfunc_class = IRQ_LOCK_KFUNC; 12375 12312 } else { 12376 12313 verbose(env, "verifier internal error: unknown irq flags kfunc\n"); 12377 12314 return -EFAULT; ··· 12393 12318 if (err) 12394 12319 return err; 12395 12320 12396 - err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx); 12321 + err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx, kfunc_class); 12397 12322 if (err) 12398 12323 return err; 12399 12324 } else { ··· 12407 12332 if (err) 12408 12333 return err; 12409 12334 12410 - err = unmark_stack_slot_irq_flag(env, reg); 12335 + err = unmark_stack_slot_irq_flag(env, reg, kfunc_class); 12411 12336 if (err) 12412 12337 return err; 12413 12338 } ··· 12534 12459 12535 12460 if (!env->cur_state->active_locks) 12536 12461 return -EINVAL; 12537 - s = find_lock_state(env->cur_state, REF_TYPE_LOCK, id, ptr); 12462 + s = find_lock_state(env->cur_state, REF_TYPE_LOCK | REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, 12463 + id, ptr); 12538 12464 if (!s) { 12539 12465 verbose(env, "held lock and object are not in the same allocation\n"); 12540 12466 return -EINVAL; ··· 12571 12495 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; 12572 12496 } 12573 12497 12498 + static bool is_bpf_res_spin_lock_kfunc(u32 btf_id) 12499 + { 12500 + return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] || 12501 + btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] || 12502 + btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 12503 + btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]; 12504 + } 12505 + 12574 12506 static bool kfunc_spin_allowed(u32 btf_id) 12575 12507 { 12576 - return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id); 12508 + return 
is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id) || 12509 + is_bpf_res_spin_lock_kfunc(btf_id); 12577 12510 } 12578 12511 12579 12512 static bool is_sync_callback_calling_kfunc(u32 btf_id) ··· 13014 12929 case KF_ARG_PTR_TO_CONST_STR: 13015 12930 case KF_ARG_PTR_TO_WORKQUEUE: 13016 12931 case KF_ARG_PTR_TO_IRQ_FLAG: 12932 + case KF_ARG_PTR_TO_RES_SPIN_LOCK: 13017 12933 break; 13018 12934 default: 13019 12935 WARN_ON_ONCE(1); ··· 13313 13227 if (ret < 0) 13314 13228 return ret; 13315 13229 break; 13230 + case KF_ARG_PTR_TO_RES_SPIN_LOCK: 13231 + { 13232 + int flags = PROCESS_RES_LOCK; 13233 + 13234 + if (reg->type != PTR_TO_MAP_VALUE && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 13235 + verbose(env, "arg#%d doesn't point to map value or allocated object\n", i); 13236 + return -EINVAL; 13237 + } 13238 + 13239 + if (!is_bpf_res_spin_lock_kfunc(meta->func_id)) 13240 + return -EFAULT; 13241 + if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 13242 + meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave]) 13243 + flags |= PROCESS_SPIN_LOCK; 13244 + if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] || 13245 + meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore]) 13246 + flags |= PROCESS_LOCK_IRQ; 13247 + ret = process_spin_lock(env, regno, flags); 13248 + if (ret < 0) 13249 + return ret; 13250 + break; 13251 + } 13316 13252 } 13317 13253 } 13318 13254 ··· 13419 13311 insn_aux = &env->insn_aux_data[insn_idx]; 13420 13312 13421 13313 insn_aux->is_iter_next = is_iter_next_kfunc(&meta); 13314 + 13315 + if (!insn->off && 13316 + (insn->imm == special_kfunc_list[KF_bpf_res_spin_lock] || 13317 + insn->imm == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) { 13318 + struct bpf_verifier_state *branch; 13319 + struct bpf_reg_state *regs; 13320 + 13321 + branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false); 13322 + if (!branch) { 13323 + verbose(env, "failed to push state for failed 
lock acquisition\n"); 13324 + return -ENOMEM; 13325 + } 13326 + 13327 + regs = branch->frame[branch->curframe]->regs; 13328 + 13329 + /* Clear r0-r5 registers in forked state */ 13330 + for (i = 0; i < CALLER_SAVED_REGS; i++) 13331 + mark_reg_not_init(env, regs, caller_saved[i]); 13332 + 13333 + mark_reg_unknown(env, regs, BPF_REG_0); 13334 + err = __mark_reg_s32_range(env, regs, BPF_REG_0, -MAX_ERRNO, -1); 13335 + if (err) { 13336 + verbose(env, "failed to mark s32 range for retval in forked state for lock\n"); 13337 + return err; 13338 + } 13339 + __mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32)); 13340 + } 13422 13341 13423 13342 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { 13424 13343 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); ··· 13617 13482 13618 13483 if (btf_type_is_scalar(t)) { 13619 13484 mark_reg_unknown(env, regs, BPF_REG_0); 13485 + if (meta.btf == btf_vmlinux && (meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock] || 13486 + meta.func_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave])) 13487 + __mark_reg_const_zero(env, &regs[BPF_REG_0]); 13620 13488 mark_btf_func_reg_size(env, BPF_REG_0, t->size); 13621 13489 } else if (btf_type_is_ptr(t)) { 13622 13490 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); ··· 18555 18417 case STACK_IRQ_FLAG: 18556 18418 old_reg = &old->stack[spi].spilled_ptr; 18557 18419 cur_reg = &cur->stack[spi].spilled_ptr; 18558 - if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) 18420 + if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) || 18421 + old_reg->irq.kfunc_class != cur_reg->irq.kfunc_class) 18559 18422 return false; 18560 18423 break; 18561 18424 case STACK_MISC: ··· 18600 18461 case REF_TYPE_IRQ: 18601 18462 break; 18602 18463 case REF_TYPE_LOCK: 18464 + case REF_TYPE_RES_LOCK: 18465 + case REF_TYPE_RES_LOCK_IRQ: 18603 18466 if (old->refs[i].ptr != cur->refs[i].ptr) 18604 18467 return false; 18605 18468 
break; ··· 19887 19746 } 19888 19747 } 19889 19748 19890 - if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) { 19749 + if (btf_record_has_field(map->record, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK)) { 19891 19750 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) { 19892 19751 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n"); 19893 19752 return -EINVAL;