Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2023-06-23

We've added 49 non-merge commits during the last 24 day(s) which contain
a total of 70 files changed, 1935 insertions(+), 442 deletions(-).

The main changes are:

1) Extend bpf_fib_lookup helper to allow passing the route table ID,
from Louis DeLosSantos.

2) Fix regsafe() in verifier to call check_ids() for scalar registers,
from Eduard Zingerman.

3) Extend the set of cpumask kfuncs with bpf_cpumask_first_and()
and a rework of bpf_cpumask_any*() kfuncs. Additionally,
add selftests, from David Vernet.

4) Fix socket lookup BPF helpers for tc/XDP to respect VRF bindings,
from Gilad Sever.

5) Change bpf_link_put() to use workqueue unconditionally to fix it
under PREEMPT_RT, from Sebastian Andrzej Siewior.

6) Follow-ups to address issues in the bpf_refcount shared ownership
implementation, from Dave Marchevsky.

7) A few general refactorings to BPF map and program creation permissions
checks which were part of the BPF token series, from Andrii Nakryiko.

8) Various fixes for benchmark framework and add a new benchmark
for BPF memory allocator to BPF selftests, from Hou Tao.

9) Documentation improvements around iterators and trusted pointers,
from Anton Protopopov.

10) Small cleanup in verifier to improve allocated object check,
from Daniel T. Lee.

11) Improve performance of bpf_xdp_pointer() by avoiding access
to shared_info when XDP packet does not have frags,
from Jesper Dangaard Brouer.

12) Silence a harmless syzbot-reported warning in btf_type_id_size(),
from Yonghong Song.

13) Remove duplicate bpfilter_umh_cleanup in favor of umd_cleanup_helper,
from Jarkko Sakkinen.

14) Fix BPF selftests build for resolve_btfids under custom HOSTCFLAGS,
from Viktor Malik.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (49 commits)
bpf, docs: Document existing macros instead of deprecated
bpf, docs: BPF Iterator Document
selftests/bpf: Fix compilation failure for prog vrf_socket_lookup
selftests/bpf: Add vrf_socket_lookup tests
bpf: Fix bpf socket lookup from tc/xdp to respect socket VRF bindings
bpf: Call __bpf_sk_lookup()/__bpf_skc_lookup() directly via TC hookpoint
bpf: Factor out socket lookup functions for the TC hookpoint.
selftests/bpf: Set the default value of consumer_cnt as 0
selftests/bpf: Ensure that next_cpu() returns a valid CPU number
selftests/bpf: Output the correct error code for pthread APIs
selftests/bpf: Use producer_cnt to allocate local counter array
xsk: Remove unused inline function xsk_buff_discard()
bpf: Keep BPF_PROG_LOAD permission checks clear of validations
bpf: Centralize permissions checks for all BPF map types
bpf: Inline map creation logic in map_create() function
bpf: Move unprivileged checks into map_create() and bpf_prog_load()
bpf: Remove in_atomic() from bpf_link_put().
selftests/bpf: Verify that check_ids() is used for scalars in regsafe()
bpf: Verify scalar ids mapping in regsafe() using check_ids()
selftests/bpf: Check if mark_chain_precision() follows scalar ids
...
====================

Link: https://lore.kernel.org/r/20230623211256.8409-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+1937 -444
+2 -5
Documentation/bpf/bpf_iterators.rst
@@ -238 +238 @@
        that the kernel function cond_resched() is called to avoid other kernel
        subsystem (e.g., rcu) misbehaving.
    * - seq_info
-     - Specifies certain action requests in the kernel BPF iterator
-       infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
-       that the kernel function cond_resched() is called to avoid other kernel
-       subsystem (e.g., rcu) misbehaving.
-
+     - Specifies the set of seq operations for the BPF iterator and helpers to
+       initialize/free the private data for the corresponding ``seq_file``.

 `Click here
 <https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
+3 -2
Documentation/bpf/cpumasks.rst
@@ -351 +351 @@
 can be used to query the contents of cpumasks.

 .. kernel-doc:: kernel/bpf/cpumask.c
-   :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_test_cpu
+   :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and
+                 bpf_cpumask_test_cpu

 .. kernel-doc:: kernel/bpf/cpumask.c
    :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
                  bpf_cpumask_empty bpf_cpumask_full

 .. kernel-doc:: kernel/bpf/cpumask.c
-   :identifiers: bpf_cpumask_any bpf_cpumask_any_and
+   :identifiers: bpf_cpumask_any_distribute bpf_cpumask_any_and_distribute

 ----
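The newly documented `bpf_cpumask_first_and()` kfunc is a thin wrapper around the kernel's `cpumask_first_and()`. Its contract can be sketched in plain userspace C over a single 64-bit mask word (a hypothetical model, not kernel code; `first_and64` and the fixed width of 64 are illustrative):

```c
#include <stdint.h>

/* Userspace model of cpumask_first_and(): return the index of the
 * first bit set in the AND of both masks, or 64 (i.e. ">= nr_cpus")
 * when the intersection is empty.
 */
static unsigned int first_and64(uint64_t src1, uint64_t src2)
{
    uint64_t both = src1 & src2;
    unsigned int i;

    for (i = 0; i < 64; i++)
        if (both & (1ULL << i))
            return i;
    return 64; /* no common bit set */
}
```

As with the kernel helper, callers must treat any return value `>= nr_cpus` as "no CPU found".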
+32 -6
Documentation/bpf/kfuncs.rst
@@ -227 +227 @@

 As mentioned above, a nested pointer obtained from walking a trusted pointer is
 no longer trusted, with one exception. If a struct type has a field that is
-guaranteed to be valid as long as its parent pointer is trusted, the
-``BTF_TYPE_SAFE_NESTED`` macro can be used to express that to the verifier as
-follows:
+guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long
+as its parent pointer is valid, the following macros can be used to express
+that to the verifier:
+
+* ``BTF_TYPE_SAFE_TRUSTED``
+* ``BTF_TYPE_SAFE_RCU``
+* ``BTF_TYPE_SAFE_RCU_OR_NULL``
+
+For example,

 .. code-block:: c

-        BTF_TYPE_SAFE_NESTED(struct task_struct) {
+        BTF_TYPE_SAFE_TRUSTED(struct socket) {
+                struct sock *sk;
+        };
+
+or
+
+.. code-block:: c
+
+        BTF_TYPE_SAFE_RCU(struct task_struct) {
                 const cpumask_t *cpus_ptr;
+                struct css_set __rcu *cgroups;
+                struct task_struct __rcu *real_parent;
+                struct task_struct *group_leader;
         };

 In other words, you must:

-1. Wrap the trusted pointer type in the ``BTF_TYPE_SAFE_NESTED`` macro.
+1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro.

-2. Specify the type and name of the trusted nested field. This field must match
+2. Specify the type and name of the valid nested field. This field must match
    the field in the original type definition exactly.
+
+A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so
+that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)``
+is emitted in the ``type_is_trusted()`` function as follows:
+
+.. code-block:: c
+
+        BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+

 2.4.5 KF_SLEEPABLE flag
 -----------------------
+19 -6
include/linux/bpf_verifier.h
@@ -313 +313 @@
     u32 idx;
 };

-struct bpf_id_pair {
-    u32 old;
-    u32 cur;
-};
-
 #define MAX_CALL_FRAMES 8
 /* Maximum number of register states that can exist at once */
 #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
@@ -557 +552 @@
     u64 stack_masks[MAX_CALL_FRAMES];
 };

+struct bpf_id_pair {
+    u32 old;
+    u32 cur;
+};
+
+struct bpf_idmap {
+    u32 tmp_id_gen;
+    struct bpf_id_pair map[BPF_ID_MAP_SIZE];
+};
+
+struct bpf_idset {
+    u32 count;
+    u32 ids[BPF_ID_MAP_SIZE];
+};
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
@@ -588 +598 @@
     const struct bpf_line_info *prev_linfo;
     struct bpf_verifier_log log;
     struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
-    struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
+    union {
+        struct bpf_idmap idmap_scratch;
+        struct bpf_idset idset_scratch;
+    };
     struct {
         int *insn_state;
         int *insn_stack;
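The `bpf_idmap` scratch area backs the verifier's `check_ids()` logic from item 2 of the summary: a scalar id in the old (cached) state must correspond to exactly one id in the current state. A minimal userspace sketch of that consistency check (names and the map size are illustrative, not the kernel's exact code):

```c
#include <stdbool.h>
#include <stdint.h>

#define ID_MAP_SIZE 16 /* illustrative; the kernel uses BPF_ID_MAP_SIZE */

struct id_pair { uint32_t old, cur; };
struct id_map  { unsigned int count; struct id_pair map[ID_MAP_SIZE]; };

/* Return true iff 'old' consistently corresponds to 'cur', recording
 * the pairing the first time 'old' is seen. A second query with the
 * same 'old' but a different 'cur' means the states are not equivalent.
 */
static bool check_ids(struct id_map *m, uint32_t old, uint32_t cur)
{
    unsigned int i;

    for (i = 0; i < m->count; i++)
        if (m->map[i].old == old)
            return m->map[i].cur == cur;
    if (m->count == ID_MAP_SIZE)
        return false; /* out of scratch space: be conservative */
    m->map[m->count].old = old;
    m->map[m->count].cur = cur;
    m->count++;
    return true;
}
```

The union with `bpf_idset` works because the two scratch structures are never needed at the same time.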
-1
include/linux/bpfilter.h
@@ -11 +11 @@
                 unsigned int optlen);
 int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
                 int __user *optlen);
-void bpfilter_umh_cleanup(struct umd_info *info);

 struct bpfilter_umh_ops {
     struct umd_info info;
-1
include/linux/filter.h
@@ -874 +874 @@

 bool bpf_opcode_in_insntable(u8 code);

-void bpf_prog_free_linfo(struct bpf_prog *prog);
 void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
                    const u32 *insn_to_jit_off);
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog);
+9
include/linux/netdevice.h
@@ -5073 +5073 @@
     return dev->priv_flags & IFF_L3MDEV_SLAVE;
 }

+static inline int dev_sdif(const struct net_device *dev)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+    if (netif_is_l3_slave(dev))
+        return dev->ifindex;
+#endif
+    return 0;
+}
+
 static inline bool netif_is_bridge_master(const struct net_device *dev)
 {
     return dev->priv_flags & IFF_EBRIDGE;
-4
include/net/xdp_sock_drv.h
@@ -255 +255 @@
 {
 }

-static inline void xsk_buff_discard(struct xdp_buff *xdp)
-{
-}
-
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 }
+18 -3
include/uapi/linux/bpf.h
@@ -3178 +3178 @@
  *        **BPF_FIB_LOOKUP_DIRECT**
  *            Do a direct table lookup vs full lookup using FIB
  *            rules.
+ *        **BPF_FIB_LOOKUP_TBID**
+ *            Used with BPF_FIB_LOOKUP_DIRECT.
+ *            Use the routing table ID present in *params*->tbid
+ *            for the fib lookup.
  *        **BPF_FIB_LOOKUP_OUTPUT**
  *            Perform lookup from an egress perspective (default is
  *            ingress).
@@ -6832 +6836 @@
     BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
     BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
     BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+    BPF_FIB_LOOKUP_TBID    = (1U << 3),
 };

 enum {
@@ -6892 +6897 @@
         __u32        ipv6_dst[4];  /* in6_addr; network order */
     };

-    /* output */
-    __be16    h_vlan_proto;
-    __be16    h_vlan_TCI;
+    union {
+        struct {
+            /* output */
+            __be16    h_vlan_proto;
+            __be16    h_vlan_TCI;
+        };
+        /* input: when accompanied with the
+         * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+         * specific routing table to use for the fib lookup.
+         */
+        __u32    tbid;
+    };
+
     __u8    smac[6];     /* ETH_ALEN */
     __u8    dmac[6];     /* ETH_ALEN */
 };
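The new `tbid` input reuses storage that was previously output-only: it is unioned with the two `__be16` VLAN output fields, so both views occupy the same 4 bytes and `struct bpf_fib_lookup` does not grow. A userspace mirror of just that anonymous union (field names taken from the uapi change; the wrapper struct name is hypothetical) confirms the overlap:

```c
#include <stddef.h>
#include <stdint.h>

/* Mirror of the union added to struct bpf_fib_lookup: the two 16-bit
 * VLAN output fields overlap the 32-bit tbid input that is consumed
 * when BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID is set.
 */
struct fib_lookup_vlan_or_tbid {
    union {
        struct {
            uint16_t h_vlan_proto; /* output */
            uint16_t h_vlan_TCI;   /* output */
        };
        uint32_t tbid;             /* input with BPF_FIB_LOOKUP_TBID */
    };
};
```

Because the flag gates which interpretation is live, a program that sets `tbid` must not expect the VLAN outputs to survive the call.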
-3
kernel/bpf/bloom_filter.c
@@ -86 +86 @@
     int numa_node = bpf_map_attr_numa_node(attr);
     struct bpf_bloom_filter *bloom;

-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);
-
     if (attr->key_size != 0 || attr->value_size == 0 ||
         attr->max_entries == 0 ||
         attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
-3
kernel/bpf/bpf_local_storage.c
@@ -723 +723 @@
         !attr->btf_key_type_id || !attr->btf_value_type_id)
         return -EINVAL;

-    if (!bpf_capable())
-        return -EPERM;
-
     if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
         return -E2BIG;

-3
kernel/bpf/bpf_struct_ops.c
@@ -655 +655 @@
     const struct btf_type *t, *vt;
     struct bpf_map *map;

-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);
-
     st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
     if (!st_ops)
         return ERR_PTR(-ENOTSUPP);
+12 -11
kernel/bpf/btf.c
@@ -492 +492 @@
     return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }

-static bool btf_type_nosize(const struct btf_type *t)
-{
-    return btf_type_is_void(t) || btf_type_is_fwd(t) ||
-           btf_type_is_func(t) || btf_type_is_func_proto(t);
-}
-
-static bool btf_type_nosize_or_null(const struct btf_type *t)
-{
-    return !t || btf_type_nosize(t);
-}
-
 static bool btf_type_is_datasec(const struct btf_type *t)
 {
     return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -511 +500 @@
 static bool btf_type_is_decl_tag(const struct btf_type *t)
 {
     return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+}
+
+static bool btf_type_nosize(const struct btf_type *t)
+{
+    return btf_type_is_void(t) || btf_type_is_fwd(t) ||
+           btf_type_is_func(t) || btf_type_is_func_proto(t) ||
+           btf_type_is_decl_tag(t);
+}
+
+static bool btf_type_nosize_or_null(const struct btf_type *t)
+{
+    return !t || btf_type_nosize(t);
 }

 static bool btf_type_is_decl_tag_target(const struct btf_type *t)
+5 -3
kernel/bpf/core.c
@@ -2064 +2064 @@
 };
 #undef PROG_NAME_LIST
 #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
-static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
-                  const struct bpf_insn *insn) = {
+static __maybe_unused
+u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
+               const struct bpf_insn *insn) = {
 EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
 EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 #undef PROG_NAME_LIST

+#ifdef CONFIG_BPF_SYSCALL
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
 {
     stack_depth = max_t(u32, stack_depth, 1);
@@ -2080 +2082 @@
         __bpf_call_base_args;
     insn->code = BPF_JMP | BPF_CALL_ARGS;
 }
-
+#endif
 #else
 static unsigned int __bpf_prog_ret0_warn(const void *ctx,
                      const struct bpf_insn *insn)
-4
kernel/bpf/cpumap.c
@@ -28 +28 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
-#include <linux/capability.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>

@@ -88 +87 @@
 {
     u32 value_size = attr->value_size;
     struct bpf_cpu_map *cmap;
-
-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);

     /* check sanity of attributes */
     if (attr->max_entries == 0 || attr->key_size != 4 ||
+28 -10
kernel/bpf/cpumask.c
@@ -132 +132 @@
 }

 /**
+ * bpf_cpumask_first_and() - Return the index of the first nonzero bit from the
+ *                           AND of two cpumasks.
+ * @src1: The first cpumask.
+ * @src2: The second cpumask.
+ *
+ * Find the index of the first nonzero bit of the AND of two cpumasks.
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1,
+                                      const struct cpumask *src2)
+{
+    return cpumask_first_and(src1, src2);
+}
+
+/**
  * bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask.
  * @cpu: The CPU to be set in the cpumask.
  * @cpumask: The BPF cpumask in which a bit is being set.
@@ -367 +382 @@
 }

 /**
- * bpf_cpumask_any() - Return a random set CPU from a cpumask.
+ * bpf_cpumask_any_distribute() - Return a random set CPU from a cpumask.
  * @cpumask: The cpumask being queried.
  *
  * Return:
@@ -376 +391 @@
  *
  * A struct bpf_cpumask pointer may be safely passed to @src.
  */
-__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask)
 {
-    return cpumask_any(cpumask);
+    return cpumask_any_distribute(cpumask);
 }

 /**
- * bpf_cpumask_any_and() - Return a random set CPU from the AND of two
- *                         cpumasks.
+ * bpf_cpumask_any_and_distribute() - Return a random set CPU from the AND of
+ *                                    two cpumasks.
  * @src1: The first cpumask.
  * @src2: The second cpumask.
  *
  * Return:
- * * A random set bit within [0, num_cpus) if at least one bit is set.
+ * * A random set bit within [0, num_cpus) from the AND of two cpumasks, if at
+ *   least one bit is set.
  * * >= num_cpus if no bit is set.
  *
  * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
  */
-__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+                                               const struct cpumask *src2)
 {
-    return cpumask_any_and(src1, src2);
+    return cpumask_any_and_distribute(src1, src2);
 }

 __diag_pop();
@@ -406 +423 @@
 BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
@@ -422 +440 @@
 BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
 BTF_SET8_END(cpumask_kfunc_btf_ids)

 static const struct btf_kfunc_id_set cpumask_kfunc_set = {
-3
kernel/bpf/devmap.c
@@ -160 +160 @@
     struct bpf_dtab *dtab;
     int err;

-    if (!capable(CAP_NET_ADMIN))
-        return ERR_PTR(-EPERM);
-
     dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE);
     if (!dtab)
         return ERR_PTR(-ENOMEM);
-6
kernel/bpf/hashtab.c
@@ -422 +422 @@
     BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
              offsetof(struct htab_elem, hash_node.pprev));

-    if (lru && !bpf_capable())
-        /* LRU implementation is much complicated than other
-         * maps.  Hence, limit to CAP_BPF.
-         */
-        return -EPERM;
-
     if (zero_seed && !capable(CAP_SYS_ADMIN))
         /* Guard against local DoS, and discourage production use. */
         return -EPERM;
+8 -4
kernel/bpf/helpers.c
@@ -1933 +1933 @@
      * bpf_refcount type so that it is emitted in vmlinux BTF
      */
     ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
+    if (!refcount_inc_not_zero((refcount_t *)ref))
+        return NULL;

-    refcount_inc((refcount_t *)ref);
+    /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
+     * in verifier.c
+     */
     return (void *)p__refcounted_kptr;
 }

@@ -1950 +1954 @@
     INIT_LIST_HEAD(h);
     if (!list_empty(n)) {
         /* Only called from BPF prog, no need to migrate_disable */
-        __bpf_obj_drop_impl(n - off, rec);
+        __bpf_obj_drop_impl((void *)n - off, rec);
         return -EINVAL;
     }

@@ -2032 +2036 @@

     if (!RB_EMPTY_NODE(n)) {
         /* Only called from BPF prog, no need to migrate_disable */
-        __bpf_obj_drop_impl(n - off, rec);
+        __bpf_obj_drop_impl((void *)n - off, rec);
         return -EINVAL;
     }

@@ -2406 +2410 @@
 #endif
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
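The key fix here is replacing an unconditional `refcount_inc()` with `refcount_inc_not_zero()`, which refuses to resurrect an object whose count has already dropped to zero, and making `bpf_refcount_acquire` fallible (`KF_RET_NULL`) to match. A minimal single-threaded model of that primitive's contract (a userspace sketch; the kernel version is an atomic operation):

```c
#include <stdbool.h>

/* Model of refcount_inc_not_zero(): increment only if the count is
 * still nonzero, and report whether the increment happened. A plain
 * int is enough to show the contract bpf_refcount_acquire now relies
 * on; the kernel does this atomically on a refcount_t.
 */
static bool refcount_inc_not_zero_model(int *count)
{
    if (*count == 0)
        return false; /* object is already on its way to being freed */
    (*count)++;
    return true;
}
```

Callers that observe `false` must treat the pointer as dead rather than handing out a new reference, which is why the kfunc can now return NULL.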
-3
kernel/bpf/lpm_trie.c
@@ -544 +544 @@
 {
     struct lpm_trie *trie;

-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);
-
     /* check sanity of attributes */
     if (attr->max_entries == 0 ||
         !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+16 -15
kernel/bpf/memalloc.c
@@ -211 +211 @@
     mem_cgroup_put(memcg);
 }

-static void free_one(struct bpf_mem_cache *c, void *obj)
+static void free_one(void *obj, bool percpu)
 {
-    if (c->percpu_size) {
+    if (percpu) {
         free_percpu(((void **)obj)[1]);
         kfree(obj);
         return;
@@ -222 +222 @@
     kfree(obj);
 }

-static void __free_rcu(struct rcu_head *head)
+static void free_all(struct llist_node *llnode, bool percpu)
 {
-    struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
-    struct llist_node *llnode = llist_del_all(&c->waiting_for_gp);
     struct llist_node *pos, *t;

     llist_for_each_safe(pos, t, llnode)
-        free_one(c, pos);
+        free_one(pos, percpu);
+}
+
+static void __free_rcu(struct rcu_head *head)
+{
+    struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
+
+    free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
     atomic_set(&c->call_rcu_in_progress, 0);
 }

@@ -432 +437 @@

 static void drain_mem_cache(struct bpf_mem_cache *c)
 {
-    struct llist_node *llnode, *t;
+    bool percpu = !!c->percpu_size;

     /* No progs are using this bpf_mem_cache, but htab_map_free() called
      * bpf_mem_cache_free() for all remaining elements and they can be in
      * Except for waiting_for_gp list, there are no concurrent operations
      * on these lists, so it is safe to use __llist_del_all().
      */
-    llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
-        free_one(c, llnode);
-    llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
-        free_one(c, llnode);
-    llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
-        free_one(c, llnode);
-    llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
-        free_one(c, llnode);
+    free_all(__llist_del_all(&c->free_by_rcu), percpu);
+    free_all(llist_del_all(&c->waiting_for_gp), percpu);
+    free_all(__llist_del_all(&c->free_llist), percpu);
+    free_all(__llist_del_all(&c->free_llist_extra), percpu);
 }

 static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
+2 -2
kernel/bpf/preload/bpf_preload_kern.c
@@ -23 +23 @@

 static int preload(struct bpf_preload_info *obj)
 {
-    strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name));
+    strscpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name));
     obj[0].link = maps_link;
-    strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name));
+    strscpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name));
     obj[1].link = progs_link;
     return 0;
 }
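`strscpy()` is the preferred replacement for the deprecated `strlcpy()` because it always NUL-terminates and reports truncation via `-E2BIG`, instead of returning the full source length (which forces `strlcpy()` to read the entire source string even when it does not fit). A userspace reimplementation of that contract (a sketch; the kernel implementation copies word-at-a-time):

```c
#include <errno.h>
#include <stddef.h>

/* Sketch of strscpy() semantics: copy up to size-1 bytes, always
 * NUL-terminate the destination, and return the number of bytes
 * copied, or -E2BIG when the source did not fit.
 */
static long strscpy_model(char *dst, const char *src, size_t size)
{
    size_t i;

    if (size == 0)
        return -E2BIG;
    for (i = 0; i < size - 1 && src[i]; i++)
        dst[i] = src[i];
    dst[i] = '\0';
    return src[i] ? -E2BIG : (long)i;
}
```

The truncation-as-error return is what makes callers like `preload()` safe to convert mechanically: the destination is always a valid C string afterwards.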
-4
kernel/bpf/queue_stack_maps.c
@@ -7 +7 @@
 #include <linux/bpf.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/capability.h>
 #include <linux/btf_ids.h>
 #include "percpu_freelist.h"

@@ -46 +45 @@
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
-    if (!bpf_capable())
-        return -EPERM;
-
     /* check sanity of attributes */
     if (attr->max_entries == 0 || attr->key_size != 0 ||
         attr->value_size == 0 ||
-3
kernel/bpf/reuseport_array.c
@@ -151 +151 @@
     int numa_node = bpf_map_attr_numa_node(attr);
     struct reuseport_array *array;

-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);
-
     /* allocate all map elements and zero-initialize them */
     array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node);
     if (!array)
-3
kernel/bpf/stackmap.c
@@ -74 +74 @@
     u64 cost, n_buckets;
     int err;

-    if (!bpf_capable())
-        return ERR_PTR(-EPERM);
-
     if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
         return ERR_PTR(-EINVAL);

+113 -71
kernel/bpf/syscall.c
@@ -109 +109 @@
     .map_mem_usage = bpf_map_offload_map_mem_usage,
 };

-static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
-{
-    const struct bpf_map_ops *ops;
-    u32 type = attr->map_type;
-    struct bpf_map *map;
-    int err;
-
-    if (type >= ARRAY_SIZE(bpf_map_types))
-        return ERR_PTR(-EINVAL);
-    type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
-    ops = bpf_map_types[type];
-    if (!ops)
-        return ERR_PTR(-EINVAL);
-
-    if (ops->map_alloc_check) {
-        err = ops->map_alloc_check(attr);
-        if (err)
-            return ERR_PTR(err);
-    }
-    if (attr->map_ifindex)
-        ops = &bpf_map_offload_ops;
-    if (!ops->map_mem_usage)
-        return ERR_PTR(-EINVAL);
-    map = ops->map_alloc(attr);
-    if (IS_ERR(map))
-        return map;
-    map->ops = ops;
-    map->map_type = type;
-    return map;
-}
-
 static void bpf_map_write_active_inc(struct bpf_map *map)
 {
     atomic64_inc(&map->writecnt);
@@ -1127 +1096 @@
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
+    const struct bpf_map_ops *ops;
     int numa_node = bpf_map_attr_numa_node(attr);
+    u32 map_type = attr->map_type;
     struct bpf_map *map;
     int f_flags;
     int err;
@@ -1158 +1129 @@
         return -EINVAL;

     /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
-    map = find_and_alloc_map(attr);
+    map_type = attr->map_type;
+    if (map_type >= ARRAY_SIZE(bpf_map_types))
+        return -EINVAL;
+    map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
+    ops = bpf_map_types[map_type];
+    if (!ops)
+        return -EINVAL;
+
+    if (ops->map_alloc_check) {
+        err = ops->map_alloc_check(attr);
+        if (err)
+            return err;
+    }
+    if (attr->map_ifindex)
+        ops = &bpf_map_offload_ops;
+    if (!ops->map_mem_usage)
+        return -EINVAL;
+
+    /* Intent here is for unprivileged_bpf_disabled to block BPF map
+     * creation for unprivileged users; other actions depend
+     * on fd availability and access to bpffs, so are dependent on
+     * object creation success. Even with unprivileged BPF disabled,
+     * capability checks are still carried out.
+     */
+    if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+        return -EPERM;
+
+    /* check privileged map type permissions */
+    switch (map_type) {
+    case BPF_MAP_TYPE_ARRAY:
+    case BPF_MAP_TYPE_PERCPU_ARRAY:
+    case BPF_MAP_TYPE_PROG_ARRAY:
+    case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+    case BPF_MAP_TYPE_CGROUP_ARRAY:
+    case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+    case BPF_MAP_TYPE_HASH:
+    case BPF_MAP_TYPE_PERCPU_HASH:
+    case BPF_MAP_TYPE_HASH_OF_MAPS:
+    case BPF_MAP_TYPE_RINGBUF:
+    case BPF_MAP_TYPE_USER_RINGBUF:
+    case BPF_MAP_TYPE_CGROUP_STORAGE:
+    case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+        /* unprivileged */
+        break;
+    case BPF_MAP_TYPE_SK_STORAGE:
+    case BPF_MAP_TYPE_INODE_STORAGE:
+    case BPF_MAP_TYPE_TASK_STORAGE:
+    case BPF_MAP_TYPE_CGRP_STORAGE:
+    case BPF_MAP_TYPE_BLOOM_FILTER:
+    case BPF_MAP_TYPE_LPM_TRIE:
+    case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+    case BPF_MAP_TYPE_STACK_TRACE:
+    case BPF_MAP_TYPE_QUEUE:
+    case BPF_MAP_TYPE_STACK:
+    case BPF_MAP_TYPE_LRU_HASH:
+    case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+    case BPF_MAP_TYPE_STRUCT_OPS:
+    case BPF_MAP_TYPE_CPUMAP:
+        if (!bpf_capable())
+            return -EPERM;
+        break;
+    case BPF_MAP_TYPE_SOCKMAP:
+    case BPF_MAP_TYPE_SOCKHASH:
+    case BPF_MAP_TYPE_DEVMAP:
+    case BPF_MAP_TYPE_DEVMAP_HASH:
+    case BPF_MAP_TYPE_XSKMAP:
+        if (!capable(CAP_NET_ADMIN))
+            return -EPERM;
+        break;
+    default:
+        WARN(1, "unsupported map type %d", map_type);
+        return -EPERM;
+    }
+
+    map = ops->map_alloc(attr);
     if (IS_ERR(map))
         return PTR_ERR(map);
+    map->ops = ops;
+    map->map_type = map_type;

     err = bpf_obj_name_cpy(map->name, attr->map_name,
                    sizeof(attr->map_name));
@@ -2507 +2554 @@
     struct btf *attach_btf = NULL;
     int err;
     char license[128];
-    bool is_gpl;

     if (CHECK_ATTR(BPF_PROG_LOAD))
         return -EINVAL;
@@ -2526 +2572 @@
         !bpf_capable())
         return -EPERM;

-    /* copy eBPF program license from user space */
-    if (strncpy_from_bpfptr(license,
-                make_bpfptr(attr->license, uattr.is_kernel),
-                sizeof(license) - 1) < 0)
-        return -EFAULT;
-    license[sizeof(license) - 1] = 0;
-
-    /* eBPF programs must be GPL compatible to use GPL-ed functions */
-    is_gpl = license_is_gpl_compatible(license);
+    /* Intent here is for unprivileged_bpf_disabled to block BPF program
+     * creation for unprivileged users; other actions depend
+     * on fd availability and access to bpffs, so are dependent on
+     * object creation success. Even with unprivileged BPF disabled,
+     * capability checks are still carried out for these
+     * and other operations.
+     */
+    if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+        return -EPERM;

     if (attr->insn_cnt == 0 ||
         attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
@@ -2618 +2664 @@
                make_bpfptr(attr->insns, uattr.is_kernel),
                bpf_prog_insn_size(prog)) != 0)
         goto free_prog_sec;
+    /* copy eBPF program license from user space */
+    if (strncpy_from_bpfptr(license,
+                make_bpfptr(attr->license, uattr.is_kernel),
+                sizeof(license) - 1) < 0)
+        goto free_prog_sec;
+    license[sizeof(license) - 1] = 0;
+
+    /* eBPF programs must be GPL compatible to use GPL-ed functions */
+    prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;

     prog->orig_prog = NULL;
     prog->jited = 0;

     atomic64_set(&prog->aux->refcnt, 1);
-    prog->gpl_compatible = is_gpl ? 1 : 0;

     if (bpf_prog_is_dev_bound(prog->aux)) {
         err = bpf_prog_dev_bound_init(prog, attr);
@@ -2797 +2851 @@
     bpf_link_free(link);
 }

-/* bpf_link_put can be called from atomic context, but ensures that resources
- * are freed from process context
+/* bpf_link_put might be called from atomic context. It needs to be called
+ * from sleepable context in order to acquire sleeping locks during the process.
  */
 void bpf_link_put(struct bpf_link *link)
 {
     if (!atomic64_dec_and_test(&link->refcnt))
         return;

-    if (in_atomic()) {
-        INIT_WORK(&link->work, bpf_link_put_deferred);
-        schedule_work(&link->work);
-    } else {
-        bpf_link_free(link);
-    }
+    INIT_WORK(&link->work, bpf_link_put_deferred);
+    schedule_work(&link->work);
 }
 EXPORT_SYMBOL(bpf_link_put);
+
+static void bpf_link_put_direct(struct bpf_link *link)
+{
+    if (!atomic64_dec_and_test(&link->refcnt))
+        return;
+    bpf_link_free(link);
+}

 static int bpf_link_release(struct inode *inode, struct file *filp)
 {
     struct bpf_link *link = filp->private_data;

-    bpf_link_put(link);
+    bpf_link_put_direct(link);
     return 0;
 }

@@ -4801 +4858 @@
     if (ret)
         bpf_prog_put(new_prog);
 out_put_link:
-    bpf_link_put(link);
+    bpf_link_put_direct(link);
     return ret;
 }

@@ -4824 +4881 @@
     else
         ret = -EOPNOTSUPP;

-    bpf_link_put(link);
+    bpf_link_put_direct(link);
     return ret;
 }

@@ -4894 +4951 @@

     fd = bpf_link_new_fd(link);
     if (fd < 0)
-        bpf_link_put(link);
+        bpf_link_put_direct(link);

     return fd;
 }

@@ -4971 +5028 @@
         return PTR_ERR(link);

     err = bpf_iter_new_fd(link);
-    bpf_link_put(link);
+    bpf_link_put_direct(link);

     return err;
 }

@@ -5041 +5098 @@
 static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 {
     union bpf_attr attr;
-    bool capable;
     int err;
-
-    capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
-
-    /* Intent here is for unprivileged_bpf_disabled to block key object
-     * creation commands for unprivileged users; other actions depend
-     * of fd availability and access to bpffs, so are dependent on
-     * object creation success. Capabilities are later verified for
-     * operations such as load and map create, so even with unprivileged
-     * BPF disabled, capability checks are still carried out for these
-     * and other operations.
-     */
-    if (!capable &&
-        (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
-        return -EPERM;

     err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
     if (err)
+213 -35
kernel/bpf/verifier.c
··· 197 197 struct bpf_reg_state *reg); 198 198 static void specialize_kfunc(struct bpf_verifier_env *env, 199 199 u32 func_id, u16 offset, unsigned long *addr); 200 + static bool is_trusted_reg(const struct bpf_reg_state *reg); 200 201 201 202 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) 202 203 { ··· 299 298 bool found; 300 299 } arg_constant; 301 300 302 - /* arg_btf and arg_btf_id are used by kfunc-specific handling, 301 + /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, 303 302 * generally to pass info about user-defined local kptr types to later 304 303 * verification logic 305 304 * bpf_obj_drop 306 305 * Record the local kptr type to be drop'd 307 306 * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) 308 - * Record the local kptr type to be refcount_incr'd 307 + * Record the local kptr type to be refcount_incr'd and use 308 + * arg_owning_ref to determine whether refcount_acquire should be 309 + * fallible 309 310 */ 310 311 struct btf *arg_btf; 311 312 u32 arg_btf_id; 313 + bool arg_owning_ref; 312 314 313 315 struct { 314 316 struct btf_field *field; ··· 443 439 return type & PTR_MAYBE_NULL; 444 440 } 445 441 446 - static bool reg_type_not_null(enum bpf_reg_type type) 442 + static bool reg_not_null(const struct bpf_reg_state *reg) 447 443 { 444 + enum bpf_reg_type type; 445 + 446 + type = reg->type; 448 447 if (type_may_be_null(type)) 449 448 return false; 450 449 ··· 457 450 type == PTR_TO_MAP_VALUE || 458 451 type == PTR_TO_MAP_KEY || 459 452 type == PTR_TO_SOCK_COMMON || 453 + (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || 460 454 type == PTR_TO_MEM; 461 455 } 462 456 ··· 3779 3771 } 3780 3772 } 3781 3773 3774 + static bool idset_contains(struct bpf_idset *s, u32 id) 3775 + { 3776 + u32 i; 3777 + 3778 + for (i = 0; i < s->count; ++i) 3779 + if (s->ids[i] == id) 3780 + return true; 3781 + 3782 + return false; 3783 + } 3784 + 3785 + static int idset_push(struct bpf_idset *s, u32 id) 3786 + 
{ 3787 + if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) 3788 + return -EFAULT; 3789 + s->ids[s->count++] = id; 3790 + return 0; 3791 + } 3792 + 3793 + static void idset_reset(struct bpf_idset *s) 3794 + { 3795 + s->count = 0; 3796 + } 3797 + 3798 + /* Collect a set of IDs for all registers currently marked as precise in env->bt. 3799 + * Mark all registers with these IDs as precise. 3800 + */ 3801 + static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st) 3802 + { 3803 + struct bpf_idset *precise_ids = &env->idset_scratch; 3804 + struct backtrack_state *bt = &env->bt; 3805 + struct bpf_func_state *func; 3806 + struct bpf_reg_state *reg; 3807 + DECLARE_BITMAP(mask, 64); 3808 + int i, fr; 3809 + 3810 + idset_reset(precise_ids); 3811 + 3812 + for (fr = bt->frame; fr >= 0; fr--) { 3813 + func = st->frame[fr]; 3814 + 3815 + bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); 3816 + for_each_set_bit(i, mask, 32) { 3817 + reg = &func->regs[i]; 3818 + if (!reg->id || reg->type != SCALAR_VALUE) 3819 + continue; 3820 + if (idset_push(precise_ids, reg->id)) 3821 + return -EFAULT; 3822 + } 3823 + 3824 + bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); 3825 + for_each_set_bit(i, mask, 64) { 3826 + if (i >= func->allocated_stack / BPF_REG_SIZE) 3827 + break; 3828 + if (!is_spilled_scalar_reg(&func->stack[i])) 3829 + continue; 3830 + reg = &func->stack[i].spilled_ptr; 3831 + if (!reg->id) 3832 + continue; 3833 + if (idset_push(precise_ids, reg->id)) 3834 + return -EFAULT; 3835 + } 3836 + } 3837 + 3838 + for (fr = 0; fr <= st->curframe; ++fr) { 3839 + func = st->frame[fr]; 3840 + 3841 + for (i = BPF_REG_0; i < BPF_REG_10; ++i) { 3842 + reg = &func->regs[i]; 3843 + if (!reg->id) 3844 + continue; 3845 + if (!idset_contains(precise_ids, reg->id)) 3846 + continue; 3847 + bt_set_frame_reg(bt, fr, i); 3848 + } 3849 + for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) { 3850 + if (!is_spilled_scalar_reg(&func->stack[i])) 3851 + continue; 
3852 + reg = &func->stack[i].spilled_ptr; 3853 + if (!reg->id) 3854 + continue; 3855 + if (!idset_contains(precise_ids, reg->id)) 3856 + continue; 3857 + bt_set_frame_slot(bt, fr, i); 3858 + } 3859 + } 3860 + 3861 + return 0; 3862 + } 3863 + 3782 3864 /* 3783 3865 * __mark_chain_precision() backtracks BPF program instruction sequence and 3784 3866 * chain of verifier states making sure that register *regno* (if regno >= 0) ··· 3999 3901 verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", 4000 3902 bt->frame, last_idx, first_idx, subseq_idx); 4001 3903 } 3904 + 3905 + /* If some register with scalar ID is marked as precise, 3906 + * make sure that all registers sharing this ID are also precise. 3907 + * This is needed to estimate effect of find_equal_scalars(). 3908 + * Do this at the last instruction of each state, 3909 + * bpf_reg_state::id fields are valid for these instructions. 3910 + * 3911 + * Allows to track precision in situation like below: 3912 + * 3913 + * r2 = unknown value 3914 + * ... 3915 + * --- state #0 --- 3916 + * ... 3917 + * r1 = r2 // r1 and r2 now share the same ID 3918 + * ... 3919 + * --- state #1 {r1.id = A, r2.id = A} --- 3920 + * ... 3921 + * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1 3922 + * ... 3923 + * --- state #2 {r1.id = A, r2.id = A} --- 3924 + * r3 = r10 3925 + * r3 += r1 // need to mark both r1 and r2 3926 + */ 3927 + if (mark_precise_scalar_ids(env, st)) 3928 + return -EFAULT; 4002 3929 4003 3930 if (last_idx < 0) { 4004 3931 /* we are at the entry into subprog, which ··· 6017 5894 * program allocated objects (which always have ref_obj_id > 0), 6018 5895 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. 
6019 5896 */ 6020 - if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { 5897 + if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) { 6021 5898 verbose(env, "only read is supported\n"); 6022 5899 return -EACCES; 6023 5900 } ··· 7637 7514 if (base_type(arg_type) == ARG_PTR_TO_MEM) 7638 7515 type &= ~DYNPTR_TYPE_FLAG_MASK; 7639 7516 7640 - if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC) 7517 + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) 7641 7518 type &= ~MEM_ALLOC; 7642 7519 7643 7520 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { ··· 9804 9681 return meta->kfunc_flags & KF_ACQUIRE; 9805 9682 } 9806 9683 9807 - static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) 9808 - { 9809 - return meta->kfunc_flags & KF_RET_NULL; 9810 - } 9811 - 9812 9684 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) 9813 9685 { 9814 9686 return meta->kfunc_flags & KF_RELEASE; ··· 10118 10000 BTF_ID(func, bpf_dynptr_slice) 10119 10001 BTF_ID(func, bpf_dynptr_slice_rdwr) 10120 10002 BTF_ID(func, bpf_dynptr_clone) 10003 + 10004 + static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) 10005 + { 10006 + if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && 10007 + meta->arg_owning_ref) { 10008 + return false; 10009 + } 10010 + 10011 + return meta->kfunc_flags & KF_RET_NULL; 10012 + } 10121 10013 10122 10014 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) 10123 10015 { ··· 10606 10478 node_off, btf_name_by_offset(reg->btf, t->name_off)); 10607 10479 return -EINVAL; 10608 10480 } 10481 + meta->arg_btf = reg->btf; 10482 + meta->arg_btf_id = reg->btf_id; 10609 10483 10610 10484 if (node_off != field->graph_root.node_offset) { 10611 10485 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", ··· 11011 10881 meta->subprogno = reg->subprogno; 11012 10882 break; 11013 10883 case KF_ARG_PTR_TO_REFCOUNTED_KPTR: 11014 - if 
(!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) { 10884 + if (!type_is_ptr_alloc_obj(reg->type)) { 11015 10885 verbose(env, "arg#%d is neither owning or non-owning ref\n", i); 11016 10886 return -EINVAL; 11017 10887 } 10888 + if (!type_is_non_owning_ref(reg->type)) 10889 + meta->arg_owning_ref = true; 11018 10890 11019 10891 rec = reg_btf_record(reg); 11020 10892 if (!rec) { ··· 11179 11047 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 11180 11048 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; 11181 11049 insn_aux->insert_off = regs[BPF_REG_2].off; 11050 + insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); 11182 11051 err = ref_convert_owning_non_owning(env, release_ref_obj_id); 11183 11052 if (err) { 11184 11053 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", ··· 12937 12804 if (BPF_SRC(insn->code) == BPF_X) { 12938 12805 struct bpf_reg_state *src_reg = regs + insn->src_reg; 12939 12806 struct bpf_reg_state *dst_reg = regs + insn->dst_reg; 12807 + bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id && 12808 + !tnum_is_const(src_reg->var_off); 12940 12809 12941 12810 if (BPF_CLASS(insn->code) == BPF_ALU64) { 12942 12811 /* case: R1 = R2 12943 12812 * copy register state to dest reg 12944 12813 */ 12945 - if (src_reg->type == SCALAR_VALUE && !src_reg->id) 12814 + if (need_id) 12946 12815 /* Assign src and dst registers the same ID 12947 12816 * that will be used by find_equal_scalars() 12948 12817 * to propagate min/max range. 
··· 12963 12828 } else if (src_reg->type == SCALAR_VALUE) { 12964 12829 bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX; 12965 12830 12966 - if (is_src_reg_u32 && !src_reg->id) 12831 + if (is_src_reg_u32 && need_id) 12967 12832 src_reg->id = ++env->id_gen; 12968 12833 copy_register_state(dst_reg, src_reg); 12969 12834 /* Make sure ID is cleared if src_reg is not in u32 range otherwise ··· 13295 13160 bool is_jmp32) 13296 13161 { 13297 13162 if (__is_pointer_value(false, reg)) { 13298 - if (!reg_type_not_null(reg->type)) 13163 + if (!reg_not_null(reg)) 13299 13164 return -1; 13300 13165 13301 13166 /* If pointer is valid tests against zero will fail so we can ··· 15119 14984 * So we look through our idmap to see if this old id has been seen before. If 15120 14985 * so, we require the new id to match; otherwise, we add the id pair to the map. 15121 14986 */ 15122 - static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) 14987 + static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 15123 14988 { 14989 + struct bpf_id_pair *map = idmap->map; 15124 14990 unsigned int i; 15125 14991 15126 14992 /* either both IDs should be set or both should be zero */ ··· 15132 14996 return true; 15133 14997 15134 14998 for (i = 0; i < BPF_ID_MAP_SIZE; i++) { 15135 - if (!idmap[i].old) { 14999 + if (!map[i].old) { 15136 15000 /* Reached an empty slot; haven't seen this id before */ 15137 - idmap[i].old = old_id; 15138 - idmap[i].cur = cur_id; 15001 + map[i].old = old_id; 15002 + map[i].cur = cur_id; 15139 15003 return true; 15140 15004 } 15141 - if (idmap[i].old == old_id) 15142 - return idmap[i].cur == cur_id; 15005 + if (map[i].old == old_id) 15006 + return map[i].cur == cur_id; 15007 + if (map[i].cur == cur_id) 15008 + return false; 15143 15009 } 15144 15010 /* We ran out of idmap slots, which should be impossible */ 15145 15011 WARN_ON_ONCE(1); 15146 15012 return false; 15013 + } 15014 + 15015 + /* Similar to check_ids(), but allocate a unique 
temporary ID 15016 + * for 'old_id' or 'cur_id' of zero. 15017 + * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid. 15018 + */ 15019 + static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) 15020 + { 15021 + old_id = old_id ? old_id : ++idmap->tmp_id_gen; 15022 + cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; 15023 + 15024 + return check_ids(old_id, cur_id, idmap); 15147 15025 } 15148 15026 15149 15027 static void clean_func_state(struct bpf_verifier_env *env, ··· 15258 15108 15259 15109 static bool regs_exact(const struct bpf_reg_state *rold, 15260 15110 const struct bpf_reg_state *rcur, 15261 - struct bpf_id_pair *idmap) 15111 + struct bpf_idmap *idmap) 15262 15112 { 15263 15113 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 15264 15114 check_ids(rold->id, rcur->id, idmap) && ··· 15267 15117 15268 15118 /* Returns true if (rold safe implies rcur safe) */ 15269 15119 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, 15270 - struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) 15120 + struct bpf_reg_state *rcur, struct bpf_idmap *idmap) 15271 15121 { 15272 15122 if (!(rold->live & REG_LIVE_READ)) 15273 15123 /* explored state didn't use this */ ··· 15304 15154 15305 15155 switch (base_type(rold->type)) { 15306 15156 case SCALAR_VALUE: 15307 - if (regs_exact(rold, rcur, idmap)) 15308 - return true; 15309 - if (env->explore_alu_limits) 15310 - return false; 15157 + if (env->explore_alu_limits) { 15158 + /* explore_alu_limits disables tnum_in() and range_within() 15159 + * logic and requires everything to be strict 15160 + */ 15161 + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && 15162 + check_scalar_ids(rold->id, rcur->id, idmap); 15163 + } 15311 15164 if (!rold->precise) 15312 15165 return true; 15313 - /* new val must satisfy old val knowledge */ 15166 + /* Why check_ids() for scalar registers? 
15167 + * 15168 + * Consider the following BPF code: 15169 + * 1: r6 = ... unbound scalar, ID=a ... 15170 + * 2: r7 = ... unbound scalar, ID=b ... 15171 + * 3: if (r6 > r7) goto +1 15172 + * 4: r6 = r7 15173 + * 5: if (r6 > X) goto ... 15174 + * 6: ... memory operation using r7 ... 15175 + * 15176 + * First verification path is [1-6]: 15177 + * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; 15178 + * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark 15179 + * r7 <= X, because r6 and r7 share same id. 15180 + * Next verification path is [1-4, 6]. 15181 + * 15182 + * Instruction (6) would be reached in two states: 15183 + * I. r6{.id=b}, r7{.id=b} via path 1-6; 15184 + * II. r6{.id=a}, r7{.id=b} via path 1-4, 6. 15185 + * 15186 + * Use check_ids() to distinguish these states. 15187 + * --- 15188 + * Also verify that new value satisfies old value range knowledge. 15189 + */ 15314 15190 return range_within(rold, rcur) && 15315 - tnum_in(rold->var_off, rcur->var_off); 15191 + tnum_in(rold->var_off, rcur->var_off) && 15192 + check_scalar_ids(rold->id, rcur->id, idmap); 15316 15193 case PTR_TO_MAP_KEY: 15317 15194 case PTR_TO_MAP_VALUE: 15318 15195 case PTR_TO_MEM: ··· 15385 15208 } 15386 15209 15387 15210 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, 15388 - struct bpf_func_state *cur, struct bpf_id_pair *idmap) 15211 + struct bpf_func_state *cur, struct bpf_idmap *idmap) 15389 15212 { 15390 15213 int i, spi; 15391 15214 ··· 15488 15311 } 15489 15312 15490 15313 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, 15491 - struct bpf_id_pair *idmap) 15314 + struct bpf_idmap *idmap) 15492 15315 { 15493 15316 int i; 15494 15317 ··· 15536 15359 15537 15360 for (i = 0; i < MAX_BPF_REG; i++) 15538 15361 if (!regsafe(env, &old->regs[i], &cur->regs[i], 15539 - env->idmap_scratch)) 15362 + &env->idmap_scratch)) 15540 15363 return false; 15541 15364 15542 - if (!stacksafe(env, old, 
cur, env->idmap_scratch)) 15365 + if (!stacksafe(env, old, cur, &env->idmap_scratch)) 15543 15366 return false; 15544 15367 15545 - if (!refsafe(old, cur, env->idmap_scratch)) 15368 + if (!refsafe(old, cur, &env->idmap_scratch)) 15546 15369 return false; 15547 15370 15548 15371 return true; ··· 15557 15380 if (old->curframe != cur->curframe) 15558 15381 return false; 15559 15382 15560 - memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); 15383 + env->idmap_scratch.tmp_id_gen = env->id_gen; 15384 + memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); 15561 15385 15562 15386 /* Verification state from speculative execution simulation 15563 15387 * must never prune a non-speculative execution one. ··· 15576 15398 return false; 15577 15399 15578 15400 if (old->active_lock.id && 15579 - !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch)) 15401 + !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) 15580 15402 return false; 15581 15403 15582 15404 if (old->active_rcu_lock != cur->active_rcu_lock)
+1 -2
lib/test_bpf.c
··· 15056 15056 int which, err; 15057 15057 15058 15058 /* Allocate the table of programs to be used for tall calls */ 15059 - progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]), 15060 - GFP_KERNEL); 15059 + progs = kzalloc(struct_size(progs, ptrs, ntests + 1), GFP_KERNEL); 15061 15060 if (!progs) 15062 15061 goto out_nomem; 15063 15062
+1 -1
net/bpfilter/bpfilter_kern.c
··· 21 21 if (tgid) { 22 22 kill_pid(tgid, SIGKILL, 1); 23 23 wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); 24 - bpfilter_umh_cleanup(info); 24 + umd_cleanup_helper(info); 25 25 } 26 26 } 27 27
+119 -28
net/core/filter.c
··· 3948 3948 3949 3949 void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) 3950 3950 { 3951 - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3952 3951 u32 size = xdp->data_end - xdp->data; 3952 + struct skb_shared_info *sinfo; 3953 3953 void *addr = xdp->data; 3954 3954 int i; 3955 3955 3956 3956 if (unlikely(offset > 0xffff || len > 0xffff)) 3957 3957 return ERR_PTR(-EFAULT); 3958 3958 3959 - if (offset + len > xdp_get_buff_len(xdp)) 3959 + if (unlikely(offset + len > xdp_get_buff_len(xdp))) 3960 3960 return ERR_PTR(-EINVAL); 3961 3961 3962 - if (offset < size) /* linear area */ 3962 + if (likely(offset < size)) /* linear area */ 3963 3963 goto out; 3964 3964 3965 + sinfo = xdp_get_shared_info_from_buff(xdp); 3965 3966 offset -= size; 3966 3967 for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ 3967 3968 u32 frag_size = skb_frag_size(&sinfo->frags[i]); ··· 5804 5803 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; 5805 5804 struct fib_table *tb; 5806 5805 5806 + if (flags & BPF_FIB_LOOKUP_TBID) { 5807 + tbid = params->tbid; 5808 + /* zero out for vlan output */ 5809 + params->tbid = 0; 5810 + } 5811 + 5807 5812 tb = fib_get_table(net, tbid); 5808 5813 if (unlikely(!tb)) 5809 5814 return BPF_FIB_LKUP_RET_NOT_FWDED; ··· 5943 5936 u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; 5944 5937 struct fib6_table *tb; 5945 5938 5939 + if (flags & BPF_FIB_LOOKUP_TBID) { 5940 + tbid = params->tbid; 5941 + /* zero out for vlan output */ 5942 + params->tbid = 0; 5943 + } 5944 + 5946 5945 tb = ipv6_stub->fib6_get_table(net, tbid); 5947 5946 if (unlikely(!tb)) 5948 5947 return BPF_FIB_LKUP_RET_NOT_FWDED; ··· 6021 6008 #endif 6022 6009 6023 6010 #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ 6024 - BPF_FIB_LOOKUP_SKIP_NEIGH) 6011 + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) 6025 6012 6026 6013 BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, 6027 6014 struct bpf_fib_lookup *, params, int, plen, u32, flags) ··· 6568 6555 static struct sock * 6569 6556 __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6570 6557 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, 6571 - u64 flags) 6558 + u64 flags, int sdif) 6572 6559 { 6573 6560 struct sock *sk = NULL; 6574 6561 struct net *net; 6575 6562 u8 family; 6576 - int sdif; 6577 6563 6578 6564 if (len == sizeof(tuple->ipv4)) 6579 6565 family = AF_INET; ··· 6584 6572 if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) 6585 6573 goto out; 6586 6574 6587 - if (family == AF_INET) 6588 - sdif = inet_sdif(skb); 6589 - else 6590 - sdif = inet6_sdif(skb); 6575 + if (sdif < 0) { 6576 + if (family == AF_INET) 6577 + sdif = inet_sdif(skb); 6578 + else 6579 + sdif = inet6_sdif(skb); 6580 + } 6591 6581 6592 6582 if ((s32)netns_id < 0) { 6593 6583 net = caller_net; ··· 6609 6595 static struct sock * 6610 6596 __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, 6611 6597 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, 6612 - u64 flags) 6598 + u64 flags, int sdif) 6613 6599 { 6614 6600 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, 6615 - ifindex, proto, netns_id, flags); 6601 + ifindex, proto, netns_id, flags, 6602 + sdif); 6616 6603 6617 6604 if (sk) { 6618 6605 
struct sock *sk2 = sk_to_full_sk(sk); ··· 6653 6638 } 6654 6639 6655 6640 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, 6656 - netns_id, flags); 6641 + netns_id, flags, -1); 6657 6642 } 6658 6643 6659 6644 static struct sock * ··· 6742 6727 .arg5_type = ARG_ANYTHING, 6743 6728 }; 6744 6729 6730 + BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb, 6731 + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6732 + { 6733 + struct net_device *dev = skb->dev; 6734 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6735 + struct net *caller_net = dev_net(dev); 6736 + 6737 + return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net, 6738 + ifindex, IPPROTO_TCP, netns_id, 6739 + flags, sdif); 6740 + } 6741 + 6742 + static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = { 6743 + .func = bpf_tc_skc_lookup_tcp, 6744 + .gpl_only = false, 6745 + .pkt_access = true, 6746 + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, 6747 + .arg1_type = ARG_PTR_TO_CTX, 6748 + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6749 + .arg3_type = ARG_CONST_SIZE, 6750 + .arg4_type = ARG_ANYTHING, 6751 + .arg5_type = ARG_ANYTHING, 6752 + }; 6753 + 6754 + BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb, 6755 + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6756 + { 6757 + struct net_device *dev = skb->dev; 6758 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6759 + struct net *caller_net = dev_net(dev); 6760 + 6761 + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, 6762 + ifindex, IPPROTO_TCP, netns_id, 6763 + flags, sdif); 6764 + } 6765 + 6766 + static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = { 6767 + .func = bpf_tc_sk_lookup_tcp, 6768 + .gpl_only = false, 6769 + .pkt_access = true, 6770 + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6771 + .arg1_type = ARG_PTR_TO_CTX, 6772 + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6773 + .arg3_type = ARG_CONST_SIZE, 6774 + .arg4_type = 
ARG_ANYTHING, 6775 + .arg5_type = ARG_ANYTHING, 6776 + }; 6777 + 6778 + BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb, 6779 + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) 6780 + { 6781 + struct net_device *dev = skb->dev; 6782 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6783 + struct net *caller_net = dev_net(dev); 6784 + 6785 + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, 6786 + ifindex, IPPROTO_UDP, netns_id, 6787 + flags, sdif); 6788 + } 6789 + 6790 + static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = { 6791 + .func = bpf_tc_sk_lookup_udp, 6792 + .gpl_only = false, 6793 + .pkt_access = true, 6794 + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, 6795 + .arg1_type = ARG_PTR_TO_CTX, 6796 + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, 6797 + .arg3_type = ARG_CONST_SIZE, 6798 + .arg4_type = ARG_ANYTHING, 6799 + .arg5_type = ARG_ANYTHING, 6800 + }; 6801 + 6745 6802 BPF_CALL_1(bpf_sk_release, struct sock *, sk) 6746 6803 { 6747 6804 if (sk && sk_is_refcounted(sk)) ··· 6831 6744 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, 6832 6745 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6833 6746 { 6834 - struct net *caller_net = dev_net(ctx->rxq->dev); 6835 - int ifindex = ctx->rxq->dev->ifindex; 6747 + struct net_device *dev = ctx->rxq->dev; 6748 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6749 + struct net *caller_net = dev_net(dev); 6836 6750 6837 6751 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, 6838 6752 ifindex, IPPROTO_UDP, netns_id, 6839 - flags); 6753 + flags, sdif); 6840 6754 } 6841 6755 6842 6756 static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { ··· 6855 6767 BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, 6856 6768 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6857 6769 { 6858 - struct net *caller_net = dev_net(ctx->rxq->dev); 6859 - int ifindex = ctx->rxq->dev->ifindex; 6770 + struct 
net_device *dev = ctx->rxq->dev; 6771 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6772 + struct net *caller_net = dev_net(dev); 6860 6773 6861 6774 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, 6862 6775 ifindex, IPPROTO_TCP, netns_id, 6863 - flags); 6776 + flags, sdif); 6864 6777 } 6865 6778 6866 6779 static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { ··· 6879 6790 BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, 6880 6791 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) 6881 6792 { 6882 - struct net *caller_net = dev_net(ctx->rxq->dev); 6883 - int ifindex = ctx->rxq->dev->ifindex; 6793 + struct net_device *dev = ctx->rxq->dev; 6794 + int ifindex = dev->ifindex, sdif = dev_sdif(dev); 6795 + struct net *caller_net = dev_net(dev); 6884 6796 6885 6797 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, 6886 6798 ifindex, IPPROTO_TCP, netns_id, 6887 - flags); 6799 + flags, sdif); 6888 6800 } 6889 6801 6890 6802 static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { ··· 6905 6815 { 6906 6816 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, 6907 6817 sock_net(ctx->sk), 0, 6908 - IPPROTO_TCP, netns_id, flags); 6818 + IPPROTO_TCP, netns_id, flags, 6819 + -1); 6909 6820 } 6910 6821 6911 6822 static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { ··· 6925 6834 { 6926 6835 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, 6927 6836 sock_net(ctx->sk), 0, IPPROTO_TCP, 6928 - netns_id, flags); 6837 + netns_id, flags, -1); 6929 6838 } 6930 6839 6931 6840 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { ··· 6944 6853 { 6945 6854 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, 6946 6855 sock_net(ctx->sk), 0, IPPROTO_UDP, 6947 - netns_id, flags); 6856 + netns_id, flags, -1); 6948 6857 } 6949 6858 6950 6859 static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { ··· 8073 7982 #endif 8074 7983 #ifdef 
CONFIG_INET 8075 7984 case BPF_FUNC_sk_lookup_tcp: 8076 - return &bpf_sk_lookup_tcp_proto; 7985 + return &bpf_tc_sk_lookup_tcp_proto; 8077 7986 case BPF_FUNC_sk_lookup_udp: 8078 - return &bpf_sk_lookup_udp_proto; 7987 + return &bpf_tc_sk_lookup_udp_proto; 8079 7988 case BPF_FUNC_sk_release: 8080 7989 return &bpf_sk_release_proto; 8081 7990 case BPF_FUNC_tcp_sock: ··· 8083 7992 case BPF_FUNC_get_listener_sock: 8084 7993 return &bpf_get_listener_sock_proto; 8085 7994 case BPF_FUNC_skc_lookup_tcp: 8086 - return &bpf_skc_lookup_tcp_proto; 7995 + return &bpf_tc_skc_lookup_tcp_proto; 8087 7996 case BPF_FUNC_tcp_check_syncookie: 8088 7997 return &bpf_tcp_check_syncookie_proto; 8089 7998 case BPF_FUNC_skb_ecn_set_ce:
-4
net/core/sock_map.c
··· 32 32 { 33 33 struct bpf_stab *stab; 34 34 35 - if (!capable(CAP_NET_ADMIN)) 36 - return ERR_PTR(-EPERM); 37 35 if (attr->max_entries == 0 || 38 36 attr->key_size != 4 || 39 37 (attr->value_size != sizeof(u32) && ··· 1083 1085 struct bpf_shtab *htab; 1084 1086 int i, err; 1085 1087 1086 - if (!capable(CAP_NET_ADMIN)) 1087 - return ERR_PTR(-EPERM); 1088 1088 if (attr->max_entries == 0 || 1089 1089 attr->key_size == 0 || 1090 1090 (attr->value_size != sizeof(u32) &&
+1 -10
net/ipv4/bpfilter/sockopt.c
··· 12 12 struct bpfilter_umh_ops bpfilter_ops; 13 13 EXPORT_SYMBOL_GPL(bpfilter_ops); 14 14 15 - void bpfilter_umh_cleanup(struct umd_info *info) 16 - { 17 - fput(info->pipe_to_umh); 18 - fput(info->pipe_from_umh); 19 - put_pid(info->tgid); 20 - info->tgid = NULL; 21 - } 22 - EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); 23 - 24 15 static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, 25 16 unsigned int optlen, bool is_set) 26 17 { ··· 29 38 } 30 39 if (bpfilter_ops.info.tgid && 31 40 thread_group_exited(bpfilter_ops.info.tgid)) 32 - bpfilter_umh_cleanup(&bpfilter_ops.info); 41 + umd_cleanup_helper(&bpfilter_ops.info); 33 42 34 43 if (!bpfilter_ops.info.tgid) { 35 44 err = bpfilter_ops.start();
-4
net/xdp/xskmap.c
··· 5 5 6 6 #include <linux/bpf.h> 7 7 #include <linux/filter.h> 8 - #include <linux/capability.h> 9 8 #include <net/xdp_sock.h> 10 9 #include <linux/slab.h> 11 10 #include <linux/sched.h> ··· 66 67 struct xsk_map *m; 67 68 int numa_node; 68 69 u64 size; 69 - 70 - if (!capable(CAP_NET_ADMIN)) 71 - return ERR_PTR(-EPERM); 72 70 73 71 if (attr->max_entries == 0 || attr->key_size != 4 || 74 72 attr->value_size != 4 ||
+1 -1
samples/bpf/xdp1_kern.c
··· 39 39 return ip6h->nexthdr; 40 40 } 41 41 42 - #define XDPBUFSIZE 64 42 + #define XDPBUFSIZE 60 43 43 SEC("xdp.frags") 44 44 int xdp_prog1(struct xdp_md *ctx) 45 45 {
+1 -1
samples/bpf/xdp2_kern.c
··· 55 55 return ip6h->nexthdr; 56 56 } 57 57 58 - #define XDPBUFSIZE 64 58 + #define XDPBUFSIZE 60 59 59 SEC("xdp.frags") 60 60 int xdp_prog1(struct xdp_md *ctx) 61 61 {
+2 -2
tools/bpf/resolve_btfids/Makefile
··· 67 67 LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) 68 68 LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) 69 69 70 - HOSTCFLAGS += -g \ 70 + HOSTCFLAGS_resolve_btfids += -g \ 71 71 -I$(srctree)/tools/include \ 72 72 -I$(srctree)/tools/include/uapi \ 73 73 -I$(LIBBPF_INCLUDE) \ ··· 76 76 77 77 LIBS = $(LIBELF_LIBS) -lz 78 78 79 - export srctree OUTPUT HOSTCFLAGS Q HOSTCC HOSTLD HOSTAR 79 + export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR 80 80 include $(srctree)/tools/build/Makefile.include 81 81 82 82 $(BINARY_IN): fixdep FORCE prepare | $(OUTPUT)
+18 -3
tools/include/uapi/linux/bpf.h
··· 3178 3178 * **BPF_FIB_LOOKUP_DIRECT** 3179 3179 * Do a direct table lookup vs full lookup using FIB 3180 3180 * rules. 3181 + * **BPF_FIB_LOOKUP_TBID** 3182 + * Used with BPF_FIB_LOOKUP_DIRECT. 3183 + * Use the routing table ID present in *params*->tbid 3184 + * for the fib lookup. 3181 3185 * **BPF_FIB_LOOKUP_OUTPUT** 3182 3186 * Perform lookup from an egress perspective (default is 3183 3187 * ingress). ··· 6836 6832 BPF_FIB_LOOKUP_DIRECT = (1U << 0), 6837 6833 BPF_FIB_LOOKUP_OUTPUT = (1U << 1), 6838 6834 BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), 6835 + BPF_FIB_LOOKUP_TBID = (1U << 3), 6839 6836 }; 6840 6837 6841 6838 enum { ··· 6897 6892 __u32 ipv6_dst[4]; /* in6_addr; network order */ 6898 6893 }; 6899 6894 6900 - /* output */ 6901 - __be16 h_vlan_proto; 6902 - __be16 h_vlan_TCI; 6895 + union { 6896 + struct { 6897 + /* output */ 6898 + __be16 h_vlan_proto; 6899 + __be16 h_vlan_TCI; 6900 + }; 6901 + /* input: when accompanied with the 6902 + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a 6903 + * specific routing table to use for the fib lookup. 6904 + */ 6905 + __u32 tbid; 6906 + }; 6907 + 6903 6908 __u8 smac[6]; /* ETH_ALEN */ 6904 6909 __u8 dmac[6]; /* ETH_ALEN */ 6905 6910 };
+9 -6
tools/testing/selftests/bpf/bench.c
··· 17 17 .duration_sec = 5, 18 18 .affinity = false, 19 19 .quiet = false, 20 - .consumer_cnt = 1, 20 + .consumer_cnt = 0, 21 21 .producer_cnt = 1, 22 22 }; 23 23 ··· 441 441 static void set_thread_affinity(pthread_t thread, int cpu) 442 442 { 443 443 cpu_set_t cpuset; 444 + int err; 444 445 445 446 CPU_ZERO(&cpuset); 446 447 CPU_SET(cpu, &cpuset); 447 - if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) { 448 + err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); 449 + if (err) { 448 450 fprintf(stderr, "setting affinity to CPU #%d failed: %d\n", 449 - cpu, errno); 451 + cpu, -err); 450 452 exit(1); 451 453 } 452 454 } ··· 469 467 exit(1); 470 468 } 471 469 472 - return cpu_set->next_cpu++; 470 + return cpu_set->next_cpu++ % env.nr_cpus; 473 471 } 474 472 475 473 static struct bench_state { ··· 607 605 bench->consumer_thread, (void *)(long)i); 608 606 if (err) { 609 607 fprintf(stderr, "failed to create consumer thread #%d: %d\n", 610 - i, -errno); 608 + i, -err); 611 609 exit(1); 612 610 } 613 611 if (env.affinity) ··· 626 624 bench->producer_thread, (void *)(long)i); 627 625 if (err) { 628 626 fprintf(stderr, "failed to create producer thread #%d: %d\n", 629 - i, -errno); 627 + i, -err); 630 628 exit(1); 631 629 } 632 630 if (env.affinity) ··· 659 657 660 658 int main(int argc, char **argv) 661 659 { 660 + env.nr_cpus = get_nprocs(); 662 661 parse_cmdline_args_init(argc, argv); 663 662 664 663 if (env.list) {
+1
tools/testing/selftests/bpf/bench.h
···
 	bool quiet;
 	int consumer_cnt;
 	int producer_cnt;
+	int nr_cpus;
 	struct cpu_set prod_cpus;
 	struct cpu_set cons_cpus;
 };
+2 -12
tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
···
 
 static void validate(void)
 {
-	if (env.consumer_cnt != 1) {
+	if (env.consumer_cnt != 0) {
 		fprintf(stderr,
-			"The bloom filter benchmarks do not support multi-consumer use\n");
+			"The bloom filter benchmarks do not support consumer\n");
 		exit(1);
 	}
 }
···
 	last_false_hits = total_false_hits;
 }
 
-static void *consumer(void *input)
-{
-	return NULL;
-}
-
 const struct bench bench_bloom_lookup = {
 	.name = "bloom-lookup",
 	.argp = &bench_bloom_map_argp,
 	.validate = validate,
 	.setup = bloom_lookup_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = bloom_update_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = false_positive_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = false_hits_report_progress,
 	.report_final = false_hits_report_final,
···
 	.validate = validate,
 	.setup = hashmap_no_bloom_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = hashmap_with_bloom_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
+2 -8
tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
···
 
 static void validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
 }
···
 		syscall(__NR_getpgid);
 	}
 
-	return NULL;
-}
-
-static void *consumer(void *input)
-{
 	return NULL;
 }
···
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = NULL,
 	.report_final = hashmap_report_final,
+2 -8
tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
···
 
 static void validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
···
 		/* trigger the bpf program */
 		syscall(__NR_getpgid);
 	}
-	return NULL;
-}
-
-static void *consumer(void *input)
-{
 	return NULL;
 }
···
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = NULL,
 	.report_final = hashmap_report_final,
+2 -8
tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
···
 
 static void validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
 }
···
 	/* trigger the bpf program */
 	syscall(__NR_getpgid);
 
-	return NULL;
-}
-
-static void *consumer(void *input)
-{
 	return NULL;
 }
···
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = ops_report_progress,
 	.report_final = ops_report_final,
+1 -13
tools/testing/selftests/bpf/benchs/bench_count.c
···
 	return NULL;
 }
 
-static void *count_global_consumer(void *input)
-{
-	return NULL;
-}
-
 static void count_global_measure(struct bench_res *res)
 {
 	struct count_global_ctx *ctx = &count_global_ctx;
···
 {
 	struct count_local_ctx *ctx = &count_local_ctx;
 
-	ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+	ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits));
 	if (!ctx->hits)
 		exit(1);
 }
···
 	while (true) {
 		atomic_inc(&ctx->hits[idx].value);
 	}
-	return NULL;
-}
-
-static void *count_local_consumer(void *input)
-{
 	return NULL;
 }
···
 const struct bench bench_count_global = {
 	.name = "count-global",
 	.producer_thread = count_global_producer,
-	.consumer_thread = count_global_consumer,
 	.measure = count_global_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "count-local",
 	.setup = count_local_setup,
 	.producer_thread = count_local_producer,
-	.consumer_thread = count_local_consumer,
 	.measure = count_local_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
+2 -10
tools/testing/selftests/bpf/benchs/bench_local_storage.c
···
 		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
 		exit(1);
 	}
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
···
 	syscall(__NR_getpgid);
 }
 
-static void *consumer(void *input)
-{
-	return NULL;
-}
-
 static void *producer(void *input)
 {
 	while (true)
···
 	.validate = validate,
 	.setup = local_storage_cache_get_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = local_storage_report_progress,
 	.report_final = local_storage_report_final,
···
 	.validate = validate,
 	.setup = local_storage_cache_get_interleaved_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = local_storage_report_progress,
 	.report_final = local_storage_report_final,
···
 	.validate = validate,
 	.setup = hashmap_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = local_storage_report_progress,
 	.report_final = local_storage_report_final,
+1 -7
tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
···
 
 static void validate(void)
 {
-	if (env.consumer_cnt > 1) {
+	if (env.consumer_cnt != 0) {
 		fprintf(stderr,
 			"local-storage-create benchmark does not need consumer\n");
 		exit(1);
···
 {
 	res->hits = atomic_swap(&skel->bss->create_cnts, 0);
 	res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
-}
-
-static void *consumer(void *input)
-{
-	return NULL;
 }
 
 static void *sk_producer(void *input)
···
 	.validate = validate,
 	.setup = setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = report_progress,
 	.report_final = report_final,
+2 -8
tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
···
 		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
 		exit(1);
 	}
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
···
 	ctx.prev_kthread_stime = ticks;
 }
 
-static void *consumer(void *input)
-{
-	return NULL;
-}
-
 static void *producer(void *input)
 {
 	while (true)
···
 	.validate = validate,
 	.setup = local_storage_tasks_trace_setup,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = report_progress,
 	.report_final = report_final,
+2 -13
tools/testing/selftests/bpf/benchs/bench_rename.c
···
 		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
 		exit(1);
 	}
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
 }
···
 	attach_bpf(ctx.skel->progs.prog5);
 }
 
-static void *consumer(void *input)
-{
-	return NULL;
-}
-
 const struct bench bench_rename_base = {
 	.name = "rename-base",
 	.validate = validate,
 	.setup = setup_base,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = setup_kprobe,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = setup_kretprobe,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = setup_rawtp,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = setup_fentry,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = validate,
 	.setup = setup_fexit,
 	.producer_thread = producer,
-	.consumer_thread = consumer,
 	.measure = measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
+1 -1
tools/testing/selftests/bpf/benchs/bench_ringbufs.c
···
 static void bufs_validate(void)
 {
 	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+		fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
 		exit(1);
 	}
 
+2 -9
tools/testing/selftests/bpf/benchs/bench_strncmp.c
···
 
 static void strncmp_validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "strncmp benchmark doesn't support consumer!\n");
 		exit(1);
 	}
 }
···
 	return NULL;
 }
 
-static void *strncmp_consumer(void *ctx)
-{
-	return NULL;
-}
-
 static void strncmp_measure(struct bench_res *res)
 {
 	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
···
 	.validate = strncmp_validate,
 	.setup = strncmp_no_helper_setup,
 	.producer_thread = strncmp_producer,
-	.consumer_thread = strncmp_consumer,
 	.measure = strncmp_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = strncmp_validate,
 	.setup = strncmp_helper_setup,
 	.producer_thread = strncmp_producer,
-	.consumer_thread = strncmp_consumer,
 	.measure = strncmp_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
+2 -19
tools/testing/selftests/bpf/benchs/bench_trigger.c
···
 static void trigger_validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "benchmark doesn't support consumer!\n");
 		exit(1);
 	}
 }
···
 {
 	setup_ctx();
 	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
-}
-
-static void *trigger_consumer(void *input)
-{
-	return NULL;
 }
 
 /* make sure call is not inlined and not avoided by compiler, so __weak and
···
 	.name = "trig-base",
 	.validate = trigger_validate,
 	.producer_thread = trigger_base_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_base_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_tp_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_rawtp_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_kprobe_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_fentry_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_fentry_sleep_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.validate = trigger_validate,
 	.setup = trigger_fmodret_setup,
 	.producer_thread = trigger_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "trig-uprobe-base",
 	.setup = NULL, /* no uprobe/uretprobe is attached */
 	.producer_thread = uprobe_base_producer,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_base_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "trig-uprobe-with-nop",
 	.setup = uprobe_setup_with_nop,
 	.producer_thread = uprobe_producer_with_nop,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "trig-uretprobe-with-nop",
 	.setup = uretprobe_setup_with_nop,
 	.producer_thread = uprobe_producer_with_nop,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "trig-uprobe-without-nop",
 	.setup = uprobe_setup_without_nop,
 	.producer_thread = uprobe_producer_without_nop,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
···
 	.name = "trig-uretprobe-without-nop",
 	.setup = uretprobe_setup_without_nop,
 	.producer_thread = uprobe_producer_without_nop,
-	.consumer_thread = trigger_consumer,
 	.measure = trigger_measure,
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
+14 -12
tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
···
 
 set -eufo pipefail
 
+RUN_RB_BENCH="$RUN_BENCH -c1"
+
 header "Single-producer, parallel producer"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
-	summarize $b "$($RUN_BENCH $b)"
+	summarize $b "$($RUN_RB_BENCH $b)"
 done
 
 header "Single-producer, parallel producer, sampled notification"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
-	summarize $b "$($RUN_BENCH --rb-sampled $b)"
+	summarize $b "$($RUN_RB_BENCH --rb-sampled $b)"
 done
 
 header "Single-producer, back-to-back mode"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
-	summarize $b "$($RUN_BENCH --rb-b2b $b)"
-	summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+	summarize $b "$($RUN_RB_BENCH --rb-b2b $b)"
+	summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)"
 done
 
 header "Ringbuf back-to-back, effect of sample rate"
 for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
-	summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+	summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
 done
 header "Perfbuf back-to-back, effect of sample rate"
 for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
-	summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+	summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
 done
 
 header "Ringbuf back-to-back, reserve+commit vs output"
-summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
-summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)"
 
 header "Ringbuf sampled, reserve+commit vs output"
-summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
-summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)"
 
 header "Single-producer, consumer/producer competing on the same CPU, low batch count"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
-	summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+	summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
 done
 
 header "Ringbuf, multi-producer contention"
 for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
-	summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
 done
 
+8 -8
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
···
 	return a + b + c;
 }
 
-__diag_pop();
-
 int bpf_testmod_fentry_ok;
 
 noinline ssize_t
···
 }
 EXPORT_SYMBOL(bpf_testmod_test_write);
 ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+
+noinline int bpf_fentry_shadow_test(int a)
+{
+	return a + 2;
+}
+EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
+
+__diag_pop();
 
 static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.attr = { .name = "bpf_testmod", .mode = 0666, },
···
 	.owner = THIS_MODULE,
 	.set = &bpf_testmod_check_kfunc_ids,
 };
-
-noinline int bpf_fentry_shadow_test(int a)
-{
-	return a + 2;
-}
-EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
 
 extern int bpf_fentry_test1(int a);
 
+7
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
···
 
 void bpf_kfunc_call_test_destructive(void) __ksym;
 
+void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
+struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
+void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p);
+void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
+void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
+void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
+void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
 #endif /* _BPF_TESTMOD_KFUNC_H */
+4
tools/testing/selftests/bpf/config
···
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_DYNAMIC_FTRACE=y
 CONFIG_FPROBE=y
 CONFIG_FTRACE_SYSCALLS=y
···
 CONFIG_NET_SCHED=y
 CONFIG_NETDEVSIM=y
 CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NETFILTER_SYNPROXY=y
 CONFIG_NETFILTER_XT_CONNMARK=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
+40
tools/testing/selftests/bpf/prog_tests/btf.c
···
 	.err_str = "Invalid arg#1",
 },
 {
+	.descr = "decl_tag test #18, decl_tag as the map key type",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),		/* [3] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag"),
+	.map_type = BPF_MAP_TYPE_HASH,
+	.map_name = "tag_type_check_btf",
+	.key_size = 8,
+	.value_size = 4,
+	.key_type_id = 3,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.map_create_err = true,
+},
+{
+	.descr = "decl_tag test #19, decl_tag as the map value type",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),		/* [3] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag"),
+	.map_type = BPF_MAP_TYPE_HASH,
+	.map_name = "tag_type_check_btf",
+	.key_size = 4,
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 1,
+	.map_create_err = true,
+},
+{
 	.descr = "type_tag test #1",
 	.raw_types = {
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+1 -1
tools/testing/selftests/bpf/prog_tests/check_mtu.c
···
 
 void serial_test_check_mtu(void)
 {
-	__u32 mtu_lo;
+	int mtu_lo;
 
 	if (test__start_subtest("bpf_check_mtu XDP-attach"))
 		test_check_mtu_xdp_attach();
+2
tools/testing/selftests/bpf/prog_tests/cpumask.c
···
 	"test_set_clear_cpu",
 	"test_setall_clear_cpu",
 	"test_first_firstzero_cpu",
+	"test_firstand_nocpu",
 	"test_test_and_set_clear",
 	"test_and_or_xor",
 	"test_intersects_subset",
···
 		verify_success(cpumask_success_testcases[i]);
 	}
 
+	RUN_TESTS(cpumask_success);
 	RUN_TESTS(cpumask_failure);
 }
+53 -8
tools/testing/selftests/bpf/prog_tests/fib_lookup.c
···
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
 
+#include <linux/rtnetlink.h>
 #include <sys/types.h>
 #include <net/if.h>
···
 #define IPV4_IFACE_ADDR		"10.0.0.254"
 #define IPV4_NUD_FAILED_ADDR	"10.0.0.1"
 #define IPV4_NUD_STALE_ADDR	"10.0.0.2"
+#define IPV4_TBID_ADDR		"172.0.0.254"
+#define IPV4_TBID_NET		"172.0.0.0"
+#define IPV4_TBID_DST		"172.0.0.2"
+#define IPV6_TBID_ADDR		"fd00::FFFF"
+#define IPV6_TBID_NET		"fd00::"
+#define IPV6_TBID_DST		"fd00::2"
 #define DMAC			"11:11:11:11:11:11"
 #define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+#define DMAC2			"01:01:01:01:01:01"
+#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, }
 
 struct fib_lookup_test {
 	const char *desc;
 	const char *daddr;
 	int expected_ret;
 	int lookup_flags;
+	__u32 tbid;
 	__u8 dmac[6];
 };
···
 	{ .desc = "IPv4 skip neigh",
 	  .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
 	  .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+	{ .desc = "IPv4 TBID lookup failure",
+	  .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+	  .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+	  .tbid = RT_TABLE_MAIN, },
+	{ .desc = "IPv4 TBID lookup success",
+	  .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+	  .dmac = DMAC_INIT2, },
+	{ .desc = "IPv6 TBID lookup failure",
+	  .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+	  .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+	  .tbid = RT_TABLE_MAIN, },
+	{ .desc = "IPv6 TBID lookup success",
+	  .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+	  .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+	  .dmac = DMAC_INIT2, },
 };
 
 static int ifindex;
···
 
 	SYS(fail, "ip link add veth1 type veth peer name veth2");
 	SYS(fail, "ip link set dev veth1 up");
+	SYS(fail, "ip link set dev veth2 up");
 
 	err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
 	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
···
 	SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
 	SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
 
+	/* Setup for tbid lookup tests */
+	SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
+	SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
+	SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET);
+	SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2);
+
+	SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR);
+	SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET);
+	SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET);
+	SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2);
+
 	err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
 	if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
 		goto fail;
···
 	return -1;
 }
 
-static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr)
+static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
 {
 	int ret;
···
 
 	params->l4_protocol = IPPROTO_TCP;
 	params->ifindex = ifindex;
+	params->tbid = test->tbid;
 
-	if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) {
+	if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
 		params->family = AF_INET6;
 		ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
 		if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
···
 		return 0;
 	}
 
-	ret = inet_pton(AF_INET, daddr, &params->ipv4_dst);
+	ret = inet_pton(AF_INET, test->daddr, &params->ipv4_dst);
 	if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
 		return -1;
 	params->family = AF_INET;
···
 	fib_params = &skel->bss->fib_params;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		printf("Testing %s\n", tests[i].desc);
+		printf("Testing %s ", tests[i].desc);
 
-		if (set_lookup_params(fib_params, tests[i].daddr))
+		if (set_lookup_params(fib_params, &tests[i]))
 			continue;
 		skel->bss->fib_lookup_ret = -1;
-		skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT |
-					  tests[i].lookup_flags;
+		skel->bss->lookup_flags = tests[i].lookup_flags;
 
 		err = bpf_prog_test_run_opts(prog_fd, &run_opts);
 		if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
···
 
 			mac_str(expected, tests[i].dmac);
 			mac_str(actual, fib_params->dmac);
-			printf("dmac expected %s actual %s\n", expected, actual);
+			printf("dmac expected %s actual %s ", expected, actual);
+		}
+
+		// ensure tbid is zero'd out after fib lookup.
+		if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) {
+			if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0,
+				       "expected fib_params.tbid to be zero"))
+				goto fail;
 		}
 	}
 
+5 -1
tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
···
 		     prog_insns, prog_insn_cnt, &load_opts),
 		  -EPERM, "prog_load_fails");
 
-	for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
+	/* some map types require particular correct parameters which could be
+	 * sanity-checked before enforcing -EPERM, so only validate that
+	 * the simple ARRAY and HASH maps are failing with -EPERM
+	 */
+	for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++)
 		ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
 			  -EPERM, "map_create_fails");
 
+2
tools/testing/selftests/bpf/prog_tests/verifier.c
···
 #include "verifier_regalloc.skel.h"
 #include "verifier_ringbuf.skel.h"
 #include "verifier_runtime_jit.skel.h"
+#include "verifier_scalar_ids.skel.h"
 #include "verifier_search_pruning.skel.h"
 #include "verifier_sock.skel.h"
 #include "verifier_spill_fill.skel.h"
···
 void test_verifier_regalloc(void)             { RUN(verifier_regalloc); }
 void test_verifier_ringbuf(void)              { RUN(verifier_ringbuf); }
 void test_verifier_runtime_jit(void)          { RUN(verifier_runtime_jit); }
+void test_verifier_scalar_ids(void)           { RUN(verifier_scalar_ids); }
 void test_verifier_search_pruning(void)       { RUN(verifier_search_pruning); }
 void test_verifier_sock(void)                 { RUN(verifier_sock); }
 void test_verifier_spill_fill(void)           { RUN(verifier_spill_fill); }
+312
tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
···
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ *    NS0 namespace          |   NS1 namespace
+ *                           |
+ *    +--------------+       |   +--------------+
+ *    |    veth01    |-----------|    veth10    |
+ *    | 172.16.1.100 |       |   | 172.16.1.200 |
+ *    |     bpf      |       |   +--------------+
+ *    +--------------+       |
+ *    server(UDP/TCP)        |
+ *  +-------------------+    |
+ *  |       vrf1        |    |
+ *  | +--------------+  |    |   +--------------+
+ *  | |    veth02    |-----------|    veth20    |
+ *  | | 172.16.2.100 |  |    |   | 172.16.2.200 |
+ *  | |     bpf      |  |    |   +--------------+
+ *  | +--------------+  |    |
+ *  | server(UDP/TCP)   |    |
+ *  +-------------------+    |
+ *
+ * Test flow
+ * -----------
+ * The test verifies that socket lookup via TC is VRF aware:
+ * 1) Creates two veth pairs between NS0 and NS1:
+ *    a) veth01 <-> veth10 outside the VRF
+ *    b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ *    a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ *    b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ *    c) bpf_sk_lookup_udp() with UDP
+ *    The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ *    a) 0 from device in VRF to server outside VRF
+ *    b) 0 from device outside VRF to server in VRF
+ *    c) 1 from device in VRF to server in VRF
+ *    d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "vrf_socket_lookup.skel.h"
+
+#define NS0 "vrf_socket_lookup_0"
+#define NS1 "vrf_socket_lookup_1"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define TIMEOUT_MS 3000
+
+static int make_socket(int sotype, const char *ip, int port,
+		       struct sockaddr_storage *addr)
+{
+	int err, fd;
+
+	err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+	if (!ASSERT_OK(err, "make_address"))
+		return -1;
+
+	fd = socket(AF_INET, sotype, 0);
+	if (!ASSERT_GE(fd, 0, "socket"))
+		return -1;
+
+	if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo"))
+		goto fail;
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+	int err, fd = -1;
+
+	fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS);
+	if (!ASSERT_GE(fd, 0, "start_server"))
+		return -1;
+
+	if (ifname) {
+		err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+				 ifname, strlen(ifname) + 1);
+		if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+			goto fail;
+	}
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd)
+{
+	LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS);
+	LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1,
.priority = 1, 109 + .prog_fd = tc_prog_fd); 110 + int ret, ifindex; 111 + 112 + ifindex = if_nametoindex(ifname); 113 + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) 114 + return -1; 115 + hook.ifindex = ifindex; 116 + 117 + ret = bpf_tc_hook_create(&hook); 118 + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) 119 + return ret; 120 + 121 + ret = bpf_tc_attach(&hook, &opts); 122 + if (!ASSERT_OK(ret, "bpf_tc_attach")) { 123 + bpf_tc_hook_destroy(&hook); 124 + return ret; 125 + } 126 + ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL); 127 + if (!ASSERT_OK(ret, "bpf_xdp_attach")) { 128 + bpf_tc_hook_destroy(&hook); 129 + return ret; 130 + } 131 + 132 + return 0; 133 + } 134 + 135 + static void cleanup(void) 136 + { 137 + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " 138 + NS0); 139 + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " 140 + NS1); 141 + } 142 + 143 + static int setup(struct vrf_socket_lookup *skel) 144 + { 145 + int tc_prog_fd, xdp_prog_fd, ret = 0; 146 + struct nstoken *nstoken = NULL; 147 + 148 + SYS(fail, "ip netns add " NS0); 149 + SYS(fail, "ip netns add " NS1); 150 + 151 + /* NS0 <-> NS1 [veth01 <-> veth10] */ 152 + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10" 153 + " netns " NS1); 154 + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); 155 + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); 156 + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); 157 + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); 158 + 159 + /* NS0 <-> NS1 [veth02 <-> veth20] */ 160 + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20" 161 + " netns " NS1); 162 + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); 163 + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); 164 + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); 165 + SYS(fail, "ip -net " NS1 " link set dev veth20 up"); 166 + 167 + /* 
veth02 -> vrf1 */ 168 + SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11"); 169 + SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default" 170 + " metric 4278198272"); 171 + SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf"); 172 + SYS(fail, "ip -net " NS0 " link set vrf1 up"); 173 + SYS(fail, "ip -net " NS0 " link set veth02 master vrf1"); 174 + 175 + /* Attach TC and XDP progs to veth devices in NS0 */ 176 + nstoken = open_netns(NS0); 177 + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) 178 + goto fail; 179 + tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup); 180 + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd")) 181 + goto fail; 182 + xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup); 183 + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd")) 184 + goto fail; 185 + 186 + if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd)) 187 + goto fail; 188 + 189 + if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd)) 190 + goto fail; 191 + 192 + goto close; 193 + fail: 194 + ret = -1; 195 + close: 196 + if (nstoken) 197 + close_netns(nstoken); 198 + return ret; 199 + } 200 + 201 + static int test_lookup(struct vrf_socket_lookup *skel, int sotype, 202 + const char *ip, int port, bool test_xdp, bool tcp_skc, 203 + int lookup_status_exp) 204 + { 205 + static const char msg[] = "Hello Server"; 206 + struct sockaddr_storage addr = {}; 207 + int fd, ret = 0; 208 + 209 + fd = make_socket(sotype, ip, port, &addr); 210 + if (fd < 0) 211 + return -1; 212 + 213 + skel->bss->test_xdp = test_xdp; 214 + skel->bss->tcp_skc = tcp_skc; 215 + skel->bss->lookup_status = -1; 216 + 217 + if (sotype == SOCK_STREAM) 218 + connect(fd, (void *)&addr, sizeof(struct sockaddr_in)); 219 + else 220 + sendto(fd, msg, sizeof(msg), 0, (void *)&addr, 221 + sizeof(struct sockaddr_in)); 222 + 223 + if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp, 224 + "lookup_status")) 225 + goto fail; 226 + 227 + goto close; 228 + 229 + fail: 230 + ret = -1; 231 + close: 
232 + close(fd); 233 + return ret; 234 + } 235 + 236 + static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype, 237 + bool test_xdp, bool tcp_skc) 238 + { 239 + int in_vrf_server = -1, non_vrf_server = -1; 240 + struct nstoken *nstoken = NULL; 241 + 242 + nstoken = open_netns(NS0); 243 + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) 244 + goto done; 245 + 246 + /* Open sockets in and outside VRF */ 247 + non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL); 248 + if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd")) 249 + goto done; 250 + 251 + in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02"); 252 + if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd")) 253 + goto done; 254 + 255 + /* Perform test from NS1 */ 256 + close_netns(nstoken); 257 + nstoken = open_netns(NS1); 258 + if (!ASSERT_OK_PTR(nstoken, "setns " NS1)) 259 + goto done; 260 + 261 + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT, 262 + test_xdp, tcp_skc, 0), "in_to_out")) 263 + goto done; 264 + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT, 265 + test_xdp, tcp_skc, 1), "in_to_in")) 266 + goto done; 267 + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT, 268 + test_xdp, tcp_skc, 1), "out_to_out")) 269 + goto done; 270 + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT, 271 + test_xdp, tcp_skc, 0), "out_to_in")) 272 + goto done; 273 + 274 + done: 275 + if (non_vrf_server >= 0) 276 + close(non_vrf_server); 277 + if (in_vrf_server >= 0) 278 + close(in_vrf_server); 279 + if (nstoken) 280 + close_netns(nstoken); 281 + } 282 + 283 + void test_vrf_socket_lookup(void) 284 + { 285 + struct vrf_socket_lookup *skel; 286 + 287 + cleanup(); 288 + 289 + skel = vrf_socket_lookup__open_and_load(); 290 + if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load")) 291 + return; 292 + 293 + if (!ASSERT_OK(setup(skel), "setup")) 294 + goto done; 295 + 296 + if 
(test__start_subtest("tc_socket_lookup_tcp")) 297 + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); 298 + if (test__start_subtest("tc_socket_lookup_tcp_skc")) 299 + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, true); 300 + if (test__start_subtest("tc_socket_lookup_udp")) 301 + _test_vrf_socket_lookup(skel, SOCK_DGRAM, false, false); 302 + if (test__start_subtest("xdp_socket_lookup_tcp")) 303 + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); 304 + if (test__start_subtest("xdp_socket_lookup_tcp_skc")) 305 + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, true); 306 + if (test__start_subtest("xdp_socket_lookup_udp")) 307 + _test_vrf_socket_lookup(skel, SOCK_DGRAM, true, false); 308 + 309 + done: 310 + vrf_socket_lookup__destroy(skel); 311 + cleanup(); 312 + }
+4 -2
tools/testing/selftests/bpf/progs/cpumask_common.h
··· 28 28 struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; 29 29 u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; 30 30 u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; 31 + u32 bpf_cpumask_first_and(const struct cpumask *src1, 32 + const struct cpumask *src2) __ksym; 31 33 void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; 32 34 void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; 33 35 bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; ··· 52 50 bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; 53 51 bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; 54 52 void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; 55 - u32 bpf_cpumask_any(const struct cpumask *src) __ksym; 56 - u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; 53 + u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; 54 + u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; 57 55 58 56 void bpf_rcu_read_lock(void) __ksym; 59 57 void bpf_rcu_read_unlock(void) __ksym;
+60 -4
tools/testing/selftests/bpf/progs/cpumask_success.c
··· 5 5 #include <bpf/bpf_tracing.h> 6 6 #include <bpf/bpf_helpers.h> 7 7 8 + #include "bpf_misc.h" 8 9 #include "cpumask_common.h" 9 10 10 11 char _license[] SEC("license") = "GPL"; ··· 176 175 } 177 176 178 177 SEC("tp_btf/task_newtask") 178 + int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags) 179 + { 180 + struct bpf_cpumask *mask1, *mask2; 181 + u32 first; 182 + 183 + if (!is_test_task()) 184 + return 0; 185 + 186 + mask1 = create_cpumask(); 187 + if (!mask1) 188 + return 0; 189 + 190 + mask2 = create_cpumask(); 191 + if (!mask2) 192 + goto release_exit; 193 + 194 + bpf_cpumask_set_cpu(0, mask1); 195 + bpf_cpumask_set_cpu(1, mask2); 196 + 197 + first = bpf_cpumask_first_and(cast(mask1), cast(mask2)); 198 + if (first <= 1) 199 + err = 3; 200 + 201 + release_exit: 202 + if (mask1) 203 + bpf_cpumask_release(mask1); 204 + if (mask2) 205 + bpf_cpumask_release(mask2); 206 + return 0; 207 + } 208 + 209 + SEC("tp_btf/task_newtask") 179 210 int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags) 180 211 { 181 212 struct bpf_cpumask *cpumask; ··· 344 311 bpf_cpumask_set_cpu(1, mask2); 345 312 bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); 346 313 347 - cpu = bpf_cpumask_any(cast(mask1)); 314 + cpu = bpf_cpumask_any_distribute(cast(mask1)); 348 315 if (cpu != 0) { 349 316 err = 6; 350 317 goto release_exit; 351 318 } 352 319 353 - cpu = bpf_cpumask_any(cast(dst2)); 320 + cpu = bpf_cpumask_any_distribute(cast(dst2)); 354 321 if (cpu < nr_cpus) { 355 322 err = 7; 356 323 goto release_exit; ··· 362 329 goto release_exit; 363 330 } 364 331 365 - cpu = bpf_cpumask_any(cast(dst2)); 332 + cpu = bpf_cpumask_any_distribute(cast(dst2)); 366 333 if (cpu > 1) { 367 334 err = 9; 368 335 goto release_exit; 369 336 } 370 337 371 - cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2)); 338 + cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2)); 372 339 if (cpu < nr_cpus) { 373 340 err = 10; 374 341 goto release_exit; ··· 457 424 
bpf_cpumask_test_cpu(0, (const struct cpumask *)local); 458 425 bpf_rcu_read_unlock(); 459 426 427 + return 0; 428 + } 429 + 430 + SEC("tp_btf/task_newtask") 431 + __success 432 + int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) 433 + { 434 + struct bpf_cpumask *mask1, *mask2; 435 + 436 + mask1 = bpf_cpumask_create(); 437 + mask2 = bpf_cpumask_create(); 438 + 439 + if (!mask1 || !mask2) 440 + goto free_masks_return; 441 + 442 + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1); 443 + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2); 444 + 445 + free_masks_return: 446 + if (mask1) 447 + bpf_cpumask_release(mask1); 448 + if (mask2) 449 + bpf_cpumask_release(mask2); 460 450 return 0; 461 451 }
+2
tools/testing/selftests/bpf/progs/refcounted_kptr.c
··· 375 375 bpf_rbtree_add(&aroot, &n->node, less_a); 376 376 m = bpf_refcount_acquire(n); 377 377 bpf_spin_unlock(&alock); 378 + if (!m) 379 + return 2; 378 380 379 381 m->key = 2; 380 382 bpf_obj_drop(m);
+3 -1
tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
··· 29 29 } 30 30 31 31 SEC("?tc") 32 - __failure __msg("Unreleased reference id=3 alloc_insn=21") 32 + __failure __msg("Unreleased reference id=4 alloc_insn=21") 33 33 long rbtree_refcounted_node_ref_escapes(void *ctx) 34 34 { 35 35 struct node_acquire *n, *m; ··· 43 43 /* m becomes an owning ref but is never drop'd or added to a tree */ 44 44 m = bpf_refcount_acquire(n); 45 45 bpf_spin_unlock(&glock); 46 + if (!m) 47 + return 2; 46 48 47 49 m->key = 2; 48 50 return 0;
+659
tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include "bpf_misc.h" 6 + 7 + /* Check that precision marks propagate through scalar IDs. 8 + * Registers r{0,1,2} have the same scalar ID at the moment when r0 is 9 + * marked to be precise, this mark is immediately propagated to r{1,2}. 10 + */ 11 + SEC("socket") 12 + __success __log_level(2) 13 + __msg("frame0: regs=r0,r1,r2 stack= before 4: (bf) r3 = r10") 14 + __msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") 15 + __msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") 16 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 17 + __msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") 18 + __flag(BPF_F_TEST_STATE_FREQ) 19 + __naked void precision_same_state(void) 20 + { 21 + asm volatile ( 22 + /* r0 = random number up to 0xff */ 23 + "call %[bpf_ktime_get_ns];" 24 + "r0 &= 0xff;" 25 + /* tie r0.id == r1.id == r2.id */ 26 + "r1 = r0;" 27 + "r2 = r0;" 28 + /* force r0 to be precise, this immediately marks r1 and r2 as 29 + * precise as well because of shared IDs 30 + */ 31 + "r3 = r10;" 32 + "r3 += r0;" 33 + "r0 = 0;" 34 + "exit;" 35 + : 36 + : __imm(bpf_ktime_get_ns) 37 + : __clobber_all); 38 + } 39 + 40 + /* Same as precision_same_state, but mark propagates through state / 41 + * parent state boundary. 
42 + */ 43 + SEC("socket") 44 + __success __log_level(2) 45 + __msg("frame0: last_idx 6 first_idx 5 subseq_idx -1") 46 + __msg("frame0: regs=r0,r1,r2 stack= before 5: (bf) r3 = r10") 47 + __msg("frame0: parent state regs=r0,r1,r2 stack=:") 48 + __msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") 49 + __msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") 50 + __msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") 51 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 52 + __msg("frame0: parent state regs=r0 stack=:") 53 + __msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") 54 + __flag(BPF_F_TEST_STATE_FREQ) 55 + __naked void precision_cross_state(void) 56 + { 57 + asm volatile ( 58 + /* r0 = random number up to 0xff */ 59 + "call %[bpf_ktime_get_ns];" 60 + "r0 &= 0xff;" 61 + /* tie r0.id == r1.id == r2.id */ 62 + "r1 = r0;" 63 + "r2 = r0;" 64 + /* force checkpoint */ 65 + "goto +0;" 66 + /* force r0 to be precise, this immediately marks r1 and r2 as 67 + * precise as well because of shared IDs 68 + */ 69 + "r3 = r10;" 70 + "r3 += r0;" 71 + "r0 = 0;" 72 + "exit;" 73 + : 74 + : __imm(bpf_ktime_get_ns) 75 + : __clobber_all); 76 + } 77 + 78 + /* Same as precision_same_state, but break one of the 79 + * links, note that r1 is absent from regs=... in __msg below. 
80 + */ 81 + SEC("socket") 82 + __success __log_level(2) 83 + __msg("frame0: regs=r0,r2 stack= before 5: (bf) r3 = r10") 84 + __msg("frame0: regs=r0,r2 stack= before 4: (b7) r1 = 0") 85 + __msg("frame0: regs=r0,r2 stack= before 3: (bf) r2 = r0") 86 + __msg("frame0: regs=r0 stack= before 2: (bf) r1 = r0") 87 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 88 + __msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") 89 + __flag(BPF_F_TEST_STATE_FREQ) 90 + __naked void precision_same_state_broken_link(void) 91 + { 92 + asm volatile ( 93 + /* r0 = random number up to 0xff */ 94 + "call %[bpf_ktime_get_ns];" 95 + "r0 &= 0xff;" 96 + /* tie r0.id == r1.id == r2.id */ 97 + "r1 = r0;" 98 + "r2 = r0;" 99 + /* break link for r1, this is the only line that differs 100 + * compared to the previous test 101 + */ 102 + "r1 = 0;" 103 + /* force r0 to be precise, this immediately marks r1 and r2 as 104 + * precise as well because of shared IDs 105 + */ 106 + "r3 = r10;" 107 + "r3 += r0;" 108 + "r0 = 0;" 109 + "exit;" 110 + : 111 + : __imm(bpf_ktime_get_ns) 112 + : __clobber_all); 113 + } 114 + 115 + /* Same as precision_same_state_broken_link, but with state / 116 + * parent state boundary. 
117 + */ 118 + SEC("socket") 119 + __success __log_level(2) 120 + __msg("frame0: regs=r0,r2 stack= before 6: (bf) r3 = r10") 121 + __msg("frame0: regs=r0,r2 stack= before 5: (b7) r1 = 0") 122 + __msg("frame0: parent state regs=r0,r2 stack=:") 123 + __msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") 124 + __msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") 125 + __msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") 126 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 127 + __msg("frame0: parent state regs=r0 stack=:") 128 + __msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") 129 + __flag(BPF_F_TEST_STATE_FREQ) 130 + __naked void precision_cross_state_broken_link(void) 131 + { 132 + asm volatile ( 133 + /* r0 = random number up to 0xff */ 134 + "call %[bpf_ktime_get_ns];" 135 + "r0 &= 0xff;" 136 + /* tie r0.id == r1.id == r2.id */ 137 + "r1 = r0;" 138 + "r2 = r0;" 139 + /* force checkpoint, although link between r1 and r{0,2} is 140 + * broken by the next statement current precision tracking 141 + * algorithm can't react to it and propagates mark for r1 to 142 + * the parent state. 143 + */ 144 + "goto +0;" 145 + /* break link for r1, this is the only line that differs 146 + * compared to precision_cross_state() 147 + */ 148 + "r1 = 0;" 149 + /* force r0 to be precise, this immediately marks r1 and r2 as 150 + * precise as well because of shared IDs 151 + */ 152 + "r3 = r10;" 153 + "r3 += r0;" 154 + "r0 = 0;" 155 + "exit;" 156 + : 157 + : __imm(bpf_ktime_get_ns) 158 + : __clobber_all); 159 + } 160 + 161 + /* Check that precision marks propagate through scalar IDs. 162 + * Use the same scalar ID in multiple stack frames, check that 163 + * precision information is propagated up the call stack. 
164 + */ 165 + SEC("socket") 166 + __success __log_level(2) 167 + __msg("11: (0f) r2 += r1") 168 + /* Current state */ 169 + __msg("frame2: last_idx 11 first_idx 10 subseq_idx -1") 170 + __msg("frame2: regs=r1 stack= before 10: (bf) r2 = r10") 171 + __msg("frame2: parent state regs=r1 stack=") 172 + /* frame1.r{6,7} are marked because mark_precise_scalar_ids() 173 + * looks for all registers with frame2.r1.id in the current state 174 + */ 175 + __msg("frame1: parent state regs=r6,r7 stack=") 176 + __msg("frame0: parent state regs=r6 stack=") 177 + /* Parent state */ 178 + __msg("frame2: last_idx 8 first_idx 8 subseq_idx 10") 179 + __msg("frame2: regs=r1 stack= before 8: (85) call pc+1") 180 + /* frame1.r1 is marked because of backtracking of call instruction */ 181 + __msg("frame1: parent state regs=r1,r6,r7 stack=") 182 + __msg("frame0: parent state regs=r6 stack=") 183 + /* Parent state */ 184 + __msg("frame1: last_idx 7 first_idx 6 subseq_idx 8") 185 + __msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1") 186 + __msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1") 187 + __msg("frame1: parent state regs=r1 stack=") 188 + __msg("frame0: parent state regs=r6 stack=") 189 + /* Parent state */ 190 + __msg("frame1: last_idx 4 first_idx 4 subseq_idx 6") 191 + __msg("frame1: regs=r1 stack= before 4: (85) call pc+1") 192 + __msg("frame0: parent state regs=r1,r6 stack=") 193 + /* Parent state */ 194 + __msg("frame0: last_idx 3 first_idx 1 subseq_idx 4") 195 + __msg("frame0: regs=r0,r1,r6 stack= before 3: (bf) r6 = r0") 196 + __msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") 197 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 198 + __flag(BPF_F_TEST_STATE_FREQ) 199 + __naked void precision_many_frames(void) 200 + { 201 + asm volatile ( 202 + /* r0 = random number up to 0xff */ 203 + "call %[bpf_ktime_get_ns];" 204 + "r0 &= 0xff;" 205 + /* tie r0.id == r1.id == r6.id */ 206 + "r1 = r0;" 207 + "r6 = r0;" 208 + "call precision_many_frames__foo;" 
209 + "exit;" 210 + : 211 + : __imm(bpf_ktime_get_ns) 212 + : __clobber_all); 213 + } 214 + 215 + static __naked __noinline __used 216 + void precision_many_frames__foo(void) 217 + { 218 + asm volatile ( 219 + /* conflate one of the register numbers (r6) with outer frame, 220 + * to verify that those are tracked independently 221 + */ 222 + "r6 = r1;" 223 + "r7 = r1;" 224 + "call precision_many_frames__bar;" 225 + "exit" 226 + ::: __clobber_all); 227 + } 228 + 229 + static __naked __noinline __used 230 + void precision_many_frames__bar(void) 231 + { 232 + asm volatile ( 233 + /* force r1 to be precise, this immediately marks: 234 + * - bar frame r1 235 + * - foo frame r{1,6,7} 236 + * - main frame r{1,6} 237 + */ 238 + "r2 = r10;" 239 + "r2 += r1;" 240 + "r0 = 0;" 241 + "exit;" 242 + ::: __clobber_all); 243 + } 244 + 245 + /* Check that scalars with the same IDs are marked precise on stack as 246 + * well as in registers. 247 + */ 248 + SEC("socket") 249 + __success __log_level(2) 250 + /* foo frame */ 251 + __msg("frame1: regs=r1 stack=-8,-16 before 9: (bf) r2 = r10") 252 + __msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1") 253 + __msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1") 254 + __msg("frame1: regs=r1 stack= before 4: (85) call pc+2") 255 + /* main frame */ 256 + __msg("frame0: regs=r0,r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1") 257 + __msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") 258 + __msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") 259 + __flag(BPF_F_TEST_STATE_FREQ) 260 + __naked void precision_stack(void) 261 + { 262 + asm volatile ( 263 + /* r0 = random number up to 0xff */ 264 + "call %[bpf_ktime_get_ns];" 265 + "r0 &= 0xff;" 266 + /* tie r0.id == r1.id == fp[-8].id */ 267 + "r1 = r0;" 268 + "*(u64*)(r10 - 8) = r1;" 269 + "call precision_stack__foo;" 270 + "r0 = 0;" 271 + "exit;" 272 + : 273 + : __imm(bpf_ktime_get_ns) 274 + : __clobber_all); 275 + } 276 + 277 + static __naked 
__noinline __used 278 + void precision_stack__foo(void) 279 + { 280 + asm volatile ( 281 + /* conflate one of the register numbers (r6) with outer frame, 282 + * to verify that those are tracked independently 283 + */ 284 + "*(u64*)(r10 - 8) = r1;" 285 + "*(u64*)(r10 - 16) = r1;" 286 + /* force r1 to be precise, this immediately marks: 287 + * - foo frame r1,fp{-8,-16} 288 + * - main frame r1,fp{-8} 289 + */ 290 + "r2 = r10;" 291 + "r2 += r1;" 292 + "exit" 293 + ::: __clobber_all); 294 + } 295 + 296 + /* Use two separate scalar IDs to check that these are propagated 297 + * independently. 298 + */ 299 + SEC("socket") 300 + __success __log_level(2) 301 + /* r{6,7} */ 302 + __msg("11: (0f) r3 += r7") 303 + __msg("frame0: regs=r6,r7 stack= before 10: (bf) r3 = r10") 304 + /* ... skip some insns ... */ 305 + __msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0") 306 + __msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0") 307 + /* r{8,9} */ 308 + __msg("12: (0f) r3 += r9") 309 + __msg("frame0: regs=r8,r9 stack= before 11: (0f) r3 += r7") 310 + /* ... skip some insns ... 
*/ 311 + __msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0") 312 + __msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0") 313 + __flag(BPF_F_TEST_STATE_FREQ) 314 + __naked void precision_two_ids(void) 315 + { 316 + asm volatile ( 317 + /* r6 = random number up to 0xff 318 + * r6.id == r7.id 319 + */ 320 + "call %[bpf_ktime_get_ns];" 321 + "r0 &= 0xff;" 322 + "r6 = r0;" 323 + "r7 = r0;" 324 + /* same, but for r{8,9} */ 325 + "call %[bpf_ktime_get_ns];" 326 + "r0 &= 0xff;" 327 + "r8 = r0;" 328 + "r9 = r0;" 329 + /* clear r0 id */ 330 + "r0 = 0;" 331 + /* force checkpoint */ 332 + "goto +0;" 333 + "r3 = r10;" 334 + /* force r7 to be precise, this also marks r6 */ 335 + "r3 += r7;" 336 + /* force r9 to be precise, this also marks r8 */ 337 + "r3 += r9;" 338 + "exit;" 339 + : 340 + : __imm(bpf_ktime_get_ns) 341 + : __clobber_all); 342 + } 343 + 344 + /* Verify that check_ids() is used by regsafe() for scalars. 345 + * 346 + * r9 = ... some pointer with range X ... 347 + * r6 = ... unbound scalar ID=a ... 348 + * r7 = ... unbound scalar ID=b ... 349 + * if (r6 > r7) goto +1 350 + * r7 = r6 351 + * if (r7 > X) goto exit 352 + * r9 += r6 353 + * ... access memory using r9 ... 354 + * 355 + * The memory access is safe only if r7 is bounded, 356 + * which is true for one branch and not true for another. 
357 + */ 358 + SEC("socket") 359 + __failure __msg("register with unbounded min value") 360 + __flag(BPF_F_TEST_STATE_FREQ) 361 + __naked void check_ids_in_regsafe(void) 362 + { 363 + asm volatile ( 364 + /* Bump allocated stack */ 365 + "r1 = 0;" 366 + "*(u64*)(r10 - 8) = r1;" 367 + /* r9 = pointer to stack */ 368 + "r9 = r10;" 369 + "r9 += -8;" 370 + /* r7 = ktime_get_ns() */ 371 + "call %[bpf_ktime_get_ns];" 372 + "r7 = r0;" 373 + /* r6 = ktime_get_ns() */ 374 + "call %[bpf_ktime_get_ns];" 375 + "r6 = r0;" 376 + /* if r6 > r7 is an unpredictable jump */ 377 + "if r6 > r7 goto l1_%=;" 378 + "r7 = r6;" 379 + "l1_%=:" 380 + /* if r7 > 4 ...; transfers range to r6 on one execution path 381 + * but does not transfer on another 382 + */ 383 + "if r7 > 4 goto l2_%=;" 384 + /* Access memory at r9[r6], r6 is not always bounded */ 385 + "r9 += r6;" 386 + "r0 = *(u8*)(r9 + 0);" 387 + "l2_%=:" 388 + "r0 = 0;" 389 + "exit;" 390 + : 391 + : __imm(bpf_ktime_get_ns) 392 + : __clobber_all); 393 + } 394 + 395 + /* Similar to check_ids_in_regsafe. 396 + * The l0 could be reached in two states: 397 + * 398 + * (1) r6{.id=A}, r7{.id=A}, r8{.id=B} 399 + * (2) r6{.id=B}, r7{.id=A}, r8{.id=B} 400 + * 401 + * Where (2) is not safe, as "r7 > 4" check won't propagate range for it. 402 + * This example would be considered safe without changes to 403 + * mark_chain_precision() to track scalar values with equal IDs. 
404 + */ 405 + SEC("socket") 406 + __failure __msg("register with unbounded min value") 407 + __flag(BPF_F_TEST_STATE_FREQ) 408 + __naked void check_ids_in_regsafe_2(void) 409 + { 410 + asm volatile ( 411 + /* Bump allocated stack */ 412 + "r1 = 0;" 413 + "*(u64*)(r10 - 8) = r1;" 414 + /* r9 = pointer to stack */ 415 + "r9 = r10;" 416 + "r9 += -8;" 417 + /* r8 = ktime_get_ns() */ 418 + "call %[bpf_ktime_get_ns];" 419 + "r8 = r0;" 420 + /* r7 = ktime_get_ns() */ 421 + "call %[bpf_ktime_get_ns];" 422 + "r7 = r0;" 423 + /* r6 = ktime_get_ns() */ 424 + "call %[bpf_ktime_get_ns];" 425 + "r6 = r0;" 426 + /* scratch .id from r0 */ 427 + "r0 = 0;" 428 + /* if r6 > r7 is an unpredictable jump */ 429 + "if r6 > r7 goto l1_%=;" 430 + /* tie r6 and r7 .id */ 431 + "r6 = r7;" 432 + "l0_%=:" 433 + /* if r7 > 4 exit(0) */ 434 + "if r7 > 4 goto l2_%=;" 435 + /* Access memory at r9[r6] */ 436 + "r9 += r6;" 437 + "r0 = *(u8*)(r9 + 0);" 438 + "l2_%=:" 439 + "r0 = 0;" 440 + "exit;" 441 + "l1_%=:" 442 + /* tie r6 and r8 .id */ 443 + "r6 = r8;" 444 + "goto l0_%=;" 445 + : 446 + : __imm(bpf_ktime_get_ns) 447 + : __clobber_all); 448 + } 449 + 450 + /* Check that scalar IDs *are not* generated on register to register 451 + * assignments if source register is a constant. 452 + * 453 + * If such IDs *are* generated the 'l1' below would be reached in 454 + * two states: 455 + * 456 + * (1) r1{.id=A}, r2{.id=A} 457 + * (2) r1{.id=C}, r2{.id=C} 458 + * 459 + * Thus forcing 'if r1 == r2' verification twice. 
460 + */ 461 + SEC("socket") 462 + __success __log_level(2) 463 + __msg("11: (1d) if r3 == r4 goto pc+0") 464 + __msg("frame 0: propagating r3,r4") 465 + __msg("11: safe") 466 + __msg("processed 15 insns") 467 + __flag(BPF_F_TEST_STATE_FREQ) 468 + __naked void no_scalar_id_for_const(void) 469 + { 470 + asm volatile ( 471 + "call %[bpf_ktime_get_ns];" 472 + /* unpredictable jump */ 473 + "if r0 > 7 goto l0_%=;" 474 + /* possibly generate same scalar ids for r3 and r4 */ 475 + "r1 = 0;" 476 + "r1 = r1;" 477 + "r3 = r1;" 478 + "r4 = r1;" 479 + "goto l1_%=;" 480 + "l0_%=:" 481 + /* possibly generate different scalar ids for r3 and r4 */ 482 + "r1 = 0;" 483 + "r2 = 0;" 484 + "r3 = r1;" 485 + "r4 = r2;" 486 + "l1_%=:" 487 + /* predictable jump, marks r3 and r4 precise */ 488 + "if r3 == r4 goto +0;" 489 + "r0 = 0;" 490 + "exit;" 491 + : 492 + : __imm(bpf_ktime_get_ns) 493 + : __clobber_all); 494 + } 495 + 496 + /* Same as no_scalar_id_for_const() but for 32-bit values */ 497 + SEC("socket") 498 + __success __log_level(2) 499 + __msg("11: (1e) if w3 == w4 goto pc+0") 500 + __msg("frame 0: propagating r3,r4") 501 + __msg("11: safe") 502 + __msg("processed 15 insns") 503 + __flag(BPF_F_TEST_STATE_FREQ) 504 + __naked void no_scalar_id_for_const32(void) 505 + { 506 + asm volatile ( 507 + "call %[bpf_ktime_get_ns];" 508 + /* unpredictable jump */ 509 + "if r0 > 7 goto l0_%=;" 510 + /* possibly generate same scalar ids for r3 and r4 */ 511 + "w1 = 0;" 512 + "w1 = w1;" 513 + "w3 = w1;" 514 + "w4 = w1;" 515 + "goto l1_%=;" 516 + "l0_%=:" 517 + /* possibly generate different scalar ids for r3 and r4 */ 518 + "w1 = 0;" 519 + "w2 = 0;" 520 + "w3 = w1;" 521 + "w4 = w2;" 522 + "l1_%=:" 523 + /* predictable jump, marks r1 and r2 precise */ 524 + "if w3 == w4 goto +0;" 525 + "r0 = 0;" 526 + "exit;" 527 + : 528 + : __imm(bpf_ktime_get_ns) 529 + : __clobber_all); 530 + } 531 + 532 + /* Check that unique scalar IDs are ignored when new verifier state is 533 + * compared to cached verifier 
state. For this test:
 * - cached state has no id on r1
 * - new state has a unique id on r1
 */
SEC("socket")
__success __log_level(2)
__msg("6: (25) if r6 > 0x7 goto pc+1")
__msg("7: (57) r1 &= 255")
__msg("8: (bf) r2 = r10")
__msg("from 6 to 8: safe")
__msg("processed 12 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void ignore_unique_scalar_ids_cur(void)
{
	asm volatile (
	"call %[bpf_ktime_get_ns];"
	"r6 = r0;"
	"call %[bpf_ktime_get_ns];"
	"r0 &= 0xff;"
	/* r1.id == r0.id */
	"r1 = r0;"
	/* make r1.id unique */
	"r0 = 0;"
	"if r6 > 7 goto l0_%=;"
	/* clear r1 id, but keep the range compatible */
	"r1 &= 0xff;"
"l0_%=:"
	/* get here in two states:
	 * - first: r1 has no id (cached state)
	 * - second: r1 has a unique id (should be considered equivalent)
	 */
	"r2 = r10;"
	"r2 += r1;"
	"exit;"
	:
	: __imm(bpf_ktime_get_ns)
	: __clobber_all);
}

/* Check that unique scalar IDs are ignored when new verifier state is
 * compared to cached verifier state. For this test:
 * - cached state has a unique id on r1
 * - new state has no id on r1
 */
SEC("socket")
__success __log_level(2)
__msg("6: (25) if r6 > 0x7 goto pc+1")
__msg("7: (05) goto pc+1")
__msg("9: (bf) r2 = r10")
__msg("9: safe")
__msg("processed 13 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void ignore_unique_scalar_ids_old(void)
{
	asm volatile (
	"call %[bpf_ktime_get_ns];"
	"r6 = r0;"
	"call %[bpf_ktime_get_ns];"
	"r0 &= 0xff;"
	/* r1.id == r0.id */
	"r1 = r0;"
	/* make r1.id unique */
	"r0 = 0;"
	"if r6 > 7 goto l1_%=;"
	"goto l0_%=;"
"l1_%=:"
	/* clear r1 id, but keep the range compatible */
	"r1 &= 0xff;"
"l0_%=:"
	/* get here in two states:
	 * - first: r1 has a unique id (cached state)
	 * - second: r1 has no id (should be considered equivalent)
	 */
	"r2 = r10;"
	"r2 += r1;"
	"exit;"
	:
	: __imm(bpf_ktime_get_ns)
	: __clobber_all);
}

/* Check that two different scalar IDs in a verified state can't be
 * mapped to the same scalar ID in current state.
 */
SEC("socket")
__success __log_level(2)
/* The exit instruction should be reachable from two states,
 * use two matches and "processed .. insns" to ensure this.
 */
__msg("13: (95) exit")
__msg("13: (95) exit")
__msg("processed 18 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void two_old_ids_one_cur_id(void)
{
	asm volatile (
	/* Give unique scalar IDs to r{6,7} */
	"call %[bpf_ktime_get_ns];"
	"r0 &= 0xff;"
	"r6 = r0;"
	"call %[bpf_ktime_get_ns];"
	"r0 &= 0xff;"
	"r7 = r0;"
	"r0 = 0;"
	/* Maybe make r{6,7} IDs identical */
	"if r6 > r7 goto l0_%=;"
	"goto l1_%=;"
"l0_%=:"
	"r6 = r7;"
"l1_%=:"
	/* Mark r{6,7} precise.
	 * Get here in two states:
	 * - first: r6{.id=A}, r7{.id=B} (cached state)
	 * - second: r6{.id=A}, r7{.id=A}
	 * Currently we don't want to consider such states equivalent.
	 * Thus "exit;" would be verified twice.
	 */
	"r2 = r10;"
	"r2 += r6;"
	"r2 += r7;"
	"exit;"
	:
	: __imm(bpf_ktime_get_ns)
	: __clobber_all);
}

char _license[] SEC("license") = "GPL";
+89
tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#include <linux/ip.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <stdbool.h>

int lookup_status;
bool test_xdp;
bool tcp_skc;

#define CUR_NS BPF_F_CURRENT_NETNS

static void socket_lookup(void *ctx, void *data_end, void *data)
{
	struct ethhdr *eth = data;
	struct bpf_sock_tuple *tp;
	struct bpf_sock *sk;
	struct iphdr *iph;
	int tplen;

	if (eth + 1 > data_end)
		return;

	if (eth->h_proto != bpf_htons(ETH_P_IP))
		return;

	iph = (struct iphdr *)(eth + 1);
	if (iph + 1 > data_end)
		return;

	tp = (struct bpf_sock_tuple *)&iph->saddr;
	tplen = sizeof(tp->ipv4);
	if ((void *)tp + tplen > data_end)
		return;

	switch (iph->protocol) {
	case IPPROTO_TCP:
		if (tcp_skc)
			sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
		else
			sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
		break;
	case IPPROTO_UDP:
		sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0);
		break;
	default:
		return;
	}

	lookup_status = 0;

	if (sk) {
		bpf_sk_release(sk);
		lookup_status = 1;
	}
}

SEC("tc")
int tc_socket_lookup(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;

	if (test_xdp)
		return TC_ACT_UNSPEC;

	socket_lookup(skb, data_end, data);
	return TC_ACT_UNSPEC;
}

SEC("xdp")
int xdp_socket_lookup(struct xdp_md *xdp)
{
	void *data_end = (void *)(long)xdp->data_end;
	void *data = (void *)(long)xdp->data;

	if (!test_xdp)
		return XDP_PASS;

	socket_lookup(xdp, data_end, data);
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
+13 -11
tools/testing/selftests/bpf/test_verifier.c
···
 	return true;
 }
 
-static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt)
 {
+	__u32 buf_element_size = sizeof(struct bpf_insn);
 	struct bpf_prog_info info = {};
 	__u32 info_len = sizeof(info);
 	__u32 xlated_prog_len;
-	__u32 buf_element_size = sizeof(struct bpf_insn);
+	struct bpf_insn *buf;
 
 	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
 		perror("bpf_prog_get_info_by_fd failed");
-		return -1;
+		return NULL;
 	}
 
 	xlated_prog_len = info.xlated_prog_len;
 	if (xlated_prog_len % buf_element_size) {
 		printf("Program length %d is not multiple of %d\n",
 		       xlated_prog_len, buf_element_size);
-		return -1;
+		return NULL;
 	}
 
 	*cnt = xlated_prog_len / buf_element_size;
-	*buf = calloc(*cnt, buf_element_size);
+	buf = calloc(*cnt, buf_element_size);
 	if (!buf) {
 		perror("can't allocate xlated program buffer");
-		return -ENOMEM;
+		return NULL;
 	}
 
 	bzero(&info, sizeof(info));
 	info.xlated_prog_len = xlated_prog_len;
-	info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+	info.xlated_prog_insns = (__u64)(unsigned long)buf;
 	if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
 		perror("second bpf_prog_get_info_by_fd failed");
 		goto out_free_buf;
 	}
 
-	return 0;
+	return buf;
 
 out_free_buf:
-	free(*buf);
-	return -1;
+	free(buf);
+	return NULL;
 }
 
 static bool is_null_insn(struct bpf_insn *insn)
···
 	if (!check_expected && !check_unexpected)
 		goto out;
 
-	if (get_xlated_program(fd_prog, &buf, &cnt)) {
+	buf = get_xlated_program(fd_prog, &cnt);
+	if (!buf) {
 		printf("FAIL: can't get xlated program\n");
 		result = false;
 		goto out;
+4 -4
tools/testing/selftests/bpf/verifier/precise.c
···
 	mark_precise: frame0: regs=r2 stack= before 20\
 	mark_precise: frame0: parent state regs=r2 stack=:\
 	mark_precise: frame0: last_idx 19 first_idx 10\
-	mark_precise: frame0: regs=r2 stack= before 19\
+	mark_precise: frame0: regs=r2,r9 stack= before 19\
 	mark_precise: frame0: regs=r9 stack= before 18\
 	mark_precise: frame0: regs=r8,r9 stack= before 17\
 	mark_precise: frame0: regs=r0,r9 stack= before 15\
···
 	mark_precise: frame0: regs=r2 stack= before 22\
 	mark_precise: frame0: parent state regs=r2 stack=:\
 	mark_precise: frame0: last_idx 20 first_idx 20\
-	mark_precise: frame0: regs=r2 stack= before 20\
-	mark_precise: frame0: parent state regs=r2 stack=:\
+	mark_precise: frame0: regs=r2,r9 stack= before 20\
+	mark_precise: frame0: parent state regs=r2,r9 stack=:\
 	mark_precise: frame0: last_idx 19 first_idx 17\
-	mark_precise: frame0: regs=r2 stack= before 19\
+	mark_precise: frame0: regs=r2,r9 stack= before 19\
 	mark_precise: frame0: regs=r9 stack= before 18\
 	mark_precise: frame0: regs=r8,r9 stack= before 17\
 	mark_precise: frame0: parent state regs= stack=:",