Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-01-22

The following pull-request contains BPF updates for your *net-next* tree.

We've added 92 non-merge commits during the last 16 day(s) which contain
a total of 320 files changed, 7532 insertions(+), 1448 deletions(-).

The main changes are:

1) function by function verification and program extensions from Alexei.

2) massive cleanup of selftests/bpf from Toke and Andrii.

3) batched bpf map operations from Brian and Yonghong.

4) tcp congestion control in bpf from Martin.

5) bulking for non-map xdp_redirect from Toke.

6) bpf_send_signal_thread helper from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+7525 -1441
+8 -10
arch/x86/net/bpf_jit_comp.c
··· 1328 1328 return proglen; 1329 1329 } 1330 1330 1331 - static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args, 1331 + static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args, 1332 1332 int stack_size) 1333 1333 { 1334 1334 int i; ··· 1344 1344 -(stack_size - i * 8)); 1345 1345 } 1346 1346 1347 - static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args, 1347 + static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, 1348 1348 int stack_size) 1349 1349 { 1350 1350 int i; ··· 1361 1361 -(stack_size - i * 8)); 1362 1362 } 1363 1363 1364 - static int invoke_bpf(struct btf_func_model *m, u8 **pprog, 1364 + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, 1365 1365 struct bpf_prog **progs, int prog_cnt, int stack_size) 1366 1366 { 1367 1367 u8 *prog = *pprog; ··· 1456 1456 * add rsp, 8 // skip eth_type_trans's frame 1457 1457 * ret // return to its caller 1458 1458 */ 1459 - int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, 1459 + int arch_prepare_bpf_trampoline(void *image, void *image_end, 1460 + const struct btf_func_model *m, u32 flags, 1460 1461 struct bpf_prog **fentry_progs, int fentry_cnt, 1461 1462 struct bpf_prog **fexit_progs, int fexit_cnt, 1462 1463 void *orig_call) ··· 1524 1523 /* skip our return address and return to parent */ 1525 1524 EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ 1526 1525 EMIT1(0xC3); /* ret */ 1527 - /* One half of the page has active running trampoline. 1528 - * Another half is an area for next trampoline. 1529 - * Make sure the trampoline generation logic doesn't overflow. 
1530 - */ 1531 - if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY)) 1526 + /* Make sure the trampoline generation logic doesn't overflow */ 1527 + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) 1532 1528 return -EFAULT; 1533 - return 0; 1529 + return prog - (u8 *)image; 1534 1530 } 1535 1531 1536 1532 static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+2 -2
drivers/net/tun.c
··· 1718 1718 if (err < 0) 1719 1719 goto err_xdp; 1720 1720 if (err == XDP_REDIRECT) 1721 - xdp_do_flush_map(); 1721 + xdp_do_flush(); 1722 1722 if (err != XDP_PASS) 1723 1723 goto out; 1724 1724 ··· 2549 2549 } 2550 2550 2551 2551 if (flush) 2552 - xdp_do_flush_map(); 2552 + xdp_do_flush(); 2553 2553 2554 2554 rcu_read_unlock(); 2555 2555 local_bh_enable();
+1 -1
drivers/net/veth.c
··· 769 769 if (xdp_xmit & VETH_XDP_TX) 770 770 veth_xdp_flush(rq->dev, &bq); 771 771 if (xdp_xmit & VETH_XDP_REDIR) 772 - xdp_do_flush_map(); 772 + xdp_do_flush(); 773 773 xdp_clear_return_frame_no_direct(); 774 774 775 775 return done;
+1 -1
drivers/net/virtio_net.c
··· 1432 1432 virtqueue_napi_complete(napi, rq->vq, received); 1433 1433 1434 1434 if (xdp_xmit & VIRTIO_XDP_REDIR) 1435 - xdp_do_flush_map(); 1435 + xdp_do_flush(); 1436 1436 1437 1437 if (xdp_xmit & VIRTIO_XDP_TX) { 1438 1438 sq = virtnet_xdp_sq(vi);
+122 -6
include/linux/bpf.h
··· 17 17 #include <linux/u64_stats_sync.h> 18 18 #include <linux/refcount.h> 19 19 #include <linux/mutex.h> 20 + #include <linux/module.h> 20 21 21 22 struct bpf_verifier_env; 22 23 struct bpf_verifier_log; ··· 44 43 int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); 45 44 void (*map_release_uref)(struct bpf_map *map); 46 45 void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); 46 + int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, 47 + union bpf_attr __user *uattr); 48 + int (*map_lookup_and_delete_batch)(struct bpf_map *map, 49 + const union bpf_attr *attr, 50 + union bpf_attr __user *uattr); 51 + int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, 52 + union bpf_attr __user *uattr); 53 + int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, 54 + union bpf_attr __user *uattr); 47 55 48 56 /* funcs callable from userspace and from eBPF programs */ 49 57 void *(*map_lookup_elem)(struct bpf_map *map, void *key); ··· 116 106 struct btf *btf; 117 107 struct bpf_map_memory memory; 118 108 char name[BPF_OBJ_NAME_LEN]; 109 + u32 btf_vmlinux_value_type_id; 119 110 bool unpriv_array; 120 111 bool frozen; /* write-once; write-protected by freeze_mutex */ 121 112 /* 22 bytes hole */ ··· 194 183 195 184 static inline bool bpf_map_support_seq_show(const struct bpf_map *map) 196 185 { 197 - return map->btf && map->ops->map_seq_show_elem; 186 + return (map->btf_value_type_id || map->btf_vmlinux_value_type_id) && 187 + map->ops->map_seq_show_elem; 198 188 } 199 189 200 190 int map_check_no_btf(const struct bpf_map *map, ··· 361 349 const struct bpf_insn *src, 362 350 struct bpf_insn *dst, 363 351 struct bpf_prog *prog, u32 *target_size); 352 + int (*btf_struct_access)(struct bpf_verifier_log *log, 353 + const struct btf_type *t, int off, int size, 354 + enum bpf_access_type atype, 355 + u32 *next_btf_id); 364 356 }; 365 357 366 358 struct bpf_prog_offload_ops { ··· 453 437 * fentry 
= a set of program to run before calling original function 454 438 * fexit = a set of program to run after original function 455 439 */ 456 - int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, 440 + int arch_prepare_bpf_trampoline(void *image, void *image_end, 441 + const struct btf_func_model *m, u32 flags, 457 442 struct bpf_prog **fentry_progs, int fentry_cnt, 458 443 struct bpf_prog **fexit_progs, int fexit_cnt, 459 444 void *orig_call); ··· 465 448 enum bpf_tramp_prog_type { 466 449 BPF_TRAMP_FENTRY, 467 450 BPF_TRAMP_FEXIT, 468 - BPF_TRAMP_MAX 451 + BPF_TRAMP_MAX, 452 + BPF_TRAMP_REPLACE, /* more than MAX */ 469 453 }; 470 454 471 455 struct bpf_trampoline { ··· 481 463 void *addr; 482 464 bool ftrace_managed; 483 465 } func; 466 + /* if !NULL this is BPF_PROG_TYPE_EXT program that extends another BPF 467 + * program by replacing one of its functions. func.addr is the address 468 + * of the function it replaced. 469 + */ 470 + struct bpf_prog *extension_prog; 484 471 /* list of BPF programs using this trampoline */ 485 472 struct hlist_head progs_hlist[BPF_TRAMP_MAX]; 486 473 /* Number of attached programs. A counter per kind. 
*/ ··· 581 558 #endif 582 559 583 560 struct bpf_func_info_aux { 561 + u16 linkage; 584 562 bool unreliable; 585 563 }; 586 564 ··· 691 667 struct mutex poke_mutex; 692 668 struct work_struct work; 693 669 }; 670 + 671 + struct bpf_struct_ops_value; 672 + struct btf_type; 673 + struct btf_member; 674 + 675 + #define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64 676 + struct bpf_struct_ops { 677 + const struct bpf_verifier_ops *verifier_ops; 678 + int (*init)(struct btf *btf); 679 + int (*check_member)(const struct btf_type *t, 680 + const struct btf_member *member); 681 + int (*init_member)(const struct btf_type *t, 682 + const struct btf_member *member, 683 + void *kdata, const void *udata); 684 + int (*reg)(void *kdata); 685 + void (*unreg)(void *kdata); 686 + const struct btf_type *type; 687 + const struct btf_type *value_type; 688 + const char *name; 689 + struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; 690 + u32 type_id; 691 + u32 value_id; 692 + }; 693 + 694 + #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) 695 + #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) 696 + const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); 697 + void bpf_struct_ops_init(struct btf *btf); 698 + bool bpf_struct_ops_get(const void *kdata); 699 + void bpf_struct_ops_put(const void *kdata); 700 + int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, 701 + void *value); 702 + static inline bool bpf_try_module_get(const void *data, struct module *owner) 703 + { 704 + if (owner == BPF_MODULE_OWNER) 705 + return bpf_struct_ops_get(data); 706 + else 707 + return try_module_get(owner); 708 + } 709 + static inline void bpf_module_put(const void *data, struct module *owner) 710 + { 711 + if (owner == BPF_MODULE_OWNER) 712 + bpf_struct_ops_put(data); 713 + else 714 + module_put(owner); 715 + } 716 + #else 717 + static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) 718 + { 719 + return NULL; 720 + } 721 
+ static inline void bpf_struct_ops_init(struct btf *btf) { } 722 + static inline bool bpf_try_module_get(const void *data, struct module *owner) 723 + { 724 + return try_module_get(owner); 725 + } 726 + static inline void bpf_module_put(const void *data, struct module *owner) 727 + { 728 + module_put(owner); 729 + } 730 + static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, 731 + void *key, 732 + void *value) 733 + { 734 + return -EINVAL; 735 + } 736 + #endif 694 737 695 738 struct bpf_array { 696 739 struct bpf_map map; ··· 997 906 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); 998 907 void bpf_map_area_free(void *base); 999 908 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); 909 + int generic_map_lookup_batch(struct bpf_map *map, 910 + const union bpf_attr *attr, 911 + union bpf_attr __user *uattr); 912 + int generic_map_update_batch(struct bpf_map *map, 913 + const union bpf_attr *attr, 914 + union bpf_attr __user *uattr); 915 + int generic_map_delete_batch(struct bpf_map *map, 916 + const union bpf_attr *attr, 917 + union bpf_attr __user *uattr); 1000 918 1001 919 extern int sysctl_unprivileged_bpf_disabled; 1002 920 ··· 1062 962 1063 963 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); 1064 964 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); 1065 - void __dev_map_flush(void); 965 + void __dev_flush(void); 966 + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, 967 + struct net_device *dev_rx); 1066 968 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, 1067 969 struct net_device *dev_rx); 1068 970 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, ··· 1108 1006 const char *func_name, 1109 1007 struct btf_func_model *m); 1110 1008 1111 - int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog); 1009 + struct bpf_reg_state; 1010 + int btf_check_func_arg_match(struct 
bpf_verifier_env *env, int subprog, 1011 + struct bpf_reg_state *regs); 1012 + int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, 1013 + struct bpf_reg_state *reg); 1014 + int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, 1015 + struct btf *btf, const struct btf_type *t); 1112 1016 1113 1017 struct bpf_prog *bpf_prog_by_id(u32 id); 1114 1018 ··· 1179 1071 return NULL; 1180 1072 } 1181 1073 1182 - static inline void __dev_map_flush(void) 1074 + static inline void __dev_flush(void) 1183 1075 { 1184 1076 } 1185 1077 1186 1078 struct xdp_buff; 1187 1079 struct bpf_dtab_netdev; 1080 + 1081 + static inline 1082 + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, 1083 + struct net_device *dev_rx) 1084 + { 1085 + return 0; 1086 + } 1188 1087 1189 1088 static inline 1190 1089 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ··· 1414 1299 extern const struct bpf_func_proto bpf_strtol_proto; 1415 1300 extern const struct bpf_func_proto bpf_strtoul_proto; 1416 1301 extern const struct bpf_func_proto bpf_tcp_sock_proto; 1302 + extern const struct bpf_func_proto bpf_jiffies64_proto; 1417 1303 1418 1304 /* Shared helpers among cBPF and eBPF. */ 1419 1305 void bpf_user_rnd_init_once(void);
+9
include/linux/bpf_types.h
··· 65 65 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport, 66 66 struct sk_reuseport_md, struct sk_reuseport_kern) 67 67 #endif 68 + #if defined(CONFIG_BPF_JIT) 69 + BPF_PROG_TYPE(BPF_PROG_TYPE_STRUCT_OPS, bpf_struct_ops, 70 + void *, void *) 71 + BPF_PROG_TYPE(BPF_PROG_TYPE_EXT, bpf_extension, 72 + void *, void *) 73 + #endif 68 74 69 75 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) 70 76 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) ··· 111 105 #endif 112 106 BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) 113 107 BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) 108 + #if defined(CONFIG_BPF_JIT) 109 + BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) 110 + #endif
+8 -2
include/linux/bpf_verifier.h
··· 304 304 u64 map_key_state; /* constant (32 bit) key tracking for maps */ 305 305 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ 306 306 int sanitize_stack_off; /* stack slot to be cleared */ 307 - bool seen; /* this insn was processed by the verifier */ 307 + u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ 308 308 bool zext_dst; /* this insn zero extends dst reg */ 309 309 u8 alu_state; /* used in combination with alu_limit */ 310 - bool prune_point; 310 + 311 + /* below fields are initialized once */ 311 312 unsigned int orig_idx; /* original instruction index */ 313 + bool prune_point; 312 314 }; 313 315 314 316 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ ··· 381 379 int *insn_stack; 382 380 int cur_stack; 383 381 } cfg; 382 + u32 pass_cnt; /* number of times do_check() was called */ 384 383 u32 subprog_cnt; 385 384 /* number of instructions analyzed by the verifier */ 386 385 u32 prev_insn_processed, insn_processed; ··· 430 427 struct bpf_insn *insn); 431 428 void 432 429 bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt); 430 + 431 + int check_ctx_reg(struct bpf_verifier_env *env, 432 + const struct bpf_reg_state *reg, int regno); 433 433 434 434 #endif /* _LINUX_BPF_VERIFIER_H */
+52
include/linux/btf.h
··· 7 7 #include <linux/types.h> 8 8 #include <uapi/linux/btf.h> 9 9 10 + #define BTF_TYPE_EMIT(type) ((void)(type *)0) 11 + 10 12 struct btf; 11 13 struct btf_member; 12 14 struct btf_type; ··· 55 53 u32 expected_offset, u32 expected_size); 56 54 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); 57 55 bool btf_type_is_void(const struct btf_type *t); 56 + s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); 57 + const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, 58 + u32 id, u32 *res_id); 59 + const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, 60 + u32 id, u32 *res_id); 61 + const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, 62 + u32 id, u32 *res_id); 63 + const struct btf_type * 64 + btf_resolve_size(const struct btf *btf, const struct btf_type *type, 65 + u32 *type_size, const struct btf_type **elem_type, 66 + u32 *total_nelems); 67 + 68 + #define for_each_member(i, struct_type, member) \ 69 + for (i = 0, member = btf_type_member(struct_type); \ 70 + i < btf_type_vlen(struct_type); \ 71 + i++, member++) 58 72 59 73 static inline bool btf_type_is_ptr(const struct btf_type *t) 60 74 { ··· 100 82 static inline bool btf_type_is_func_proto(const struct btf_type *t) 101 83 { 102 84 return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; 85 + } 86 + 87 + static inline u16 btf_type_vlen(const struct btf_type *t) 88 + { 89 + return BTF_INFO_VLEN(t->info); 90 + } 91 + 92 + static inline u16 btf_func_linkage(const struct btf_type *t) 93 + { 94 + return BTF_INFO_VLEN(t->info); 95 + } 96 + 97 + static inline bool btf_type_kflag(const struct btf_type *t) 98 + { 99 + return BTF_INFO_KFLAG(t->info); 100 + } 101 + 102 + static inline u32 btf_member_bit_offset(const struct btf_type *struct_type, 103 + const struct btf_member *member) 104 + { 105 + return btf_type_kflag(struct_type) ? 
BTF_MEMBER_BIT_OFFSET(member->offset) 106 + : member->offset; 107 + } 108 + 109 + static inline u32 btf_member_bitfield_size(const struct btf_type *struct_type, 110 + const struct btf_member *member) 111 + { 112 + return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) 113 + : 0; 114 + } 115 + 116 + static inline const struct btf_member *btf_type_member(const struct btf_type *t) 117 + { 118 + return (const struct btf_member *)(t + 1); 103 119 } 104 120 105 121 #ifdef CONFIG_BPF_SYSCALL
+10 -2
include/linux/filter.h
··· 843 843 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, 844 844 bpf_aux_classic_check_t trans, bool save_orig); 845 845 void bpf_prog_destroy(struct bpf_prog *fp); 846 + const struct bpf_func_proto * 847 + bpf_base_func_proto(enum bpf_func_id func_id); 846 848 847 849 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 848 850 int sk_attach_bpf(u32 ufd, struct sock *sk); ··· 918 916 return 0; 919 917 } 920 918 921 - /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the 919 + /* The pair of xdp_do_redirect and xdp_do_flush MUST be called in the 922 920 * same cpu context. Further for best results no more than a single map 923 921 * for the do_redirect/do_flush pair should be used. This limitation is 924 922 * because we only track one map and force a flush when the map changes. ··· 929 927 int xdp_do_redirect(struct net_device *dev, 930 928 struct xdp_buff *xdp, 931 929 struct bpf_prog *prog); 932 - void xdp_do_flush_map(void); 930 + void xdp_do_flush(void); 931 + 932 + /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as 933 + * it is no longer only flushing maps. Keep this define for compatibility 934 + * until all drivers are updated - do not use xdp_do_flush_map() in new code! 935 + */ 936 + #define xdp_do_flush_map xdp_do_flush 933 937 934 938 void bpf_warn_invalid_xdp_action(u32 act); 935 939
+8 -5
include/linux/netdevice.h
··· 876 876 struct bpf_prog_offload_ops; 877 877 struct netlink_ext_ack; 878 878 struct xdp_umem; 879 + struct xdp_dev_bulk_queue; 879 880 880 881 struct netdev_bpf { 881 882 enum bpf_netdev_command command; ··· 1987 1986 unsigned int num_tx_queues; 1988 1987 unsigned int real_num_tx_queues; 1989 1988 struct Qdisc *qdisc; 1990 - #ifdef CONFIG_NET_SCHED 1991 - DECLARE_HASHTABLE (qdisc_hash, 4); 1992 - #endif 1993 1989 unsigned int tx_queue_len; 1994 1990 spinlock_t tx_global_lock; 1995 - int watchdog_timeo; 1991 + 1992 + struct xdp_dev_bulk_queue __percpu *xdp_bulkq; 1996 1993 1997 1994 #ifdef CONFIG_XPS 1998 1995 struct xps_dev_maps __rcu *xps_cpus_map; ··· 2000 2001 struct mini_Qdisc __rcu *miniq_egress; 2001 2002 #endif 2002 2003 2004 + #ifdef CONFIG_NET_SCHED 2005 + DECLARE_HASHTABLE (qdisc_hash, 4); 2006 + #endif 2003 2007 /* These may be needed for future network-power-down code. */ 2004 2008 struct timer_list watchdog_timer; 2009 + int watchdog_timeo; 2005 2010 2006 - int __percpu *pcpu_refcnt; 2007 2011 struct list_head todo_list; 2012 + int __percpu *pcpu_refcnt; 2008 2013 2009 2014 struct list_head link_watch_list; 2010 2015
+2
include/net/sock.h
··· 2597 2597 return false; 2598 2598 } 2599 2599 2600 + void sock_def_readable(struct sock *sk); 2601 + 2600 2602 #endif /* _SOCK_H */
+2
include/net/tcp.h
··· 1019 1019 #define TCP_CONG_NON_RESTRICTED 0x1 1020 1020 /* Requires ECN/ECT set on all packets */ 1021 1021 #define TCP_CONG_NEEDS_ECN 0x2 1022 + #define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) 1022 1023 1023 1024 union tcp_cc_info; 1024 1025 ··· 1114 1113 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); 1115 1114 extern struct tcp_congestion_ops tcp_reno; 1116 1115 1116 + struct tcp_congestion_ops *tcp_ca_find(const char *name); 1117 1117 struct tcp_congestion_ops *tcp_ca_find_key(u32 key); 1118 1118 u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); 1119 1119 #ifdef CONFIG_INET
+56 -74
include/trace/events/xdp.h
··· 79 79 __entry->sent, __entry->drops, __entry->err) 80 80 ); 81 81 82 + #ifndef __DEVMAP_OBJ_TYPE 83 + #define __DEVMAP_OBJ_TYPE 84 + struct _bpf_dtab_netdev { 85 + struct net_device *dev; 86 + }; 87 + #endif /* __DEVMAP_OBJ_TYPE */ 88 + 89 + #define devmap_ifindex(tgt, map) \ 90 + (((map->map_type == BPF_MAP_TYPE_DEVMAP || \ 91 + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \ 92 + ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0) 93 + 82 94 DECLARE_EVENT_CLASS(xdp_redirect_template, 83 95 84 96 TP_PROTO(const struct net_device *dev, 85 97 const struct bpf_prog *xdp, 86 - int to_ifindex, int err, 87 - const struct bpf_map *map, u32 map_index), 98 + const void *tgt, int err, 99 + const struct bpf_map *map, u32 index), 88 100 89 - TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), 101 + TP_ARGS(dev, xdp, tgt, err, map, index), 90 102 91 103 TP_STRUCT__entry( 92 104 __field(int, prog_id) ··· 115 103 __entry->act = XDP_REDIRECT; 116 104 __entry->ifindex = dev->ifindex; 117 105 __entry->err = err; 118 - __entry->to_ifindex = to_ifindex; 106 + __entry->to_ifindex = map ? devmap_ifindex(tgt, map) : 107 + index; 119 108 __entry->map_id = map ? map->id : 0; 120 - __entry->map_index = map_index; 109 + __entry->map_index = map ? 
index : 0; 121 110 ), 122 111 123 - TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d", 112 + TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" 113 + " map_id=%d map_index=%d", 124 114 __entry->prog_id, 125 115 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 126 116 __entry->ifindex, __entry->to_ifindex, 127 - __entry->err) 117 + __entry->err, __entry->map_id, __entry->map_index) 128 118 ); 129 119 130 120 DEFINE_EVENT(xdp_redirect_template, xdp_redirect, 131 121 TP_PROTO(const struct net_device *dev, 132 122 const struct bpf_prog *xdp, 133 - int to_ifindex, int err, 134 - const struct bpf_map *map, u32 map_index), 135 - TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) 123 + const void *tgt, int err, 124 + const struct bpf_map *map, u32 index), 125 + TP_ARGS(dev, xdp, tgt, err, map, index) 136 126 ); 137 127 138 128 DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err, 139 129 TP_PROTO(const struct net_device *dev, 140 130 const struct bpf_prog *xdp, 141 - int to_ifindex, int err, 142 - const struct bpf_map *map, u32 map_index), 143 - TP_ARGS(dev, xdp, to_ifindex, err, map, map_index) 131 + const void *tgt, int err, 132 + const struct bpf_map *map, u32 index), 133 + TP_ARGS(dev, xdp, tgt, err, map, index) 144 134 ); 145 135 146 136 #define _trace_xdp_redirect(dev, xdp, to) \ 147 - trace_xdp_redirect(dev, xdp, to, 0, NULL, 0); 137 + trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to); 148 138 149 139 #define _trace_xdp_redirect_err(dev, xdp, to, err) \ 150 - trace_xdp_redirect_err(dev, xdp, to, err, NULL, 0); 140 + trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to); 151 141 152 - DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map, 142 + #define _trace_xdp_redirect_map(dev, xdp, to, map, index) \ 143 + trace_xdp_redirect(dev, xdp, to, 0, map, index); 144 + 145 + #define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \ 146 + trace_xdp_redirect_err(dev, xdp, to, err, map, index); 147 + 148 + /* not used anymore, but 
kept around so as not to break old programs */ 149 + DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map, 153 150 TP_PROTO(const struct net_device *dev, 154 151 const struct bpf_prog *xdp, 155 - int to_ifindex, int err, 156 - const struct bpf_map *map, u32 map_index), 157 - TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), 158 - TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" 159 - " map_id=%d map_index=%d", 160 - __entry->prog_id, 161 - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 162 - __entry->ifindex, __entry->to_ifindex, 163 - __entry->err, 164 - __entry->map_id, __entry->map_index) 152 + const void *tgt, int err, 153 + const struct bpf_map *map, u32 index), 154 + TP_ARGS(dev, xdp, tgt, err, map, index) 165 155 ); 166 156 167 - DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, 157 + DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err, 168 158 TP_PROTO(const struct net_device *dev, 169 159 const struct bpf_prog *xdp, 170 - int to_ifindex, int err, 171 - const struct bpf_map *map, u32 map_index), 172 - TP_ARGS(dev, xdp, to_ifindex, err, map, map_index), 173 - TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d" 174 - " map_id=%d map_index=%d", 175 - __entry->prog_id, 176 - __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 177 - __entry->ifindex, __entry->to_ifindex, 178 - __entry->err, 179 - __entry->map_id, __entry->map_index) 160 + const void *tgt, int err, 161 + const struct bpf_map *map, u32 index), 162 + TP_ARGS(dev, xdp, tgt, err, map, index) 180 163 ); 181 - 182 - #ifndef __DEVMAP_OBJ_TYPE 183 - #define __DEVMAP_OBJ_TYPE 184 - struct _bpf_dtab_netdev { 185 - struct net_device *dev; 186 - }; 187 - #endif /* __DEVMAP_OBJ_TYPE */ 188 - 189 - #define devmap_ifindex(fwd, map) \ 190 - ((map->map_type == BPF_MAP_TYPE_DEVMAP || \ 191 - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? 
\ 192 - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) 193 - 194 - #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ 195 - trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ 196 - 0, map, idx) 197 - 198 - #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ 199 - trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \ 200 - err, map, idx) 201 164 202 165 TRACE_EVENT(xdp_cpumap_kthread, 203 166 ··· 246 259 247 260 TRACE_EVENT(xdp_devmap_xmit, 248 261 249 - TP_PROTO(const struct bpf_map *map, u32 map_index, 250 - int sent, int drops, 251 - const struct net_device *from_dev, 252 - const struct net_device *to_dev, int err), 262 + TP_PROTO(const struct net_device *from_dev, 263 + const struct net_device *to_dev, 264 + int sent, int drops, int err), 253 265 254 - TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err), 266 + TP_ARGS(from_dev, to_dev, sent, drops, err), 255 267 256 268 TP_STRUCT__entry( 257 - __field(int, map_id) 269 + __field(int, from_ifindex) 258 270 __field(u32, act) 259 - __field(u32, map_index) 271 + __field(int, to_ifindex) 260 272 __field(int, drops) 261 273 __field(int, sent) 262 - __field(int, from_ifindex) 263 - __field(int, to_ifindex) 264 274 __field(int, err) 265 275 ), 266 276 267 277 TP_fast_assign( 268 - __entry->map_id = map->id; 278 + __entry->from_ifindex = from_dev->ifindex; 269 279 __entry->act = XDP_REDIRECT; 270 - __entry->map_index = map_index; 280 + __entry->to_ifindex = to_dev->ifindex; 271 281 __entry->drops = drops; 272 282 __entry->sent = sent; 273 - __entry->from_ifindex = from_dev->ifindex; 274 - __entry->to_ifindex = to_dev->ifindex; 275 283 __entry->err = err; 276 284 ), 277 285 278 286 TP_printk("ndo_xdp_xmit" 279 - " map_id=%d map_index=%d action=%s" 287 + " from_ifindex=%d to_ifindex=%d action=%s" 280 288 " sent=%d drops=%d" 281 - " from_ifindex=%d to_ifindex=%d err=%d", 282 - __entry->map_id, __entry->map_index, 289 + " err=%d", 290 + __entry->from_ifindex, 
__entry->to_ifindex, 283 291 __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), 284 292 __entry->sent, __entry->drops, 285 - __entry->from_ifindex, __entry->to_ifindex, __entry->err) 293 + __entry->err) 286 294 ); 287 295 288 296 /* Expect users already include <net/xdp.h>, but not xdp_priv.h */
+68 -4
include/uapi/linux/bpf.h
··· 107 107 BPF_MAP_LOOKUP_AND_DELETE_ELEM, 108 108 BPF_MAP_FREEZE, 109 109 BPF_BTF_GET_NEXT_ID, 110 + BPF_MAP_LOOKUP_BATCH, 111 + BPF_MAP_LOOKUP_AND_DELETE_BATCH, 112 + BPF_MAP_UPDATE_BATCH, 113 + BPF_MAP_DELETE_BATCH, 110 114 }; 111 115 112 116 enum bpf_map_type { ··· 140 136 BPF_MAP_TYPE_STACK, 141 137 BPF_MAP_TYPE_SK_STORAGE, 142 138 BPF_MAP_TYPE_DEVMAP_HASH, 139 + BPF_MAP_TYPE_STRUCT_OPS, 143 140 }; 144 141 145 142 /* Note that tracing related programs such as ··· 179 174 BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 180 175 BPF_PROG_TYPE_CGROUP_SOCKOPT, 181 176 BPF_PROG_TYPE_TRACING, 177 + BPF_PROG_TYPE_STRUCT_OPS, 178 + BPF_PROG_TYPE_EXT, 182 179 }; 183 180 184 181 enum bpf_attach_type { ··· 364 357 /* Enable memory-mapping BPF map */ 365 358 #define BPF_F_MMAPABLE (1U << 10) 366 359 367 - /* flags for BPF_PROG_QUERY */ 360 + /* Flags for BPF_PROG_QUERY. */ 361 + 362 + /* Query effective (directly attached + inherited from ancestor cgroups) 363 + * programs that will be executed for events within a cgroup. 364 + * attach_flags with this flag are returned only for directly attached programs. 
365 + */ 368 366 #define BPF_F_QUERY_EFFECTIVE (1U << 0) 369 367 370 368 enum bpf_stack_build_id_status { ··· 409 397 __u32 btf_fd; /* fd pointing to a BTF type data */ 410 398 __u32 btf_key_type_id; /* BTF type_id of the key */ 411 399 __u32 btf_value_type_id; /* BTF type_id of the value */ 400 + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- 401 + * struct stored as the 402 + * map value 403 + */ 412 404 }; 413 405 414 406 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ ··· 424 408 }; 425 409 __u64 flags; 426 410 }; 411 + 412 + struct { /* struct used by BPF_MAP_*_BATCH commands */ 413 + __aligned_u64 in_batch; /* start batch, 414 + * NULL to start from beginning 415 + */ 416 + __aligned_u64 out_batch; /* output: next start batch */ 417 + __aligned_u64 keys; 418 + __aligned_u64 values; 419 + __u32 count; /* input/output: 420 + * input: # of key/value 421 + * elements 422 + * output: # of filled elements 423 + */ 424 + __u32 map_fd; 425 + __u64 elem_flags; 426 + __u64 flags; 427 + } batch; 427 428 428 429 struct { /* anonymous struct used by BPF_PROG_LOAD command */ 429 430 __u32 prog_type; /* one of enum bpf_prog_type */ ··· 2736 2703 * 2737 2704 * int bpf_send_signal(u32 sig) 2738 2705 * Description 2739 - * Send signal *sig* to the current task. 2706 + * Send signal *sig* to the process of the current task. 2707 + * The signal may be delivered to any of this process's threads. 2740 2708 * Return 2741 2709 * 0 on success or successfully queued. 2742 2710 * ··· 2865 2831 * Return 2866 2832 * On success, the strictly positive length of the string, including 2867 2833 * the trailing NUL character. On error, a negative value. 2834 + * 2835 + * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) 2836 + * Description 2837 + * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. 2838 + * *rcv_nxt* is the ack_seq to be sent out. 2839 + * Return 2840 + * 0 on success, or a negative error in case of failure. 
2841 + * 2842 + * int bpf_send_signal_thread(u32 sig) 2843 + * Description 2844 + * Send signal *sig* to the thread corresponding to the current task. 2845 + * Return 2846 + * 0 on success or successfully queued. 2847 + * 2848 + * **-EBUSY** if work queue under nmi is full. 2849 + * 2850 + * **-EINVAL** if *sig* is invalid. 2851 + * 2852 + * **-EPERM** if no permission to send the *sig*. 2853 + * 2854 + * **-EAGAIN** if bpf program can try again. 2855 + * 2856 + * u64 bpf_jiffies64(void) 2857 + * Description 2858 + * Obtain the 64bit jiffies 2859 + * Return 2860 + * The 64 bit jiffies 2868 2861 */ 2869 2862 #define __BPF_FUNC_MAPPER(FN) \ 2870 2863 FN(unspec), \ ··· 3009 2948 FN(probe_read_user), \ 3010 2949 FN(probe_read_kernel), \ 3011 2950 FN(probe_read_user_str), \ 3012 - FN(probe_read_kernel_str), 2951 + FN(probe_read_kernel_str), \ 2952 + FN(tcp_send_ack), \ 2953 + FN(send_signal_thread), \ 2954 + FN(jiffies64), 3013 2955 3014 2956 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 3015 2957 * function eBPF program intends to call ··· 3413 3349 __u32 map_flags; 3414 3350 char name[BPF_OBJ_NAME_LEN]; 3415 3351 __u32 ifindex; 3416 - __u32 :32; 3352 + __u32 btf_vmlinux_value_type_id; 3417 3353 __u64 netns_dev; 3418 3354 __u64 netns_ino; 3419 3355 __u32 btf_id;
+6
include/uapi/linux/btf.h
··· 146 146 BTF_VAR_GLOBAL_EXTERN = 2, 147 147 }; 148 148 149 + enum btf_func_linkage { 150 + BTF_FUNC_STATIC = 0, 151 + BTF_FUNC_GLOBAL = 1, 152 + BTF_FUNC_EXTERN = 2, 153 + }; 154 + 149 155 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe 150 156 * additional information related to the variable such as its linkage. 151 157 */
+3
kernel/bpf/Makefile
··· 27 27 ifeq ($(CONFIG_SYSFS),y) 28 28 obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o 29 29 endif 30 + ifeq ($(CONFIG_BPF_JIT),y) 31 + obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o 32 + endif
+2
kernel/bpf/arraymap.c
··· 503 503 .map_mmap = array_map_mmap, 504 504 .map_seq_show_elem = array_map_seq_show_elem, 505 505 .map_check_btf = array_map_check_btf, 506 + .map_lookup_batch = generic_map_lookup_batch, 507 + .map_update_batch = generic_map_update_batch, 506 508 }; 507 509 508 510 const struct bpf_map_ops percpu_array_map_ops = {
+634
kernel/bpf/bpf_struct_ops.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <linux/bpf.h> 5 + #include <linux/bpf_verifier.h> 6 + #include <linux/btf.h> 7 + #include <linux/filter.h> 8 + #include <linux/slab.h> 9 + #include <linux/numa.h> 10 + #include <linux/seq_file.h> 11 + #include <linux/refcount.h> 12 + #include <linux/mutex.h> 13 + 14 + enum bpf_struct_ops_state { 15 + BPF_STRUCT_OPS_STATE_INIT, 16 + BPF_STRUCT_OPS_STATE_INUSE, 17 + BPF_STRUCT_OPS_STATE_TOBEFREE, 18 + }; 19 + 20 + #define BPF_STRUCT_OPS_COMMON_VALUE \ 21 + refcount_t refcnt; \ 22 + enum bpf_struct_ops_state state 23 + 24 + struct bpf_struct_ops_value { 25 + BPF_STRUCT_OPS_COMMON_VALUE; 26 + char data[0] ____cacheline_aligned_in_smp; 27 + }; 28 + 29 + struct bpf_struct_ops_map { 30 + struct bpf_map map; 31 + const struct bpf_struct_ops *st_ops; 32 + /* protect map_update */ 33 + struct mutex lock; 34 + /* progs has all the bpf_prog that is populated 35 + * to the func ptr of the kernel's struct 36 + * (in kvalue.data). 37 + */ 38 + struct bpf_prog **progs; 39 + /* image is a page that has all the trampolines 40 + * that stores the func args before calling the bpf_prog. 41 + * A PAGE_SIZE "image" is enough to store all trampoline for 42 + * "progs[]". 43 + */ 44 + void *image; 45 + /* uvalue->data stores the kernel struct 46 + * (e.g. tcp_congestion_ops) that is more useful 47 + * to userspace than the kvalue. For example, 48 + * the bpf_prog's id is stored instead of the kernel 49 + * address of a func ptr. 50 + */ 51 + struct bpf_struct_ops_value *uvalue; 52 + /* kvalue.data stores the actual kernel's struct 53 + * (e.g. tcp_congestion_ops) that will be 54 + * registered to the kernel subsystem. 55 + */ 56 + struct bpf_struct_ops_value kvalue; 57 + }; 58 + 59 + #define VALUE_PREFIX "bpf_struct_ops_" 60 + #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) 61 + 62 + /* bpf_struct_ops_##_name (e.g. 
bpf_struct_ops_tcp_congestion_ops) is 63 + * the map's value exposed to the userspace and its btf-type-id is 64 + * stored at the map->btf_vmlinux_value_type_id. 65 + * 66 + */ 67 + #define BPF_STRUCT_OPS_TYPE(_name) \ 68 + extern struct bpf_struct_ops bpf_##_name; \ 69 + \ 70 + struct bpf_struct_ops_##_name { \ 71 + BPF_STRUCT_OPS_COMMON_VALUE; \ 72 + struct _name data ____cacheline_aligned_in_smp; \ 73 + }; 74 + #include "bpf_struct_ops_types.h" 75 + #undef BPF_STRUCT_OPS_TYPE 76 + 77 + enum { 78 + #define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name, 79 + #include "bpf_struct_ops_types.h" 80 + #undef BPF_STRUCT_OPS_TYPE 81 + __NR_BPF_STRUCT_OPS_TYPE, 82 + }; 83 + 84 + static struct bpf_struct_ops * const bpf_struct_ops[] = { 85 + #define BPF_STRUCT_OPS_TYPE(_name) \ 86 + [BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name, 87 + #include "bpf_struct_ops_types.h" 88 + #undef BPF_STRUCT_OPS_TYPE 89 + }; 90 + 91 + const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { 92 + }; 93 + 94 + const struct bpf_prog_ops bpf_struct_ops_prog_ops = { 95 + }; 96 + 97 + static const struct btf_type *module_type; 98 + 99 + void bpf_struct_ops_init(struct btf *btf) 100 + { 101 + s32 type_id, value_id, module_id; 102 + const struct btf_member *member; 103 + struct bpf_struct_ops *st_ops; 104 + struct bpf_verifier_log log = {}; 105 + const struct btf_type *t; 106 + char value_name[128]; 107 + const char *mname; 108 + u32 i, j; 109 + 110 + /* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */ 111 + #define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name); 112 + #include "bpf_struct_ops_types.h" 113 + #undef BPF_STRUCT_OPS_TYPE 114 + 115 + module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT); 116 + if (module_id < 0) { 117 + pr_warn("Cannot find struct module in btf_vmlinux\n"); 118 + return; 119 + } 120 + module_type = btf_type_by_id(btf, module_id); 121 + 122 + for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { 123 + st_ops 
= bpf_struct_ops[i]; 124 + 125 + if (strlen(st_ops->name) + VALUE_PREFIX_LEN >= 126 + sizeof(value_name)) { 127 + pr_warn("struct_ops name %s is too long\n", 128 + st_ops->name); 129 + continue; 130 + } 131 + sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name); 132 + 133 + value_id = btf_find_by_name_kind(btf, value_name, 134 + BTF_KIND_STRUCT); 135 + if (value_id < 0) { 136 + pr_warn("Cannot find struct %s in btf_vmlinux\n", 137 + value_name); 138 + continue; 139 + } 140 + 141 + type_id = btf_find_by_name_kind(btf, st_ops->name, 142 + BTF_KIND_STRUCT); 143 + if (type_id < 0) { 144 + pr_warn("Cannot find struct %s in btf_vmlinux\n", 145 + st_ops->name); 146 + continue; 147 + } 148 + t = btf_type_by_id(btf, type_id); 149 + if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) { 150 + pr_warn("Cannot support #%u members in struct %s\n", 151 + btf_type_vlen(t), st_ops->name); 152 + continue; 153 + } 154 + 155 + for_each_member(j, t, member) { 156 + const struct btf_type *func_proto; 157 + 158 + mname = btf_name_by_offset(btf, member->name_off); 159 + if (!*mname) { 160 + pr_warn("anon member in struct %s is not supported\n", 161 + st_ops->name); 162 + break; 163 + } 164 + 165 + if (btf_member_bitfield_size(t, member)) { 166 + pr_warn("bit field member %s in struct %s is not supported\n", 167 + mname, st_ops->name); 168 + break; 169 + } 170 + 171 + func_proto = btf_type_resolve_func_ptr(btf, 172 + member->type, 173 + NULL); 174 + if (func_proto && 175 + btf_distill_func_proto(&log, btf, 176 + func_proto, mname, 177 + &st_ops->func_models[j])) { 178 + pr_warn("Error in parsing func ptr %s in struct %s\n", 179 + mname, st_ops->name); 180 + break; 181 + } 182 + } 183 + 184 + if (j == btf_type_vlen(t)) { 185 + if (st_ops->init(btf)) { 186 + pr_warn("Error in init bpf_struct_ops %s\n", 187 + st_ops->name); 188 + } else { 189 + st_ops->type_id = type_id; 190 + st_ops->type = t; 191 + st_ops->value_id = value_id; 192 + st_ops->value_type = btf_type_by_id(btf, 193 + 
value_id); 194 + } 195 + } 196 + } 197 + } 198 + 199 + extern struct btf *btf_vmlinux; 200 + 201 + static const struct bpf_struct_ops * 202 + bpf_struct_ops_find_value(u32 value_id) 203 + { 204 + unsigned int i; 205 + 206 + if (!value_id || !btf_vmlinux) 207 + return NULL; 208 + 209 + for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { 210 + if (bpf_struct_ops[i]->value_id == value_id) 211 + return bpf_struct_ops[i]; 212 + } 213 + 214 + return NULL; 215 + } 216 + 217 + const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) 218 + { 219 + unsigned int i; 220 + 221 + if (!type_id || !btf_vmlinux) 222 + return NULL; 223 + 224 + for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) { 225 + if (bpf_struct_ops[i]->type_id == type_id) 226 + return bpf_struct_ops[i]; 227 + } 228 + 229 + return NULL; 230 + } 231 + 232 + static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key, 233 + void *next_key) 234 + { 235 + if (key && *(u32 *)key == 0) 236 + return -ENOENT; 237 + 238 + *(u32 *)next_key = 0; 239 + return 0; 240 + } 241 + 242 + int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, 243 + void *value) 244 + { 245 + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; 246 + struct bpf_struct_ops_value *uvalue, *kvalue; 247 + enum bpf_struct_ops_state state; 248 + 249 + if (unlikely(*(u32 *)key != 0)) 250 + return -ENOENT; 251 + 252 + kvalue = &st_map->kvalue; 253 + /* Pair with smp_store_release() during map_update */ 254 + state = smp_load_acquire(&kvalue->state); 255 + if (state == BPF_STRUCT_OPS_STATE_INIT) { 256 + memset(value, 0, map->value_size); 257 + return 0; 258 + } 259 + 260 + /* No lock is needed. state and refcnt do not need 261 + * to be updated together under atomic context. 
262 + */ 263 + uvalue = (struct bpf_struct_ops_value *)value; 264 + memcpy(uvalue, st_map->uvalue, map->value_size); 265 + uvalue->state = state; 266 + refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt)); 267 + 268 + return 0; 269 + } 270 + 271 + static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key) 272 + { 273 + return ERR_PTR(-EINVAL); 274 + } 275 + 276 + static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) 277 + { 278 + const struct btf_type *t = st_map->st_ops->type; 279 + u32 i; 280 + 281 + for (i = 0; i < btf_type_vlen(t); i++) { 282 + if (st_map->progs[i]) { 283 + bpf_prog_put(st_map->progs[i]); 284 + st_map->progs[i] = NULL; 285 + } 286 + } 287 + } 288 + 289 + static int check_zero_holes(const struct btf_type *t, void *data) 290 + { 291 + const struct btf_member *member; 292 + u32 i, moff, msize, prev_mend = 0; 293 + const struct btf_type *mtype; 294 + 295 + for_each_member(i, t, member) { 296 + moff = btf_member_bit_offset(t, member) / 8; 297 + if (moff > prev_mend && 298 + memchr_inv(data + prev_mend, 0, moff - prev_mend)) 299 + return -EINVAL; 300 + 301 + mtype = btf_type_by_id(btf_vmlinux, member->type); 302 + mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, 303 + NULL, NULL); 304 + if (IS_ERR(mtype)) 305 + return PTR_ERR(mtype); 306 + prev_mend = moff + msize; 307 + } 308 + 309 + if (t->size > prev_mend && 310 + memchr_inv(data + prev_mend, 0, t->size - prev_mend)) 311 + return -EINVAL; 312 + 313 + return 0; 314 + } 315 + 316 + static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, 317 + void *value, u64 flags) 318 + { 319 + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; 320 + const struct bpf_struct_ops *st_ops = st_map->st_ops; 321 + struct bpf_struct_ops_value *uvalue, *kvalue; 322 + const struct btf_member *member; 323 + const struct btf_type *t = st_ops->type; 324 + void *udata, *kdata; 325 + int prog_fd, err = 0; 326 + void *image; 327 + u32 i; 
328 + 329 + if (flags) 330 + return -EINVAL; 331 + 332 + if (*(u32 *)key != 0) 333 + return -E2BIG; 334 + 335 + err = check_zero_holes(st_ops->value_type, value); 336 + if (err) 337 + return err; 338 + 339 + uvalue = (struct bpf_struct_ops_value *)value; 340 + err = check_zero_holes(t, uvalue->data); 341 + if (err) 342 + return err; 343 + 344 + if (uvalue->state || refcount_read(&uvalue->refcnt)) 345 + return -EINVAL; 346 + 347 + uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; 348 + kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue; 349 + 350 + mutex_lock(&st_map->lock); 351 + 352 + if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) { 353 + err = -EBUSY; 354 + goto unlock; 355 + } 356 + 357 + memcpy(uvalue, value, map->value_size); 358 + 359 + udata = &uvalue->data; 360 + kdata = &kvalue->data; 361 + image = st_map->image; 362 + 363 + for_each_member(i, t, member) { 364 + const struct btf_type *mtype, *ptype; 365 + struct bpf_prog *prog; 366 + u32 moff; 367 + 368 + moff = btf_member_bit_offset(t, member) / 8; 369 + ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); 370 + if (ptype == module_type) { 371 + if (*(void **)(udata + moff)) 372 + goto reset_unlock; 373 + *(void **)(kdata + moff) = BPF_MODULE_OWNER; 374 + continue; 375 + } 376 + 377 + err = st_ops->init_member(t, member, kdata, udata); 378 + if (err < 0) 379 + goto reset_unlock; 380 + 381 + /* The ->init_member() has handled this member */ 382 + if (err > 0) 383 + continue; 384 + 385 + /* If st_ops->init_member does not handle it, 386 + * we will only handle func ptrs and zero-ed members 387 + * here. Reject everything else. 
388 + */ 389 + 390 + /* All non func ptr member must be 0 */ 391 + if (!ptype || !btf_type_is_func_proto(ptype)) { 392 + u32 msize; 393 + 394 + mtype = btf_type_by_id(btf_vmlinux, member->type); 395 + mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, 396 + NULL, NULL); 397 + if (IS_ERR(mtype)) { 398 + err = PTR_ERR(mtype); 399 + goto reset_unlock; 400 + } 401 + 402 + if (memchr_inv(udata + moff, 0, msize)) { 403 + err = -EINVAL; 404 + goto reset_unlock; 405 + } 406 + 407 + continue; 408 + } 409 + 410 + prog_fd = (int)(*(unsigned long *)(udata + moff)); 411 + /* Similar check as the attr->attach_prog_fd */ 412 + if (!prog_fd) 413 + continue; 414 + 415 + prog = bpf_prog_get(prog_fd); 416 + if (IS_ERR(prog)) { 417 + err = PTR_ERR(prog); 418 + goto reset_unlock; 419 + } 420 + st_map->progs[i] = prog; 421 + 422 + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || 423 + prog->aux->attach_btf_id != st_ops->type_id || 424 + prog->expected_attach_type != i) { 425 + err = -EINVAL; 426 + goto reset_unlock; 427 + } 428 + 429 + err = arch_prepare_bpf_trampoline(image, 430 + st_map->image + PAGE_SIZE, 431 + &st_ops->func_models[i], 0, 432 + &prog, 1, NULL, 0, NULL); 433 + if (err < 0) 434 + goto reset_unlock; 435 + 436 + *(void **)(kdata + moff) = image; 437 + image += err; 438 + 439 + /* put prog_id to udata */ 440 + *(unsigned long *)(udata + moff) = prog->aux->id; 441 + } 442 + 443 + refcount_set(&kvalue->refcnt, 1); 444 + bpf_map_inc(map); 445 + 446 + set_memory_ro((long)st_map->image, 1); 447 + set_memory_x((long)st_map->image, 1); 448 + err = st_ops->reg(kdata); 449 + if (likely(!err)) { 450 + /* Pair with smp_load_acquire() during lookup_elem(). 451 + * It ensures the above udata updates (e.g. prog->aux->id) 452 + * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set. 453 + */ 454 + smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE); 455 + goto unlock; 456 + } 457 + 458 + /* Error during st_ops->reg(). 
It is very unlikely since 459 + * the above init_member() should have caught it earlier 460 + * before reg(). The only possibility is if there was a race 461 + * in registering the struct_ops (under the same name) to 462 + * a sub-system through different struct_ops's maps. 463 + */ 464 + set_memory_nx((long)st_map->image, 1); 465 + set_memory_rw((long)st_map->image, 1); 466 + bpf_map_put(map); 467 + 468 + reset_unlock: 469 + bpf_struct_ops_map_put_progs(st_map); 470 + memset(uvalue, 0, map->value_size); 471 + memset(kvalue, 0, map->value_size); 472 + unlock: 473 + mutex_unlock(&st_map->lock); 474 + return err; 475 + } 476 + 477 + static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) 478 + { 479 + enum bpf_struct_ops_state prev_state; 480 + struct bpf_struct_ops_map *st_map; 481 + 482 + st_map = (struct bpf_struct_ops_map *)map; 483 + prev_state = cmpxchg(&st_map->kvalue.state, 484 + BPF_STRUCT_OPS_STATE_INUSE, 485 + BPF_STRUCT_OPS_STATE_TOBEFREE); 486 + if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) { 487 + st_map->st_ops->unreg(&st_map->kvalue.data); 488 + if (refcount_dec_and_test(&st_map->kvalue.refcnt)) 489 + bpf_map_put(map); 490 + } 491 + 492 + return 0; 493 + } 494 + 495 + static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, 496 + struct seq_file *m) 497 + { 498 + void *value; 499 + int err; 500 + 501 + value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); 502 + if (!value) 503 + return; 504 + 505 + err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 506 + if (!err) { 507 + btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id, 508 + value, m); 509 + seq_puts(m, "\n"); 510 + } 511 + 512 + kfree(value); 513 + } 514 + 515 + static void bpf_struct_ops_map_free(struct bpf_map *map) 516 + { 517 + struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; 518 + 519 + if (st_map->progs) 520 + bpf_struct_ops_map_put_progs(st_map); 521 + bpf_map_area_free(st_map->progs); 522 + 
bpf_jit_free_exec(st_map->image); 523 + bpf_map_area_free(st_map->uvalue); 524 + bpf_map_area_free(st_map); 525 + } 526 + 527 + static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) 528 + { 529 + if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 || 530 + attr->map_flags || !attr->btf_vmlinux_value_type_id) 531 + return -EINVAL; 532 + return 0; 533 + } 534 + 535 + static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) 536 + { 537 + const struct bpf_struct_ops *st_ops; 538 + size_t map_total_size, st_map_size; 539 + struct bpf_struct_ops_map *st_map; 540 + const struct btf_type *t, *vt; 541 + struct bpf_map_memory mem; 542 + struct bpf_map *map; 543 + int err; 544 + 545 + if (!capable(CAP_SYS_ADMIN)) 546 + return ERR_PTR(-EPERM); 547 + 548 + st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); 549 + if (!st_ops) 550 + return ERR_PTR(-ENOTSUPP); 551 + 552 + vt = st_ops->value_type; 553 + if (attr->value_size != vt->size) 554 + return ERR_PTR(-EINVAL); 555 + 556 + t = st_ops->type; 557 + 558 + st_map_size = sizeof(*st_map) + 559 + /* kvalue stores the 560 + * struct bpf_struct_ops_tcp_congestions_ops 561 + */ 562 + (vt->size - sizeof(struct bpf_struct_ops_value)); 563 + map_total_size = st_map_size + 564 + /* uvalue */ 565 + sizeof(vt->size) + 566 + /* struct bpf_progs **progs */ 567 + btf_type_vlen(t) * sizeof(struct bpf_prog *); 568 + err = bpf_map_charge_init(&mem, map_total_size); 569 + if (err < 0) 570 + return ERR_PTR(err); 571 + 572 + st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); 573 + if (!st_map) { 574 + bpf_map_charge_finish(&mem); 575 + return ERR_PTR(-ENOMEM); 576 + } 577 + st_map->st_ops = st_ops; 578 + map = &st_map->map; 579 + 580 + st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); 581 + st_map->progs = 582 + bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *), 583 + NUMA_NO_NODE); 584 + st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); 585 + if (!st_map->uvalue || 
!st_map->progs || !st_map->image) { 586 + bpf_struct_ops_map_free(map); 587 + bpf_map_charge_finish(&mem); 588 + return ERR_PTR(-ENOMEM); 589 + } 590 + 591 + mutex_init(&st_map->lock); 592 + set_vm_flush_reset_perms(st_map->image); 593 + bpf_map_init_from_attr(map, attr); 594 + bpf_map_charge_move(&map->memory, &mem); 595 + 596 + return map; 597 + } 598 + 599 + const struct bpf_map_ops bpf_struct_ops_map_ops = { 600 + .map_alloc_check = bpf_struct_ops_map_alloc_check, 601 + .map_alloc = bpf_struct_ops_map_alloc, 602 + .map_free = bpf_struct_ops_map_free, 603 + .map_get_next_key = bpf_struct_ops_map_get_next_key, 604 + .map_lookup_elem = bpf_struct_ops_map_lookup_elem, 605 + .map_delete_elem = bpf_struct_ops_map_delete_elem, 606 + .map_update_elem = bpf_struct_ops_map_update_elem, 607 + .map_seq_show_elem = bpf_struct_ops_map_seq_show_elem, 608 + }; 609 + 610 + /* "const void *" because some subsystem is 611 + * passing a const (e.g. const struct tcp_congestion_ops *) 612 + */ 613 + bool bpf_struct_ops_get(const void *kdata) 614 + { 615 + struct bpf_struct_ops_value *kvalue; 616 + 617 + kvalue = container_of(kdata, struct bpf_struct_ops_value, data); 618 + 619 + return refcount_inc_not_zero(&kvalue->refcnt); 620 + } 621 + 622 + void bpf_struct_ops_put(const void *kdata) 623 + { 624 + struct bpf_struct_ops_value *kvalue; 625 + 626 + kvalue = container_of(kdata, struct bpf_struct_ops_value, data); 627 + if (refcount_dec_and_test(&kvalue->refcnt)) { 628 + struct bpf_struct_ops_map *st_map; 629 + 630 + st_map = container_of(kvalue, struct bpf_struct_ops_map, 631 + kvalue); 632 + bpf_map_put(&st_map->map); 633 + } 634 + }
+9
kernel/bpf/bpf_struct_ops_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* internal file - do not include directly */ 3 + 4 + #ifdef CONFIG_BPF_JIT 5 + #ifdef CONFIG_INET 6 + #include <net/tcp.h> 7 + BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) 8 + #endif 9 + #endif
+410 -78
kernel/bpf/btf.c
··· 180 180 */ 181 181 #define BTF_MAX_SIZE (16 * 1024 * 1024) 182 182 183 - #define for_each_member(i, struct_type, member) \ 184 - for (i = 0, member = btf_type_member(struct_type); \ 185 - i < btf_type_vlen(struct_type); \ 186 - i++, member++) 187 - 188 183 #define for_each_member_from(i, from, struct_type, member) \ 189 184 for (i = from, member = btf_type_member(struct_type) + from; \ 190 185 i < btf_type_vlen(struct_type); \ ··· 275 280 [BTF_KIND_VAR] = "VAR", 276 281 [BTF_KIND_DATASEC] = "DATASEC", 277 282 }; 283 + 284 + static const char *btf_type_str(const struct btf_type *t) 285 + { 286 + return btf_kind_str[BTF_INFO_KIND(t->info)]; 287 + } 278 288 279 289 struct btf_kind_operations { 280 290 s32 (*check_meta)(struct btf_verifier_env *env, ··· 382 382 return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; 383 383 } 384 384 385 + s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) 386 + { 387 + const struct btf_type *t; 388 + const char *tname; 389 + u32 i; 390 + 391 + for (i = 1; i <= btf->nr_types; i++) { 392 + t = btf->types[i]; 393 + if (BTF_INFO_KIND(t->info) != kind) 394 + continue; 395 + 396 + tname = btf_name_by_offset(btf, t->name_off); 397 + if (!strcmp(tname, name)) 398 + return i; 399 + } 400 + 401 + return -ENOENT; 402 + } 403 + 404 + const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, 405 + u32 id, u32 *res_id) 406 + { 407 + const struct btf_type *t = btf_type_by_id(btf, id); 408 + 409 + while (btf_type_is_modifier(t)) { 410 + id = t->type; 411 + t = btf_type_by_id(btf, t->type); 412 + } 413 + 414 + if (res_id) 415 + *res_id = id; 416 + 417 + return t; 418 + } 419 + 420 + const struct btf_type *btf_type_resolve_ptr(const struct btf *btf, 421 + u32 id, u32 *res_id) 422 + { 423 + const struct btf_type *t; 424 + 425 + t = btf_type_skip_modifiers(btf, id, NULL); 426 + if (!btf_type_is_ptr(t)) 427 + return NULL; 428 + 429 + return btf_type_skip_modifiers(btf, t->type, res_id); 430 + } 431 + 432 + const struct 
btf_type *btf_type_resolve_func_ptr(const struct btf *btf, 433 + u32 id, u32 *res_id) 434 + { 435 + const struct btf_type *ptype; 436 + 437 + ptype = btf_type_resolve_ptr(btf, id, res_id); 438 + if (ptype && btf_type_is_func_proto(ptype)) 439 + return ptype; 440 + 441 + return NULL; 442 + } 443 + 385 444 /* Types that act only as a source, not sink or intermediate 386 445 * type when resolving. 387 446 */ ··· 505 446 return "UNKN"; 506 447 } 507 448 508 - static u16 btf_type_vlen(const struct btf_type *t) 509 - { 510 - return BTF_INFO_VLEN(t->info); 511 - } 512 - 513 - static bool btf_type_kflag(const struct btf_type *t) 514 - { 515 - return BTF_INFO_KFLAG(t->info); 516 - } 517 - 518 - static u32 btf_member_bit_offset(const struct btf_type *struct_type, 519 - const struct btf_member *member) 520 - { 521 - return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) 522 - : member->offset; 523 - } 524 - 525 - static u32 btf_member_bitfield_size(const struct btf_type *struct_type, 526 - const struct btf_member *member) 527 - { 528 - return btf_type_kflag(struct_type) ? 
BTF_MEMBER_BITFIELD_SIZE(member->offset) 529 - : 0; 530 - } 531 - 532 449 static u32 btf_type_int(const struct btf_type *t) 533 450 { 534 451 return *(u32 *)(t + 1); ··· 513 478 static const struct btf_array *btf_type_array(const struct btf_type *t) 514 479 { 515 480 return (const struct btf_array *)(t + 1); 516 - } 517 - 518 - static const struct btf_member *btf_type_member(const struct btf_type *t) 519 - { 520 - return (const struct btf_member *)(t + 1); 521 481 } 522 482 523 483 static const struct btf_enum *btf_type_enum(const struct btf_type *t) ··· 1087 1057 * *elem_type: same as return type ("struct X") 1088 1058 * *total_nelems: 1 1089 1059 */ 1090 - static const struct btf_type * 1060 + const struct btf_type * 1091 1061 btf_resolve_size(const struct btf *btf, const struct btf_type *type, 1092 1062 u32 *type_size, const struct btf_type **elem_type, 1093 1063 u32 *total_nelems) ··· 1141 1111 return ERR_PTR(-EINVAL); 1142 1112 1143 1113 *type_size = nelems * size; 1144 - *total_nelems = nelems; 1145 - *elem_type = type; 1114 + if (total_nelems) 1115 + *total_nelems = nelems; 1116 + if (elem_type) 1117 + *elem_type = type; 1146 1118 1147 1119 return array_type ? 
: type; 1148 1120 } ··· 1858 1826 u32 type_id, void *data, 1859 1827 u8 bits_offset, struct seq_file *m) 1860 1828 { 1861 - t = btf_type_id_resolve(btf, &type_id); 1829 + if (btf->resolved_ids) 1830 + t = btf_type_id_resolve(btf, &type_id); 1831 + else 1832 + t = btf_type_skip_modifiers(btf, type_id, NULL); 1862 1833 1863 1834 btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); 1864 1835 } ··· 2656 2621 return -EINVAL; 2657 2622 } 2658 2623 2659 - if (btf_type_vlen(t)) { 2660 - btf_verifier_log_type(env, t, "vlen != 0"); 2624 + if (btf_type_vlen(t) > BTF_FUNC_GLOBAL) { 2625 + btf_verifier_log_type(env, t, "Invalid func linkage"); 2661 2626 return -EINVAL; 2662 2627 } 2663 2628 ··· 3511 3476 3512 3477 static const struct btf_member * 3513 3478 btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, 3514 - const struct btf_type *t, enum bpf_prog_type prog_type) 3479 + const struct btf_type *t, enum bpf_prog_type prog_type, 3480 + int arg) 3515 3481 { 3516 3482 const struct btf_type *conv_struct; 3517 3483 const struct btf_type *ctx_struct; ··· 3533 3497 * is not supported yet. 3534 3498 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. 3535 3499 */ 3536 - bpf_log(log, "BPF program ctx type is not a struct\n"); 3500 + if (log->level & BPF_LOG_LEVEL) 3501 + bpf_log(log, "arg#%d type is not a struct\n", arg); 3537 3502 return NULL; 3538 3503 } 3539 3504 tname = btf_name_by_offset(btf, t->name_off); 3540 3505 if (!tname) { 3541 - bpf_log(log, "BPF program ctx struct doesn't have a name\n"); 3506 + bpf_log(log, "arg#%d struct doesn't have a name\n", arg); 3542 3507 return NULL; 3543 3508 } 3544 3509 /* prog_type is valid bpf program type. No need for bounds check. 
*/ ··· 3572 3535 static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, 3573 3536 struct btf *btf, 3574 3537 const struct btf_type *t, 3575 - enum bpf_prog_type prog_type) 3538 + enum bpf_prog_type prog_type, 3539 + int arg) 3576 3540 { 3577 3541 const struct btf_member *prog_ctx_type, *kern_ctx_type; 3578 3542 3579 - prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type); 3543 + prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg); 3580 3544 if (!prog_ctx_type) 3581 3545 return -ENOENT; 3582 3546 kern_ctx_type = prog_ctx_type + 1; ··· 3642 3604 err = -ENOENT; 3643 3605 goto errout; 3644 3606 } 3607 + 3608 + bpf_struct_ops_init(btf); 3645 3609 3646 3610 btf_verifier_env_free(env); 3647 3611 refcount_set(&btf->refcnt, 1); ··· 3717 3677 /* skip modifiers */ 3718 3678 while (btf_type_is_modifier(t)) 3719 3679 t = btf_type_by_id(btf, t->type); 3720 - if (btf_type_is_int(t)) 3680 + if (btf_type_is_int(t) || btf_type_is_enum(t)) 3721 3681 /* accessing a scalar */ 3722 3682 return true; 3723 3683 if (!btf_type_is_ptr(t)) { ··· 3737 3697 3738 3698 /* this is a pointer to another type */ 3739 3699 info->reg_type = PTR_TO_BTF_ID; 3740 - info->btf_id = t->type; 3741 3700 3742 3701 if (tgt_prog) { 3743 - ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type); 3702 + ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg); 3744 3703 if (ret > 0) { 3745 3704 info->btf_id = ret; 3746 3705 return true; ··· 3747 3708 return false; 3748 3709 } 3749 3710 } 3711 + 3712 + info->btf_id = t->type; 3750 3713 t = btf_type_by_id(btf, t->type); 3751 3714 /* skip modifiers */ 3752 - while (btf_type_is_modifier(t)) 3715 + while (btf_type_is_modifier(t)) { 3716 + info->btf_id = t->type; 3753 3717 t = btf_type_by_id(btf, t->type); 3718 + } 3754 3719 if (!btf_type_is_struct(t)) { 3755 3720 bpf_log(log, 3756 3721 "func '%s' arg%d type %s is not a struct\n", ··· 3780 3737 again: 3781 3738 tname = __btf_name_by_offset(btf_vmlinux, t->name_off); 3782 3739 
if (!btf_type_is_struct(t)) { 3783 - bpf_log(log, "Type '%s' is not a struct", tname); 3740 + bpf_log(log, "Type '%s' is not a struct\n", tname); 3784 3741 return -EINVAL; 3785 3742 } 3786 3743 3787 - for_each_member(i, t, member) { 3788 - if (btf_member_bitfield_size(t, member)) 3789 - /* bitfields are not supported yet */ 3790 - continue; 3744 + if (off + size > t->size) { 3745 + bpf_log(log, "access beyond struct %s at off %u size %u\n", 3746 + tname, off, size); 3747 + return -EACCES; 3748 + } 3791 3749 3750 + for_each_member(i, t, member) { 3792 3751 /* offset of the field in bytes */ 3793 3752 moff = btf_member_bit_offset(t, member) / 8; 3794 3753 if (off + size <= moff) 3795 3754 /* won't find anything, field is already too far */ 3796 3755 break; 3756 + 3757 + if (btf_member_bitfield_size(t, member)) { 3758 + u32 end_bit = btf_member_bit_offset(t, member) + 3759 + btf_member_bitfield_size(t, member); 3760 + 3761 + /* off <= moff instead of off == moff because clang 3762 + * does not generate a BTF member for anonymous 3763 + * bitfield like the ":16" here: 3764 + * struct { 3765 + * int :16; 3766 + * int x:8; 3767 + * }; 3768 + */ 3769 + if (off <= moff && 3770 + BITS_ROUNDUP_BYTES(end_bit) <= off + size) 3771 + return SCALAR_VALUE; 3772 + 3773 + /* off may be accessing a following member 3774 + * 3775 + * or 3776 + * 3777 + * Doing partial access at either end of this 3778 + * bitfield. Continue on this case also to 3779 + * treat it as not accessing this bitfield 3780 + * and eventually error out as field not 3781 + * found to keep it simple. 3782 + * It could be relaxed if there was a legit 3783 + * partial access case later. 
3784 + */ 3785 + continue; 3786 + } 3787 + 3797 3788 /* In case of "off" is pointing to holes of a struct */ 3798 3789 if (off < moff) 3799 - continue; 3790 + break; 3800 3791 3801 3792 /* type of the field */ 3802 3793 mtype = btf_type_by_id(btf_vmlinux, member->type); ··· 4120 4043 return 0; 4121 4044 } 4122 4045 4123 - int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog) 4046 + /* Compare BTFs of two functions assuming only scalars and pointers to context. 4047 + * t1 points to BTF_KIND_FUNC in btf1 4048 + * t2 points to BTF_KIND_FUNC in btf2 4049 + * Returns: 4050 + * EINVAL - function prototype mismatch 4051 + * EFAULT - verifier bug 4052 + * 0 - 99% match. The last 1% is validated by the verifier. 4053 + */ 4054 + int btf_check_func_type_match(struct bpf_verifier_log *log, 4055 + struct btf *btf1, const struct btf_type *t1, 4056 + struct btf *btf2, const struct btf_type *t2) 4124 4057 { 4125 - struct bpf_verifier_state *st = env->cur_state; 4126 - struct bpf_func_state *func = st->frame[st->curframe]; 4127 - struct bpf_reg_state *reg = func->regs; 4058 + const struct btf_param *args1, *args2; 4059 + const char *fn1, *fn2, *s1, *s2; 4060 + u32 nargs1, nargs2, i; 4061 + 4062 + fn1 = btf_name_by_offset(btf1, t1->name_off); 4063 + fn2 = btf_name_by_offset(btf2, t2->name_off); 4064 + 4065 + if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) { 4066 + bpf_log(log, "%s() is not a global function\n", fn1); 4067 + return -EINVAL; 4068 + } 4069 + if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) { 4070 + bpf_log(log, "%s() is not a global function\n", fn2); 4071 + return -EINVAL; 4072 + } 4073 + 4074 + t1 = btf_type_by_id(btf1, t1->type); 4075 + if (!t1 || !btf_type_is_func_proto(t1)) 4076 + return -EFAULT; 4077 + t2 = btf_type_by_id(btf2, t2->type); 4078 + if (!t2 || !btf_type_is_func_proto(t2)) 4079 + return -EFAULT; 4080 + 4081 + args1 = (const struct btf_param *)(t1 + 1); 4082 + nargs1 = btf_type_vlen(t1); 4083 + args2 = (const struct btf_param *)(t2 + 1); 
4084 + nargs2 = btf_type_vlen(t2); 4085 + 4086 + if (nargs1 != nargs2) { 4087 + bpf_log(log, "%s() has %d args while %s() has %d args\n", 4088 + fn1, nargs1, fn2, nargs2); 4089 + return -EINVAL; 4090 + } 4091 + 4092 + t1 = btf_type_skip_modifiers(btf1, t1->type, NULL); 4093 + t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); 4094 + if (t1->info != t2->info) { 4095 + bpf_log(log, 4096 + "Return type %s of %s() doesn't match type %s of %s()\n", 4097 + btf_type_str(t1), fn1, 4098 + btf_type_str(t2), fn2); 4099 + return -EINVAL; 4100 + } 4101 + 4102 + for (i = 0; i < nargs1; i++) { 4103 + t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL); 4104 + t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL); 4105 + 4106 + if (t1->info != t2->info) { 4107 + bpf_log(log, "arg%d in %s() is %s while %s() has %s\n", 4108 + i, fn1, btf_type_str(t1), 4109 + fn2, btf_type_str(t2)); 4110 + return -EINVAL; 4111 + } 4112 + if (btf_type_has_size(t1) && t1->size != t2->size) { 4113 + bpf_log(log, 4114 + "arg%d in %s() has size %d while %s() has %d\n", 4115 + i, fn1, t1->size, 4116 + fn2, t2->size); 4117 + return -EINVAL; 4118 + } 4119 + 4120 + /* global functions are validated with scalars and pointers 4121 + * to context only. And only global functions can be replaced. 4122 + * Hence type check only those types. 
4123 + */ 4124 + if (btf_type_is_int(t1) || btf_type_is_enum(t1)) 4125 + continue; 4126 + if (!btf_type_is_ptr(t1)) { 4127 + bpf_log(log, 4128 + "arg%d in %s() has unrecognized type\n", 4129 + i, fn1); 4130 + return -EINVAL; 4131 + } 4132 + t1 = btf_type_skip_modifiers(btf1, t1->type, NULL); 4133 + t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); 4134 + if (!btf_type_is_struct(t1)) { 4135 + bpf_log(log, 4136 + "arg%d in %s() is not a pointer to context\n", 4137 + i, fn1); 4138 + return -EINVAL; 4139 + } 4140 + if (!btf_type_is_struct(t2)) { 4141 + bpf_log(log, 4142 + "arg%d in %s() is not a pointer to context\n", 4143 + i, fn2); 4144 + return -EINVAL; 4145 + } 4146 + /* This is an optional check to make program writing easier. 4147 + * Compare names of structs and report an error to the user. 4148 + * btf_prepare_func_args() already checked that t2 struct 4149 + * is a context type. btf_prepare_func_args() will check 4150 + * later that t1 struct is a context type as well. 4151 + */ 4152 + s1 = btf_name_by_offset(btf1, t1->name_off); 4153 + s2 = btf_name_by_offset(btf2, t2->name_off); 4154 + if (strcmp(s1, s2)) { 4155 + bpf_log(log, 4156 + "arg%d %s(struct %s *) doesn't match %s(struct %s *)\n", 4157 + i, fn1, s1, fn2, s2); 4158 + return -EINVAL; 4159 + } 4160 + } 4161 + return 0; 4162 + } 4163 + 4164 + /* Compare BTFs of given program with BTF of target program */ 4165 + int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, 4166 + struct btf *btf2, const struct btf_type *t2) 4167 + { 4168 + struct btf *btf1 = prog->aux->btf; 4169 + const struct btf_type *t1; 4170 + u32 btf_id = 0; 4171 + 4172 + if (!prog->aux->func_info) { 4173 + bpf_log(&env->log, "Program extension requires BTF\n"); 4174 + return -EINVAL; 4175 + } 4176 + 4177 + btf_id = prog->aux->func_info[0].type_id; 4178 + if (!btf_id) 4179 + return -EFAULT; 4180 + 4181 + t1 = btf_type_by_id(btf1, btf_id); 4182 + if (!t1 || !btf_type_is_func(t1)) 4183 + return -EFAULT; 4184 + 4185 + 
return btf_check_func_type_match(&env->log, btf1, t1, btf2, t2); 4186 + } 4187 + 4188 + /* Compare BTF of a function with given bpf_reg_state. 4189 + * Returns: 4190 + * EFAULT - there is a verifier bug. Abort verification. 4191 + * EINVAL - there is a type mismatch or BTF is not available. 4192 + * 0 - BTF matches with what bpf_reg_state expects. 4193 + * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. 4194 + */ 4195 + int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, 4196 + struct bpf_reg_state *reg) 4197 + { 4128 4198 struct bpf_verifier_log *log = &env->log; 4129 4199 struct bpf_prog *prog = env->prog; 4130 4200 struct btf *btf = prog->aux->btf; ··· 4281 4057 const char *tname; 4282 4058 4283 4059 if (!prog->aux->func_info) 4284 - return 0; 4060 + return -EINVAL; 4285 4061 4286 4062 btf_id = prog->aux->func_info[subprog].type_id; 4287 4063 if (!btf_id) 4288 - return 0; 4064 + return -EFAULT; 4289 4065 4290 4066 if (prog->aux->func_info_aux[subprog].unreliable) 4291 - return 0; 4067 + return -EINVAL; 4292 4068 4293 4069 t = btf_type_by_id(btf, btf_id); 4294 4070 if (!t || !btf_type_is_func(t)) { 4295 - bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n", 4071 + /* These checks were already done by the verifier while loading 4072 + * struct bpf_func_info 4073 + */ 4074 + bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", 4296 4075 subprog); 4297 - return -EINVAL; 4076 + return -EFAULT; 4298 4077 } 4299 4078 tname = btf_name_by_offset(btf, t->name_off); 4300 4079 4301 4080 t = btf_type_by_id(btf, t->type); 4302 4081 if (!t || !btf_type_is_func_proto(t)) { 4303 - bpf_log(log, "Invalid type of func %s\n", tname); 4304 - return -EINVAL; 4082 + bpf_log(log, "Invalid BTF of func %s\n", tname); 4083 + return -EFAULT; 4305 4084 } 4306 4085 args = (const struct btf_param *)(t + 1); 4307 4086 nargs = btf_type_vlen(t); ··· 4330 4103 bpf_log(log, "R%d is not a pointer\n", i + 1); 4331 4104 goto out; 4332 4105 } 4333 - /* If 
program is passing PTR_TO_CTX into subprogram 4334 - * check that BTF type matches. 4106 + /* If function expects ctx type in BTF check that caller 4107 + * is passing PTR_TO_CTX. 4335 4108 */ 4336 - if (reg[i + 1].type == PTR_TO_CTX && 4337 - !btf_get_prog_ctx_type(log, btf, t, prog->type)) 4338 - goto out; 4339 - /* All other pointers are ok */ 4340 - continue; 4109 + if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) { 4110 + if (reg[i + 1].type != PTR_TO_CTX) { 4111 + bpf_log(log, 4112 + "arg#%d expected pointer to ctx, but got %s\n", 4113 + i, btf_kind_str[BTF_INFO_KIND(t->info)]); 4114 + goto out; 4115 + } 4116 + if (check_ctx_reg(env, &reg[i + 1], i + 1)) 4117 + goto out; 4118 + continue; 4119 + } 4341 4120 } 4342 - bpf_log(log, "Unrecognized argument type %s\n", 4343 - btf_kind_str[BTF_INFO_KIND(t->info)]); 4121 + bpf_log(log, "Unrecognized arg#%d type %s\n", 4122 + i, btf_kind_str[BTF_INFO_KIND(t->info)]); 4344 4123 goto out; 4345 4124 } 4346 4125 return 0; 4347 4126 out: 4348 - /* LLVM optimizations can remove arguments from static functions. */ 4349 - bpf_log(log, 4350 - "Type info disagrees with actual arguments due to compiler optimizations\n"); 4127 + /* Compiler optimizations can remove arguments from static functions 4128 + * or mismatched type can be passed into a global function. 4129 + * In such cases mark the function as unreliable from BTF point of view. 4130 + */ 4351 4131 prog->aux->func_info_aux[subprog].unreliable = true; 4132 + return -EINVAL; 4133 + } 4134 + 4135 + /* Convert BTF of a function into bpf_reg_state if possible 4136 + * Returns: 4137 + * EFAULT - there is a verifier bug. Abort verification. 4138 + * EINVAL - cannot convert BTF. 4139 + * 0 - Successfully converted BTF into bpf_reg_state 4140 + * (either PTR_TO_CTX or SCALAR_VALUE). 
4141 + */ 4142 + int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, 4143 + struct bpf_reg_state *reg) 4144 + { 4145 + struct bpf_verifier_log *log = &env->log; 4146 + struct bpf_prog *prog = env->prog; 4147 + enum bpf_prog_type prog_type = prog->type; 4148 + struct btf *btf = prog->aux->btf; 4149 + const struct btf_param *args; 4150 + const struct btf_type *t; 4151 + u32 i, nargs, btf_id; 4152 + const char *tname; 4153 + 4154 + if (!prog->aux->func_info || 4155 + prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) { 4156 + bpf_log(log, "Verifier bug\n"); 4157 + return -EFAULT; 4158 + } 4159 + 4160 + btf_id = prog->aux->func_info[subprog].type_id; 4161 + if (!btf_id) { 4162 + bpf_log(log, "Global functions need valid BTF\n"); 4163 + return -EFAULT; 4164 + } 4165 + 4166 + t = btf_type_by_id(btf, btf_id); 4167 + if (!t || !btf_type_is_func(t)) { 4168 + /* These checks were already done by the verifier while loading 4169 + * struct bpf_func_info 4170 + */ 4171 + bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", 4172 + subprog); 4173 + return -EFAULT; 4174 + } 4175 + tname = btf_name_by_offset(btf, t->name_off); 4176 + 4177 + if (log->level & BPF_LOG_LEVEL) 4178 + bpf_log(log, "Validating %s() func#%d...\n", 4179 + tname, subprog); 4180 + 4181 + if (prog->aux->func_info_aux[subprog].unreliable) { 4182 + bpf_log(log, "Verifier bug in function %s()\n", tname); 4183 + return -EFAULT; 4184 + } 4185 + if (prog_type == BPF_PROG_TYPE_EXT) 4186 + prog_type = prog->aux->linked_prog->type; 4187 + 4188 + t = btf_type_by_id(btf, t->type); 4189 + if (!t || !btf_type_is_func_proto(t)) { 4190 + bpf_log(log, "Invalid type of function %s()\n", tname); 4191 + return -EFAULT; 4192 + } 4193 + args = (const struct btf_param *)(t + 1); 4194 + nargs = btf_type_vlen(t); 4195 + if (nargs > 5) { 4196 + bpf_log(log, "Global function %s() with %d > 5 args. 
Buggy compiler.\n", 4197 + tname, nargs); 4198 + return -EINVAL; 4199 + } 4200 + /* check that function returns int */ 4201 + t = btf_type_by_id(btf, t->type); 4202 + while (btf_type_is_modifier(t)) 4203 + t = btf_type_by_id(btf, t->type); 4204 + if (!btf_type_is_int(t) && !btf_type_is_enum(t)) { 4205 + bpf_log(log, 4206 + "Global function %s() doesn't return scalar. Only those are supported.\n", 4207 + tname); 4208 + return -EINVAL; 4209 + } 4210 + /* Convert BTF function arguments into verifier types. 4211 + * Only PTR_TO_CTX and SCALAR are supported atm. 4212 + */ 4213 + for (i = 0; i < nargs; i++) { 4214 + t = btf_type_by_id(btf, args[i].type); 4215 + while (btf_type_is_modifier(t)) 4216 + t = btf_type_by_id(btf, t->type); 4217 + if (btf_type_is_int(t) || btf_type_is_enum(t)) { 4218 + reg[i + 1].type = SCALAR_VALUE; 4219 + continue; 4220 + } 4221 + if (btf_type_is_ptr(t) && 4222 + btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { 4223 + reg[i + 1].type = PTR_TO_CTX; 4224 + continue; 4225 + } 4226 + bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", 4227 + i, btf_kind_str[BTF_INFO_KIND(t->info)], tname); 4228 + return -EINVAL; 4229 + } 4352 4230 return 0; 4353 4231 } 4354 4232
+1
kernel/bpf/core.c
··· 2137 2137 const struct bpf_func_proto bpf_map_peek_elem_proto __weak; 2138 2138 const struct bpf_func_proto bpf_spin_lock_proto __weak; 2139 2139 const struct bpf_func_proto bpf_spin_unlock_proto __weak; 2140 + const struct bpf_func_proto bpf_jiffies64_proto __weak; 2140 2141 2141 2142 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; 2142 2143 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+49 -46
kernel/bpf/devmap.c
··· 53 53 (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 54 54 55 55 #define DEV_MAP_BULK_SIZE 16 56 - struct bpf_dtab_netdev; 57 - 58 - struct xdp_bulk_queue { 56 + struct xdp_dev_bulk_queue { 59 57 struct xdp_frame *q[DEV_MAP_BULK_SIZE]; 60 58 struct list_head flush_node; 59 + struct net_device *dev; 61 60 struct net_device *dev_rx; 62 - struct bpf_dtab_netdev *obj; 63 61 unsigned int count; 64 62 }; 65 63 ··· 65 67 struct net_device *dev; /* must be first member, due to tracepoint */ 66 68 struct hlist_node index_hlist; 67 69 struct bpf_dtab *dtab; 68 - struct xdp_bulk_queue __percpu *bulkq; 69 70 struct rcu_head rcu; 70 - unsigned int idx; /* keep track of map index for tracepoint */ 71 + unsigned int idx; 71 72 }; 72 73 73 74 struct bpf_dtab { ··· 81 84 u32 n_buckets; 82 85 }; 83 86 84 - static DEFINE_PER_CPU(struct list_head, dev_map_flush_list); 87 + static DEFINE_PER_CPU(struct list_head, dev_flush_list); 85 88 static DEFINE_SPINLOCK(dev_map_lock); 86 89 static LIST_HEAD(dev_map_list); 87 90 ··· 216 219 217 220 hlist_for_each_entry_safe(dev, next, head, index_hlist) { 218 221 hlist_del_rcu(&dev->index_hlist); 219 - free_percpu(dev->bulkq); 220 222 dev_put(dev->dev); 221 223 kfree(dev); 222 224 } ··· 230 234 if (!dev) 231 235 continue; 232 236 233 - free_percpu(dev->bulkq); 234 237 dev_put(dev->dev); 235 238 kfree(dev); 236 239 } ··· 315 320 return -ENOENT; 316 321 } 317 322 318 - static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags) 323 + static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) 319 324 { 320 - struct bpf_dtab_netdev *obj = bq->obj; 321 - struct net_device *dev = obj->dev; 325 + struct net_device *dev = bq->dev; 322 326 int sent = 0, drops = 0, err = 0; 323 327 int i; 324 328 ··· 340 346 out: 341 347 bq->count = 0; 342 348 343 - trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx, 344 - sent, drops, bq->dev_rx, dev, err); 349 + trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, drops, err); 345 350 bq->dev_rx = NULL; 346 351 
__list_del_clearprev(&bq->flush_node); 347 352 return 0; ··· 357 364 goto out; 358 365 } 359 366 360 - /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled 367 + /* __dev_flush is called from xdp_do_flush() which _must_ be signaled 361 368 * from the driver before returning from its napi->poll() routine. The poll() 362 369 * routine is called either from busy_poll context or net_rx_action signaled 363 370 * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the 364 371 * net device can be torn down. On devmap tear down we ensure the flush list 365 372 * is empty before completing to ensure all flush operations have completed. 366 373 */ 367 - void __dev_map_flush(void) 374 + void __dev_flush(void) 368 375 { 369 - struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); 370 - struct xdp_bulk_queue *bq, *tmp; 376 + struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); 377 + struct xdp_dev_bulk_queue *bq, *tmp; 371 378 372 379 rcu_read_lock(); 373 380 list_for_each_entry_safe(bq, tmp, flush_list, flush_node) ··· 394 401 /* Runs under RCU-read-side, plus in softirq under NAPI protection. 395 402 * Thus, safe percpu variable access. 
396 403 */ 397 - static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, 404 + static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, 398 405 struct net_device *dev_rx) 399 - 400 406 { 401 - struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); 402 - struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); 407 + struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); 408 + struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); 403 409 404 410 if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) 405 411 bq_xmit_all(bq, 0); ··· 418 426 return 0; 419 427 } 420 428 421 - int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, 422 - struct net_device *dev_rx) 429 + static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, 430 + struct net_device *dev_rx) 423 431 { 424 - struct net_device *dev = dst->dev; 425 432 struct xdp_frame *xdpf; 426 433 int err; 427 434 ··· 435 444 if (unlikely(!xdpf)) 436 445 return -EOVERFLOW; 437 446 438 - return bq_enqueue(dst, xdpf, dev_rx); 447 + return bq_enqueue(dev, xdpf, dev_rx); 448 + } 449 + 450 + int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, 451 + struct net_device *dev_rx) 452 + { 453 + return __xdp_enqueue(dev, xdp, dev_rx); 454 + } 455 + 456 + int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, 457 + struct net_device *dev_rx) 458 + { 459 + struct net_device *dev = dst->dev; 460 + 461 + return __xdp_enqueue(dev, xdp, dev_rx); 439 462 } 440 463 441 464 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, ··· 488 483 struct bpf_dtab_netdev *dev; 489 484 490 485 dev = container_of(rcu, struct bpf_dtab_netdev, rcu); 491 - free_percpu(dev->bulkq); 492 486 dev_put(dev->dev); 493 487 kfree(dev); 494 488 } ··· 542 538 u32 ifindex, 543 539 unsigned int idx) 544 540 { 545 - gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; 546 541 struct bpf_dtab_netdev *dev; 547 - struct xdp_bulk_queue *bq; 548 - int 
cpu; 549 542 550 - dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); 543 + dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, 544 + dtab->map.numa_node); 551 545 if (!dev) 552 546 return ERR_PTR(-ENOMEM); 553 547 554 - dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), 555 - sizeof(void *), gfp); 556 - if (!dev->bulkq) { 557 - kfree(dev); 558 - return ERR_PTR(-ENOMEM); 559 - } 560 - 561 - for_each_possible_cpu(cpu) { 562 - bq = per_cpu_ptr(dev->bulkq, cpu); 563 - bq->obj = dev; 564 - } 565 - 566 548 dev->dev = dev_get_by_index(net, ifindex); 567 549 if (!dev->dev) { 568 - free_percpu(dev->bulkq); 569 550 kfree(dev); 570 551 return ERR_PTR(-EINVAL); 571 552 } ··· 710 721 { 711 722 struct net_device *netdev = netdev_notifier_info_to_dev(ptr); 712 723 struct bpf_dtab *dtab; 713 - int i; 724 + int i, cpu; 714 725 715 726 switch (event) { 727 + case NETDEV_REGISTER: 728 + if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq) 729 + break; 730 + 731 + /* will be freed in free_netdev() */ 732 + netdev->xdp_bulkq = 733 + __alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue), 734 + sizeof(void *), GFP_ATOMIC); 735 + if (!netdev->xdp_bulkq) 736 + return NOTIFY_BAD; 737 + 738 + for_each_possible_cpu(cpu) 739 + per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev; 740 + break; 716 741 case NETDEV_UNREGISTER: 717 742 /* This rcu_read_lock/unlock pair is needed because 718 743 * dev_map_list is an RCU list AND to ensure a delete ··· 774 771 register_netdevice_notifier(&dev_map_notifier); 775 772 776 773 for_each_possible_cpu(cpu) 777 - INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu)); 774 + INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu)); 778 775 return 0; 779 776 } 780 777
+264
kernel/bpf/hashtab.c
··· 17 17 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ 18 18 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) 19 19 20 + #define BATCH_OPS(_name) \ 21 + .map_lookup_batch = \ 22 + _name##_map_lookup_batch, \ 23 + .map_lookup_and_delete_batch = \ 24 + _name##_map_lookup_and_delete_batch, \ 25 + .map_update_batch = \ 26 + generic_map_update_batch, \ 27 + .map_delete_batch = \ 28 + generic_map_delete_batch 29 + 20 30 struct bucket { 21 31 struct hlist_nulls_head head; 22 32 raw_spinlock_t lock; ··· 1242 1232 rcu_read_unlock(); 1243 1233 } 1244 1234 1235 + static int 1236 + __htab_map_lookup_and_delete_batch(struct bpf_map *map, 1237 + const union bpf_attr *attr, 1238 + union bpf_attr __user *uattr, 1239 + bool do_delete, bool is_lru_map, 1240 + bool is_percpu) 1241 + { 1242 + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1243 + u32 bucket_cnt, total, key_size, value_size, roundup_key_size; 1244 + void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; 1245 + void __user *uvalues = u64_to_user_ptr(attr->batch.values); 1246 + void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1247 + void *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1248 + u32 batch, max_count, size, bucket_size; 1249 + u64 elem_map_flags, map_flags; 1250 + struct hlist_nulls_head *head; 1251 + struct hlist_nulls_node *n; 1252 + unsigned long flags; 1253 + struct htab_elem *l; 1254 + struct bucket *b; 1255 + int ret = 0; 1256 + 1257 + elem_map_flags = attr->batch.elem_flags; 1258 + if ((elem_map_flags & ~BPF_F_LOCK) || 1259 + ((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map))) 1260 + return -EINVAL; 1261 + 1262 + map_flags = attr->batch.flags; 1263 + if (map_flags) 1264 + return -EINVAL; 1265 + 1266 + max_count = attr->batch.count; 1267 + if (!max_count) 1268 + return 0; 1269 + 1270 + if (put_user(0, &uattr->batch.count)) 1271 + return -EFAULT; 1272 + 1273 + batch = 0; 1274 + if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch))) 1275 + return 
-EFAULT; 1276 + 1277 + if (batch >= htab->n_buckets) 1278 + return -ENOENT; 1279 + 1280 + key_size = htab->map.key_size; 1281 + roundup_key_size = round_up(htab->map.key_size, 8); 1282 + value_size = htab->map.value_size; 1283 + size = round_up(value_size, 8); 1284 + if (is_percpu) 1285 + value_size = size * num_possible_cpus(); 1286 + total = 0; 1287 + /* while experimenting with hash tables with sizes ranging from 10 to 1288 + * 1000, it was observed that a bucket can have upto 5 entries. 1289 + */ 1290 + bucket_size = 5; 1291 + 1292 + alloc: 1293 + /* We cannot do copy_from_user or copy_to_user inside 1294 + * the rcu_read_lock. Allocate enough space here. 1295 + */ 1296 + keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN); 1297 + values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN); 1298 + if (!keys || !values) { 1299 + ret = -ENOMEM; 1300 + goto after_loop; 1301 + } 1302 + 1303 + again: 1304 + preempt_disable(); 1305 + this_cpu_inc(bpf_prog_active); 1306 + rcu_read_lock(); 1307 + again_nocopy: 1308 + dst_key = keys; 1309 + dst_val = values; 1310 + b = &htab->buckets[batch]; 1311 + head = &b->head; 1312 + raw_spin_lock_irqsave(&b->lock, flags); 1313 + 1314 + bucket_cnt = 0; 1315 + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1316 + bucket_cnt++; 1317 + 1318 + if (bucket_cnt > (max_count - total)) { 1319 + if (total == 0) 1320 + ret = -ENOSPC; 1321 + raw_spin_unlock_irqrestore(&b->lock, flags); 1322 + rcu_read_unlock(); 1323 + this_cpu_dec(bpf_prog_active); 1324 + preempt_enable(); 1325 + goto after_loop; 1326 + } 1327 + 1328 + if (bucket_cnt > bucket_size) { 1329 + bucket_size = bucket_cnt; 1330 + raw_spin_unlock_irqrestore(&b->lock, flags); 1331 + rcu_read_unlock(); 1332 + this_cpu_dec(bpf_prog_active); 1333 + preempt_enable(); 1334 + kvfree(keys); 1335 + kvfree(values); 1336 + goto alloc; 1337 + } 1338 + 1339 + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1340 + memcpy(dst_key, l->key, key_size); 1341 + 1342 
+ if (is_percpu) { 1343 + int off = 0, cpu; 1344 + void __percpu *pptr; 1345 + 1346 + pptr = htab_elem_get_ptr(l, map->key_size); 1347 + for_each_possible_cpu(cpu) { 1348 + bpf_long_memcpy(dst_val + off, 1349 + per_cpu_ptr(pptr, cpu), size); 1350 + off += size; 1351 + } 1352 + } else { 1353 + value = l->key + roundup_key_size; 1354 + if (elem_map_flags & BPF_F_LOCK) 1355 + copy_map_value_locked(map, dst_val, value, 1356 + true); 1357 + else 1358 + copy_map_value(map, dst_val, value); 1359 + check_and_init_map_lock(map, dst_val); 1360 + } 1361 + if (do_delete) { 1362 + hlist_nulls_del_rcu(&l->hash_node); 1363 + if (is_lru_map) 1364 + bpf_lru_push_free(&htab->lru, &l->lru_node); 1365 + else 1366 + free_htab_elem(htab, l); 1367 + } 1368 + dst_key += key_size; 1369 + dst_val += value_size; 1370 + } 1371 + 1372 + raw_spin_unlock_irqrestore(&b->lock, flags); 1373 + /* If we are not copying data, we can go to next bucket and avoid 1374 + * unlocking the rcu. 1375 + */ 1376 + if (!bucket_cnt && (batch + 1 < htab->n_buckets)) { 1377 + batch++; 1378 + goto again_nocopy; 1379 + } 1380 + 1381 + rcu_read_unlock(); 1382 + this_cpu_dec(bpf_prog_active); 1383 + preempt_enable(); 1384 + if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys, 1385 + key_size * bucket_cnt) || 1386 + copy_to_user(uvalues + total * value_size, values, 1387 + value_size * bucket_cnt))) { 1388 + ret = -EFAULT; 1389 + goto after_loop; 1390 + } 1391 + 1392 + total += bucket_cnt; 1393 + batch++; 1394 + if (batch >= htab->n_buckets) { 1395 + ret = -ENOENT; 1396 + goto after_loop; 1397 + } 1398 + goto again; 1399 + 1400 + after_loop: 1401 + if (ret == -EFAULT) 1402 + goto out; 1403 + 1404 + /* copy # of entries and next batch */ 1405 + ubatch = u64_to_user_ptr(attr->batch.out_batch); 1406 + if (copy_to_user(ubatch, &batch, sizeof(batch)) || 1407 + put_user(total, &uattr->batch.count)) 1408 + ret = -EFAULT; 1409 + 1410 + out: 1411 + kvfree(keys); 1412 + kvfree(values); 1413 + return ret; 1414 + } 1415 
+ 1416 + static int 1417 + htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1418 + union bpf_attr __user *uattr) 1419 + { 1420 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1421 + false, true); 1422 + } 1423 + 1424 + static int 1425 + htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1426 + const union bpf_attr *attr, 1427 + union bpf_attr __user *uattr) 1428 + { 1429 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1430 + false, true); 1431 + } 1432 + 1433 + static int 1434 + htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1435 + union bpf_attr __user *uattr) 1436 + { 1437 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1438 + false, false); 1439 + } 1440 + 1441 + static int 1442 + htab_map_lookup_and_delete_batch(struct bpf_map *map, 1443 + const union bpf_attr *attr, 1444 + union bpf_attr __user *uattr) 1445 + { 1446 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1447 + false, false); 1448 + } 1449 + 1450 + static int 1451 + htab_lru_percpu_map_lookup_batch(struct bpf_map *map, 1452 + const union bpf_attr *attr, 1453 + union bpf_attr __user *uattr) 1454 + { 1455 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1456 + true, true); 1457 + } 1458 + 1459 + static int 1460 + htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1461 + const union bpf_attr *attr, 1462 + union bpf_attr __user *uattr) 1463 + { 1464 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1465 + true, true); 1466 + } 1467 + 1468 + static int 1469 + htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1470 + union bpf_attr __user *uattr) 1471 + { 1472 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1473 + true, false); 1474 + } 1475 + 1476 + static int 1477 + htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, 1478 + const union bpf_attr *attr, 1479 + 
union bpf_attr __user *uattr) 1480 + { 1481 + return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1482 + true, false); 1483 + } 1484 + 1245 1485 const struct bpf_map_ops htab_map_ops = { 1246 1486 .map_alloc_check = htab_map_alloc_check, 1247 1487 .map_alloc = htab_map_alloc, ··· 1502 1242 .map_delete_elem = htab_map_delete_elem, 1503 1243 .map_gen_lookup = htab_map_gen_lookup, 1504 1244 .map_seq_show_elem = htab_map_seq_show_elem, 1245 + BATCH_OPS(htab), 1505 1246 }; 1506 1247 1507 1248 const struct bpf_map_ops htab_lru_map_ops = { ··· 1516 1255 .map_delete_elem = htab_lru_map_delete_elem, 1517 1256 .map_gen_lookup = htab_lru_map_gen_lookup, 1518 1257 .map_seq_show_elem = htab_map_seq_show_elem, 1258 + BATCH_OPS(htab_lru), 1519 1259 }; 1520 1260 1521 1261 /* Called from eBPF program */ ··· 1630 1368 .map_update_elem = htab_percpu_map_update_elem, 1631 1369 .map_delete_elem = htab_map_delete_elem, 1632 1370 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 1371 + BATCH_OPS(htab_percpu), 1633 1372 }; 1634 1373 1635 1374 const struct bpf_map_ops htab_lru_percpu_map_ops = { ··· 1642 1379 .map_update_elem = htab_lru_percpu_map_update_elem, 1643 1380 .map_delete_elem = htab_lru_map_delete_elem, 1644 1381 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 1382 + BATCH_OPS(htab_lru_percpu), 1645 1383 }; 1646 1384 1647 1385 static int fd_htab_map_alloc_check(union bpf_attr *attr)
+12
kernel/bpf/helpers.c
··· 11 11 #include <linux/uidgid.h> 12 12 #include <linux/filter.h> 13 13 #include <linux/ctype.h> 14 + #include <linux/jiffies.h> 14 15 15 16 #include "../../lib/kstrtox.h" 16 17 ··· 312 311 ____bpf_spin_unlock(lock); 313 312 preempt_enable(); 314 313 } 314 + 315 + BPF_CALL_0(bpf_jiffies64) 316 + { 317 + return get_jiffies_64(); 318 + } 319 + 320 + const struct bpf_func_proto bpf_jiffies64_proto = { 321 + .func = bpf_jiffies64, 322 + .gpl_only = false, 323 + .ret_type = RET_INTEGER, 324 + }; 315 325 316 326 #ifdef CONFIG_CGROUPS 317 327 BPF_CALL_0(bpf_get_current_cgroup_id)
+13 -30
kernel/bpf/inode.c
··· 380 380 .unlink = simple_unlink, 381 381 }; 382 382 383 - static int bpf_obj_do_pin(const struct filename *pathname, void *raw, 383 + static int bpf_obj_do_pin(const char __user *pathname, void *raw, 384 384 enum bpf_type type) 385 385 { 386 386 struct dentry *dentry; ··· 389 389 umode_t mode; 390 390 int ret; 391 391 392 - dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); 392 + dentry = user_path_create(AT_FDCWD, pathname, &path, 0); 393 393 if (IS_ERR(dentry)) 394 394 return PTR_ERR(dentry); 395 395 ··· 422 422 423 423 int bpf_obj_pin_user(u32 ufd, const char __user *pathname) 424 424 { 425 - struct filename *pname; 426 425 enum bpf_type type; 427 426 void *raw; 428 427 int ret; 429 428 430 - pname = getname(pathname); 431 - if (IS_ERR(pname)) 432 - return PTR_ERR(pname); 433 - 434 429 raw = bpf_fd_probe_obj(ufd, &type); 435 - if (IS_ERR(raw)) { 436 - ret = PTR_ERR(raw); 437 - goto out; 438 - } 430 + if (IS_ERR(raw)) 431 + return PTR_ERR(raw); 439 432 440 - ret = bpf_obj_do_pin(pname, raw, type); 433 + ret = bpf_obj_do_pin(pathname, raw, type); 441 434 if (ret != 0) 442 435 bpf_any_put(raw, type); 443 - out: 444 - putname(pname); 436 + 445 437 return ret; 446 438 } 447 439 448 - static void *bpf_obj_do_get(const struct filename *pathname, 440 + static void *bpf_obj_do_get(const char __user *pathname, 449 441 enum bpf_type *type, int flags) 450 442 { 451 443 struct inode *inode; ··· 445 453 void *raw; 446 454 int ret; 447 455 448 - ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); 456 + ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); 449 457 if (ret) 450 458 return ERR_PTR(ret); 451 459 ··· 472 480 int bpf_obj_get_user(const char __user *pathname, int flags) 473 481 { 474 482 enum bpf_type type = BPF_TYPE_UNSPEC; 475 - struct filename *pname; 476 - int ret = -ENOENT; 477 483 int f_flags; 478 484 void *raw; 485 + int ret; 479 486 480 487 f_flags = bpf_get_file_flag(flags); 481 488 if (f_flags < 0) 482 489 return f_flags; 483 490 
484 - pname = getname(pathname); 485 - if (IS_ERR(pname)) 486 - return PTR_ERR(pname); 487 - 488 - raw = bpf_obj_do_get(pname, &type, f_flags); 489 - if (IS_ERR(raw)) { 490 - ret = PTR_ERR(raw); 491 - goto out; 492 - } 491 + raw = bpf_obj_do_get(pathname, &type, f_flags); 492 + if (IS_ERR(raw)) 493 + return PTR_ERR(raw); 493 494 494 495 if (type == BPF_TYPE_PROG) 495 496 ret = bpf_prog_new_fd(raw); 496 497 else if (type == BPF_TYPE_MAP) 497 498 ret = bpf_map_new_fd(raw, f_flags); 498 499 else 499 - goto out; 500 + return -ENOENT; 500 501 501 502 if (ret < 0) 502 503 bpf_any_put(raw, type); 503 - out: 504 - putname(pname); 505 504 return ret; 506 505 } 507 506
+2 -1
kernel/bpf/map_in_map.c
··· 22 22 */ 23 23 if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || 24 24 inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 25 - inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 25 + inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE || 26 + inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 26 27 fdput(f); 27 28 return ERR_PTR(-ENOTSUPP); 28 29 }
+484 -148
kernel/bpf/syscall.c
··· 129 129 return map; 130 130 } 131 131 132 + static u32 bpf_map_value_size(struct bpf_map *map) 133 + { 134 + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 135 + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 136 + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 137 + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 138 + return round_up(map->value_size, 8) * num_possible_cpus(); 139 + else if (IS_FD_MAP(map)) 140 + return sizeof(u32); 141 + else 142 + return map->value_size; 143 + } 144 + 145 + static void maybe_wait_bpf_programs(struct bpf_map *map) 146 + { 147 + /* Wait for any running BPF programs to complete so that 148 + * userspace, when we return to it, knows that all programs 149 + * that could be running use the new map value. 150 + */ 151 + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 152 + map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 153 + synchronize_rcu(); 154 + } 155 + 156 + static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, 157 + void *value, __u64 flags) 158 + { 159 + int err; 160 + 161 + /* Need to create a kthread, thus must support schedule */ 162 + if (bpf_map_is_dev_bound(map)) { 163 + return bpf_map_offload_update_elem(map, key, value, flags); 164 + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 165 + map->map_type == BPF_MAP_TYPE_SOCKHASH || 166 + map->map_type == BPF_MAP_TYPE_SOCKMAP || 167 + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 168 + return map->ops->map_update_elem(map, key, value, flags); 169 + } else if (IS_FD_PROG_ARRAY(map)) { 170 + return bpf_fd_array_map_update_elem(map, f.file, key, value, 171 + flags); 172 + } 173 + 174 + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from 175 + * inside bpf map update or delete otherwise deadlocks are possible 176 + */ 177 + preempt_disable(); 178 + __this_cpu_inc(bpf_prog_active); 179 + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 180 + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 181 + err = 
bpf_percpu_hash_update(map, key, value, flags); 182 + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 183 + err = bpf_percpu_array_update(map, key, value, flags); 184 + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 185 + err = bpf_percpu_cgroup_storage_update(map, key, value, 186 + flags); 187 + } else if (IS_FD_ARRAY(map)) { 188 + rcu_read_lock(); 189 + err = bpf_fd_array_map_update_elem(map, f.file, key, value, 190 + flags); 191 + rcu_read_unlock(); 192 + } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 193 + rcu_read_lock(); 194 + err = bpf_fd_htab_map_update_elem(map, f.file, key, value, 195 + flags); 196 + rcu_read_unlock(); 197 + } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 198 + /* rcu_read_lock() is not needed */ 199 + err = bpf_fd_reuseport_array_update_elem(map, key, value, 200 + flags); 201 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 202 + map->map_type == BPF_MAP_TYPE_STACK) { 203 + err = map->ops->map_push_elem(map, value, flags); 204 + } else { 205 + rcu_read_lock(); 206 + err = map->ops->map_update_elem(map, key, value, flags); 207 + rcu_read_unlock(); 208 + } 209 + __this_cpu_dec(bpf_prog_active); 210 + preempt_enable(); 211 + maybe_wait_bpf_programs(map); 212 + 213 + return err; 214 + } 215 + 216 + static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, 217 + __u64 flags) 218 + { 219 + void *ptr; 220 + int err; 221 + 222 + if (bpf_map_is_dev_bound(map)) 223 + return bpf_map_offload_lookup_elem(map, key, value); 224 + 225 + preempt_disable(); 226 + this_cpu_inc(bpf_prog_active); 227 + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 228 + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 229 + err = bpf_percpu_hash_copy(map, key, value); 230 + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 231 + err = bpf_percpu_array_copy(map, key, value); 232 + } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 233 + err = bpf_percpu_cgroup_storage_copy(map, 
key, value); 234 + } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 235 + err = bpf_stackmap_copy(map, key, value); 236 + } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 237 + err = bpf_fd_array_map_lookup_elem(map, key, value); 238 + } else if (IS_FD_HASH(map)) { 239 + err = bpf_fd_htab_map_lookup_elem(map, key, value); 240 + } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 241 + err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 242 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 243 + map->map_type == BPF_MAP_TYPE_STACK) { 244 + err = map->ops->map_peek_elem(map, value); 245 + } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 246 + /* struct_ops map requires directly updating "value" */ 247 + err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 248 + } else { 249 + rcu_read_lock(); 250 + if (map->ops->map_lookup_elem_sys_only) 251 + ptr = map->ops->map_lookup_elem_sys_only(map, key); 252 + else 253 + ptr = map->ops->map_lookup_elem(map, key); 254 + if (IS_ERR(ptr)) { 255 + err = PTR_ERR(ptr); 256 + } else if (!ptr) { 257 + err = -ENOENT; 258 + } else { 259 + err = 0; 260 + if (flags & BPF_F_LOCK) 261 + /* lock 'ptr' and copy everything but lock */ 262 + copy_map_value_locked(map, value, ptr, true); 263 + else 264 + copy_map_value(map, value, ptr); 265 + /* mask lock, since value wasn't zero inited */ 266 + check_and_init_map_lock(map, value); 267 + } 268 + rcu_read_unlock(); 269 + } 270 + 271 + this_cpu_dec(bpf_prog_active); 272 + preempt_enable(); 273 + maybe_wait_bpf_programs(map); 274 + 275 + return err; 276 + } 277 + 132 278 static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 133 279 { 134 280 /* We really just want to fail instead of triggering OOM killer ··· 774 628 return ret; 775 629 } 776 630 777 - #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id 631 + #define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id 778 632 /* called via syscall */ 779 633 static int 
map_create(union bpf_attr *attr) 780 634 { ··· 787 641 err = CHECK_ATTR(BPF_MAP_CREATE); 788 642 if (err) 789 643 return -EINVAL; 644 + 645 + if (attr->btf_vmlinux_value_type_id) { 646 + if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || 647 + attr->btf_key_type_id || attr->btf_value_type_id) 648 + return -EINVAL; 649 + } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 650 + return -EINVAL; 651 + } 790 652 791 653 f_flags = bpf_get_file_flag(attr->map_flags); 792 654 if (f_flags < 0) ··· 818 664 atomic64_set(&map->usercnt, 1); 819 665 mutex_init(&map->freeze_mutex); 820 666 821 - if (attr->btf_key_type_id || attr->btf_value_type_id) { 667 + map->spin_lock_off = -EINVAL; 668 + if (attr->btf_key_type_id || attr->btf_value_type_id || 669 + /* Even the map's value is a kernel's struct, 670 + * the bpf_prog.o must have BTF to begin with 671 + * to figure out the corresponding kernel's 672 + * counter part. Thus, attr->btf_fd has 673 + * to be valid also. 674 + */ 675 + attr->btf_vmlinux_value_type_id) { 822 676 struct btf *btf; 823 - 824 - if (!attr->btf_value_type_id) { 825 - err = -EINVAL; 826 - goto free_map; 827 - } 828 677 829 678 btf = btf_get_by_fd(attr->btf_fd); 830 679 if (IS_ERR(btf)) { 831 680 err = PTR_ERR(btf); 832 681 goto free_map; 833 682 } 683 + map->btf = btf; 834 684 835 - err = map_check_btf(map, btf, attr->btf_key_type_id, 836 - attr->btf_value_type_id); 837 - if (err) { 838 - btf_put(btf); 839 - goto free_map; 685 + if (attr->btf_value_type_id) { 686 + err = map_check_btf(map, btf, attr->btf_key_type_id, 687 + attr->btf_value_type_id); 688 + if (err) 689 + goto free_map; 840 690 } 841 691 842 - map->btf = btf; 843 692 map->btf_key_type_id = attr->btf_key_type_id; 844 693 map->btf_value_type_id = attr->btf_value_type_id; 845 - } else { 846 - map->spin_lock_off = -EINVAL; 694 + map->btf_vmlinux_value_type_id = 695 + attr->btf_vmlinux_value_type_id; 847 696 } 848 697 849 698 err = security_bpf_map_alloc(map); ··· 973 816 void __user 
*uvalue = u64_to_user_ptr(attr->value); 974 817 int ufd = attr->map_fd; 975 818 struct bpf_map *map; 976 - void *key, *value, *ptr; 819 + void *key, *value; 977 820 u32 value_size; 978 821 struct fd f; 979 822 int err; ··· 1005 848 goto err_put; 1006 849 } 1007 850 1008 - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1009 - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 1010 - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || 1011 - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) 1012 - value_size = round_up(map->value_size, 8) * num_possible_cpus(); 1013 - else if (IS_FD_MAP(map)) 1014 - value_size = sizeof(u32); 1015 - else 1016 - value_size = map->value_size; 851 + value_size = bpf_map_value_size(map); 1017 852 1018 853 err = -ENOMEM; 1019 854 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1020 855 if (!value) 1021 856 goto free_key; 1022 857 1023 - if (bpf_map_is_dev_bound(map)) { 1024 - err = bpf_map_offload_lookup_elem(map, key, value); 1025 - goto done; 1026 - } 1027 - 1028 - preempt_disable(); 1029 - this_cpu_inc(bpf_prog_active); 1030 - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1031 - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 1032 - err = bpf_percpu_hash_copy(map, key, value); 1033 - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 1034 - err = bpf_percpu_array_copy(map, key, value); 1035 - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 1036 - err = bpf_percpu_cgroup_storage_copy(map, key, value); 1037 - } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 1038 - err = bpf_stackmap_copy(map, key, value); 1039 - } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 1040 - err = bpf_fd_array_map_lookup_elem(map, key, value); 1041 - } else if (IS_FD_HASH(map)) { 1042 - err = bpf_fd_htab_map_lookup_elem(map, key, value); 1043 - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 1044 - err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 1045 - } else if (map->map_type == 
BPF_MAP_TYPE_QUEUE || 1046 - map->map_type == BPF_MAP_TYPE_STACK) { 1047 - err = map->ops->map_peek_elem(map, value); 1048 - } else { 1049 - rcu_read_lock(); 1050 - if (map->ops->map_lookup_elem_sys_only) 1051 - ptr = map->ops->map_lookup_elem_sys_only(map, key); 1052 - else 1053 - ptr = map->ops->map_lookup_elem(map, key); 1054 - if (IS_ERR(ptr)) { 1055 - err = PTR_ERR(ptr); 1056 - } else if (!ptr) { 1057 - err = -ENOENT; 1058 - } else { 1059 - err = 0; 1060 - if (attr->flags & BPF_F_LOCK) 1061 - /* lock 'ptr' and copy everything but lock */ 1062 - copy_map_value_locked(map, value, ptr, true); 1063 - else 1064 - copy_map_value(map, value, ptr); 1065 - /* mask lock, since value wasn't zero inited */ 1066 - check_and_init_map_lock(map, value); 1067 - } 1068 - rcu_read_unlock(); 1069 - } 1070 - this_cpu_dec(bpf_prog_active); 1071 - preempt_enable(); 1072 - 1073 - done: 858 + err = bpf_map_copy_value(map, key, value, attr->flags); 1074 859 if (err) 1075 860 goto free_value; 1076 861 ··· 1031 932 return err; 1032 933 } 1033 934 1034 - static void maybe_wait_bpf_programs(struct bpf_map *map) 1035 - { 1036 - /* Wait for any running BPF programs to complete so that 1037 - * userspace, when we return to it, knows that all programs 1038 - * that could be running use the new map value. 
1039 - */ 1040 - if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || 1041 - map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 1042 - synchronize_rcu(); 1043 - } 1044 935 1045 936 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1046 937 ··· 1086 997 if (copy_from_user(value, uvalue, value_size) != 0) 1087 998 goto free_value; 1088 999 1089 - /* Need to create a kthread, thus must support schedule */ 1090 - if (bpf_map_is_dev_bound(map)) { 1091 - err = bpf_map_offload_update_elem(map, key, value, attr->flags); 1092 - goto out; 1093 - } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || 1094 - map->map_type == BPF_MAP_TYPE_SOCKHASH || 1095 - map->map_type == BPF_MAP_TYPE_SOCKMAP) { 1096 - err = map->ops->map_update_elem(map, key, value, attr->flags); 1097 - goto out; 1098 - } else if (IS_FD_PROG_ARRAY(map)) { 1099 - err = bpf_fd_array_map_update_elem(map, f.file, key, value, 1100 - attr->flags); 1101 - goto out; 1102 - } 1000 + err = bpf_map_update_value(map, f, key, value, attr->flags); 1103 1001 1104 - /* must increment bpf_prog_active to avoid kprobe+bpf triggering from 1105 - * inside bpf map update or delete otherwise deadlocks are possible 1106 - */ 1107 - preempt_disable(); 1108 - __this_cpu_inc(bpf_prog_active); 1109 - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 1110 - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 1111 - err = bpf_percpu_hash_update(map, key, value, attr->flags); 1112 - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 1113 - err = bpf_percpu_array_update(map, key, value, attr->flags); 1114 - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 1115 - err = bpf_percpu_cgroup_storage_update(map, key, value, 1116 - attr->flags); 1117 - } else if (IS_FD_ARRAY(map)) { 1118 - rcu_read_lock(); 1119 - err = bpf_fd_array_map_update_elem(map, f.file, key, value, 1120 - attr->flags); 1121 - rcu_read_unlock(); 1122 - } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 1123 - rcu_read_lock(); 1124 - err = 
bpf_fd_htab_map_update_elem(map, f.file, key, value, 1125 - attr->flags); 1126 - rcu_read_unlock(); 1127 - } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 1128 - /* rcu_read_lock() is not needed */ 1129 - err = bpf_fd_reuseport_array_update_elem(map, key, value, 1130 - attr->flags); 1131 - } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 1132 - map->map_type == BPF_MAP_TYPE_STACK) { 1133 - err = map->ops->map_push_elem(map, value, attr->flags); 1134 - } else { 1135 - rcu_read_lock(); 1136 - err = map->ops->map_update_elem(map, key, value, attr->flags); 1137 - rcu_read_unlock(); 1138 - } 1139 - __this_cpu_dec(bpf_prog_active); 1140 - preempt_enable(); 1141 - maybe_wait_bpf_programs(map); 1142 - out: 1143 1002 free_value: 1144 1003 kfree(value); 1145 1004 free_key: ··· 1129 1092 if (bpf_map_is_dev_bound(map)) { 1130 1093 err = bpf_map_offload_delete_elem(map, key); 1131 1094 goto out; 1132 - } else if (IS_FD_PROG_ARRAY(map)) { 1095 + } else if (IS_FD_PROG_ARRAY(map) || 1096 + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1097 + /* These maps require sleepable context */ 1133 1098 err = map->ops->map_delete_elem(map, key); 1134 1099 goto out; 1135 1100 } ··· 1215 1176 kfree(key); 1216 1177 err_put: 1217 1178 fdput(f); 1179 + return err; 1180 + } 1181 + 1182 + int generic_map_delete_batch(struct bpf_map *map, 1183 + const union bpf_attr *attr, 1184 + union bpf_attr __user *uattr) 1185 + { 1186 + void __user *keys = u64_to_user_ptr(attr->batch.keys); 1187 + u32 cp, max_count; 1188 + int err = 0; 1189 + void *key; 1190 + 1191 + if (attr->batch.elem_flags & ~BPF_F_LOCK) 1192 + return -EINVAL; 1193 + 1194 + if ((attr->batch.elem_flags & BPF_F_LOCK) && 1195 + !map_value_has_spin_lock(map)) { 1196 + return -EINVAL; 1197 + } 1198 + 1199 + max_count = attr->batch.count; 1200 + if (!max_count) 1201 + return 0; 1202 + 1203 + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1204 + if (!key) 1205 + return -ENOMEM; 1206 + 1207 + for (cp = 0; cp < max_count; 
cp++) { 1208 + err = -EFAULT; 1209 + if (copy_from_user(key, keys + cp * map->key_size, 1210 + map->key_size)) 1211 + break; 1212 + 1213 + if (bpf_map_is_dev_bound(map)) { 1214 + err = bpf_map_offload_delete_elem(map, key); 1215 + break; 1216 + } 1217 + 1218 + preempt_disable(); 1219 + __this_cpu_inc(bpf_prog_active); 1220 + rcu_read_lock(); 1221 + err = map->ops->map_delete_elem(map, key); 1222 + rcu_read_unlock(); 1223 + __this_cpu_dec(bpf_prog_active); 1224 + preempt_enable(); 1225 + maybe_wait_bpf_programs(map); 1226 + if (err) 1227 + break; 1228 + } 1229 + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1230 + err = -EFAULT; 1231 + 1232 + kfree(key); 1233 + return err; 1234 + } 1235 + 1236 + int generic_map_update_batch(struct bpf_map *map, 1237 + const union bpf_attr *attr, 1238 + union bpf_attr __user *uattr) 1239 + { 1240 + void __user *values = u64_to_user_ptr(attr->batch.values); 1241 + void __user *keys = u64_to_user_ptr(attr->batch.keys); 1242 + u32 value_size, cp, max_count; 1243 + int ufd = attr->map_fd; 1244 + void *key, *value; 1245 + struct fd f; 1246 + int err = 0; 1247 + 1248 + f = fdget(ufd); 1249 + if (attr->batch.elem_flags & ~BPF_F_LOCK) 1250 + return -EINVAL; 1251 + 1252 + if ((attr->batch.elem_flags & BPF_F_LOCK) && 1253 + !map_value_has_spin_lock(map)) { 1254 + return -EINVAL; 1255 + } 1256 + 1257 + value_size = bpf_map_value_size(map); 1258 + 1259 + max_count = attr->batch.count; 1260 + if (!max_count) 1261 + return 0; 1262 + 1263 + key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1264 + if (!key) 1265 + return -ENOMEM; 1266 + 1267 + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1268 + if (!value) { 1269 + kfree(key); 1270 + return -ENOMEM; 1271 + } 1272 + 1273 + for (cp = 0; cp < max_count; cp++) { 1274 + err = -EFAULT; 1275 + if (copy_from_user(key, keys + cp * map->key_size, 1276 + map->key_size) || 1277 + copy_from_user(value, values + cp * value_size, value_size)) 1278 + break; 1279 + 1280 + err = 
bpf_map_update_value(map, f, key, value, 1281 + attr->batch.elem_flags); 1282 + 1283 + if (err) 1284 + break; 1285 + } 1286 + 1287 + if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) 1288 + err = -EFAULT; 1289 + 1290 + kfree(value); 1291 + kfree(key); 1292 + return err; 1293 + } 1294 + 1295 + #define MAP_LOOKUP_RETRIES 3 1296 + 1297 + int generic_map_lookup_batch(struct bpf_map *map, 1298 + const union bpf_attr *attr, 1299 + union bpf_attr __user *uattr) 1300 + { 1301 + void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); 1302 + void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1303 + void __user *values = u64_to_user_ptr(attr->batch.values); 1304 + void __user *keys = u64_to_user_ptr(attr->batch.keys); 1305 + void *buf, *buf_prevkey, *prev_key, *key, *value; 1306 + int err, retry = MAP_LOOKUP_RETRIES; 1307 + u32 value_size, cp, max_count; 1308 + 1309 + if (attr->batch.elem_flags & ~BPF_F_LOCK) 1310 + return -EINVAL; 1311 + 1312 + if ((attr->batch.elem_flags & BPF_F_LOCK) && 1313 + !map_value_has_spin_lock(map)) 1314 + return -EINVAL; 1315 + 1316 + value_size = bpf_map_value_size(map); 1317 + 1318 + max_count = attr->batch.count; 1319 + if (!max_count) 1320 + return 0; 1321 + 1322 + if (put_user(0, &uattr->batch.count)) 1323 + return -EFAULT; 1324 + 1325 + buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1326 + if (!buf_prevkey) 1327 + return -ENOMEM; 1328 + 1329 + buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN); 1330 + if (!buf) { 1331 + kvfree(buf_prevkey); 1332 + return -ENOMEM; 1333 + } 1334 + 1335 + err = -EFAULT; 1336 + prev_key = NULL; 1337 + if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) 1338 + goto free_buf; 1339 + key = buf; 1340 + value = key + map->key_size; 1341 + if (ubatch) 1342 + prev_key = buf_prevkey; 1343 + 1344 + for (cp = 0; cp < max_count;) { 1345 + rcu_read_lock(); 1346 + err = map->ops->map_get_next_key(map, prev_key, key); 1347 + rcu_read_unlock(); 1348 + if 
(err) 1349 + break; 1350 + err = bpf_map_copy_value(map, key, value, 1351 + attr->batch.elem_flags); 1352 + 1353 + if (err == -ENOENT) { 1354 + if (retry) { 1355 + retry--; 1356 + continue; 1357 + } 1358 + err = -EINTR; 1359 + break; 1360 + } 1361 + 1362 + if (err) 1363 + goto free_buf; 1364 + 1365 + if (copy_to_user(keys + cp * map->key_size, key, 1366 + map->key_size)) { 1367 + err = -EFAULT; 1368 + goto free_buf; 1369 + } 1370 + if (copy_to_user(values + cp * value_size, value, value_size)) { 1371 + err = -EFAULT; 1372 + goto free_buf; 1373 + } 1374 + 1375 + if (!prev_key) 1376 + prev_key = buf_prevkey; 1377 + 1378 + swap(prev_key, key); 1379 + retry = MAP_LOOKUP_RETRIES; 1380 + cp++; 1381 + } 1382 + 1383 + if (err == -EFAULT) 1384 + goto free_buf; 1385 + 1386 + if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 1387 + (cp && copy_to_user(uobatch, prev_key, map->key_size)))) 1388 + err = -EFAULT; 1389 + 1390 + free_buf: 1391 + kfree(buf_prevkey); 1392 + kfree(buf); 1218 1393 return err; 1219 1394 } 1220 1395 ··· 1925 1672 enum bpf_attach_type expected_attach_type, 1926 1673 u32 btf_id, u32 prog_fd) 1927 1674 { 1928 - switch (prog_type) { 1929 - case BPF_PROG_TYPE_TRACING: 1675 + if (btf_id) { 1930 1676 if (btf_id > BTF_MAX_TYPE) 1931 1677 return -EINVAL; 1932 - break; 1933 - default: 1934 - if (btf_id || prog_fd) 1678 + 1679 + switch (prog_type) { 1680 + case BPF_PROG_TYPE_TRACING: 1681 + case BPF_PROG_TYPE_STRUCT_OPS: 1682 + case BPF_PROG_TYPE_EXT: 1683 + break; 1684 + default: 1935 1685 return -EINVAL; 1936 - break; 1686 + } 1937 1687 } 1688 + 1689 + if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && 1690 + prog_type != BPF_PROG_TYPE_EXT) 1691 + return -EINVAL; 1938 1692 1939 1693 switch (prog_type) { 1940 1694 case BPF_PROG_TYPE_CGROUP_SOCK: ··· 1983 1723 default: 1984 1724 return -EINVAL; 1985 1725 } 1726 + case BPF_PROG_TYPE_EXT: 1727 + if (expected_attach_type) 1728 + return -EINVAL; 1729 + /* fallthrough */ 1986 1730 default: 1987 1731 return 0; 
1988 1732 } ··· 2189 1925 int tr_fd, err; 2190 1926 2191 1927 if (prog->expected_attach_type != BPF_TRACE_FENTRY && 2192 - prog->expected_attach_type != BPF_TRACE_FEXIT) { 1928 + prog->expected_attach_type != BPF_TRACE_FEXIT && 1929 + prog->type != BPF_PROG_TYPE_EXT) { 2193 1930 err = -EINVAL; 2194 1931 goto out_put_prog; 2195 1932 } ··· 2257 1992 2258 1993 if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && 2259 1994 prog->type != BPF_PROG_TYPE_TRACING && 1995 + prog->type != BPF_PROG_TYPE_EXT && 2260 1996 prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { 2261 1997 err = -EINVAL; 2262 1998 goto out_put_prog; 2263 1999 } 2264 2000 2265 - if (prog->type == BPF_PROG_TYPE_TRACING) { 2001 + if (prog->type == BPF_PROG_TYPE_TRACING || 2002 + prog->type == BPF_PROG_TYPE_EXT) { 2266 2003 if (attr->raw_tracepoint.name) { 2267 2004 /* The attach point for this category of programs 2268 2005 * should be specified via btf_id during program load. ··· 3084 2817 info.btf_key_type_id = map->btf_key_type_id; 3085 2818 info.btf_value_type_id = map->btf_value_type_id; 3086 2819 } 2820 + info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 3087 2821 3088 2822 if (bpf_map_is_dev_bound(map)) { 3089 2823 err = bpf_map_offload_info_fill(&info, map); ··· 3297 3029 return err; 3298 3030 } 3299 3031 3032 + #define BPF_MAP_BATCH_LAST_FIELD batch.flags 3033 + 3034 + #define BPF_DO_BATCH(fn) \ 3035 + do { \ 3036 + if (!fn) { \ 3037 + err = -ENOTSUPP; \ 3038 + goto err_put; \ 3039 + } \ 3040 + err = fn(map, attr, uattr); \ 3041 + } while (0) 3042 + 3043 + static int bpf_map_do_batch(const union bpf_attr *attr, 3044 + union bpf_attr __user *uattr, 3045 + int cmd) 3046 + { 3047 + struct bpf_map *map; 3048 + int err, ufd; 3049 + struct fd f; 3050 + 3051 + if (CHECK_ATTR(BPF_MAP_BATCH)) 3052 + return -EINVAL; 3053 + 3054 + ufd = attr->batch.map_fd; 3055 + f = fdget(ufd); 3056 + map = __bpf_map_get(f); 3057 + if (IS_ERR(map)) 3058 + return PTR_ERR(map); 3059 + 3060 + if ((cmd == 
BPF_MAP_LOOKUP_BATCH || 3061 + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && 3062 + !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 3063 + err = -EPERM; 3064 + goto err_put; 3065 + } 3066 + 3067 + if (cmd != BPF_MAP_LOOKUP_BATCH && 3068 + !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 3069 + err = -EPERM; 3070 + goto err_put; 3071 + } 3072 + 3073 + if (cmd == BPF_MAP_LOOKUP_BATCH) 3074 + BPF_DO_BATCH(map->ops->map_lookup_batch); 3075 + else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) 3076 + BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); 3077 + else if (cmd == BPF_MAP_UPDATE_BATCH) 3078 + BPF_DO_BATCH(map->ops->map_update_batch); 3079 + else 3080 + BPF_DO_BATCH(map->ops->map_delete_batch); 3081 + 3082 + err_put: 3083 + fdput(f); 3084 + return err; 3085 + } 3086 + 3300 3087 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) 3301 3088 { 3302 3089 union bpf_attr attr = {}; ··· 3448 3125 break; 3449 3126 case BPF_MAP_LOOKUP_AND_DELETE_ELEM: 3450 3127 err = map_lookup_and_delete_elem(&attr); 3128 + break; 3129 + case BPF_MAP_LOOKUP_BATCH: 3130 + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH); 3131 + break; 3132 + case BPF_MAP_LOOKUP_AND_DELETE_BATCH: 3133 + err = bpf_map_do_batch(&attr, uattr, 3134 + BPF_MAP_LOOKUP_AND_DELETE_BATCH); 3135 + break; 3136 + case BPF_MAP_UPDATE_BATCH: 3137 + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH); 3138 + break; 3139 + case BPF_MAP_DELETE_BATCH: 3140 + err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH); 3451 3141 break; 3452 3142 default: 3453 3143 err = -EINVAL;
+53 -6
kernel/bpf/trampoline.c
··· 5 5 #include <linux/filter.h> 6 6 #include <linux/ftrace.h> 7 7 8 + /* dummy _ops. The verifier will operate on target program's ops. */ 9 + const struct bpf_verifier_ops bpf_extension_verifier_ops = { 10 + }; 11 + const struct bpf_prog_ops bpf_extension_prog_ops = { 12 + }; 13 + 8 14 /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */ 9 15 #define TRAMPOLINE_HASH_BITS 10 10 16 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) ··· 166 160 if (fexit_cnt) 167 161 flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; 168 162 169 - err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags, 163 + /* Though the second half of trampoline page is unused a task could be 164 + * preempted in the middle of the first half of trampoline and two 165 + * updates to trampoline would change the code from underneath the 166 + * preempted task. Hence wait for tasks to voluntarily schedule or go 167 + * to userspace. 168 + */ 169 + synchronize_rcu_tasks(); 170 + 171 + err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, 172 + &tr->func.model, flags, 170 173 fentry, fentry_cnt, 171 174 fexit, fexit_cnt, 172 175 tr->func.addr); 173 - if (err) 176 + if (err < 0) 174 177 goto out; 175 178 176 179 if (tr->selector) ··· 200 185 switch (t) { 201 186 case BPF_TRACE_FENTRY: 202 187 return BPF_TRAMP_FENTRY; 203 - default: 188 + case BPF_TRACE_FEXIT: 204 189 return BPF_TRAMP_FEXIT; 190 + default: 191 + return BPF_TRAMP_REPLACE; 205 192 } 206 193 } 207 194 ··· 212 195 enum bpf_tramp_prog_type kind; 213 196 struct bpf_trampoline *tr; 214 197 int err = 0; 198 + int cnt; 215 199 216 200 tr = prog->aux->trampoline; 217 201 kind = bpf_attach_type_to_tramp(prog->expected_attach_type); 218 202 mutex_lock(&tr->mutex); 219 - if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT] 220 - >= BPF_MAX_TRAMP_PROGS) { 203 + if (tr->extension_prog) { 204 + /* cannot attach fentry/fexit if extension prog is attached. 
205 + * cannot overwrite extension prog either. 206 + */ 207 + err = -EBUSY; 208 + goto out; 209 + } 210 + cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; 211 + if (kind == BPF_TRAMP_REPLACE) { 212 + /* Cannot attach extension if fentry/fexit are in use. */ 213 + if (cnt) { 214 + err = -EBUSY; 215 + goto out; 216 + } 217 + tr->extension_prog = prog; 218 + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, 219 + prog->bpf_func); 220 + goto out; 221 + } 222 + if (cnt >= BPF_MAX_TRAMP_PROGS) { 221 223 err = -E2BIG; 222 224 goto out; 223 225 } ··· 267 231 tr = prog->aux->trampoline; 268 232 kind = bpf_attach_type_to_tramp(prog->expected_attach_type); 269 233 mutex_lock(&tr->mutex); 234 + if (kind == BPF_TRAMP_REPLACE) { 235 + WARN_ON_ONCE(!tr->extension_prog); 236 + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, 237 + tr->extension_prog->bpf_func, NULL); 238 + tr->extension_prog = NULL; 239 + goto out; 240 + } 270 241 hlist_del(&prog->aux->tramp_hlist); 271 242 tr->progs_cnt[kind]--; 272 243 err = bpf_trampoline_update(prog->aux->trampoline); 244 + out: 273 245 mutex_unlock(&tr->mutex); 274 246 return err; 275 247 } ··· 294 250 goto out; 295 251 if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) 296 252 goto out; 253 + /* wait for tasks to get out of trampoline before freeing it */ 254 + synchronize_rcu_tasks(); 297 255 bpf_jit_free_exec(tr->image); 298 256 hlist_del(&tr->hlist); 299 257 kfree(tr); ··· 342 296 } 343 297 344 298 int __weak 345 - arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, 299 + arch_prepare_bpf_trampoline(void *image, void *image_end, 300 + const struct btf_func_model *m, u32 flags, 346 301 struct bpf_prog **fentry_progs, int fentry_cnt, 347 302 struct bpf_prog **fexit_progs, int fexit_cnt, 348 303 void *orig_call)
+405 -99
kernel/bpf/verifier.c
··· 1122 1122 regs[BPF_REG_FP].type = PTR_TO_STACK; 1123 1123 mark_reg_known_zero(env, regs, BPF_REG_FP); 1124 1124 regs[BPF_REG_FP].frameno = state->frameno; 1125 - 1126 - /* 1st arg to a function */ 1127 - regs[BPF_REG_1].type = PTR_TO_CTX; 1128 - mark_reg_known_zero(env, regs, BPF_REG_1); 1129 1125 } 1130 1126 1131 1127 #define BPF_MAIN_FUNC (-1) ··· 1912 1916 case PTR_TO_TCP_SOCK: 1913 1917 case PTR_TO_TCP_SOCK_OR_NULL: 1914 1918 case PTR_TO_XDP_SOCK: 1919 + case PTR_TO_BTF_ID: 1915 1920 return true; 1916 1921 default: 1917 1922 return false; ··· 2735 2738 } 2736 2739 #endif 2737 2740 2738 - static int check_ctx_reg(struct bpf_verifier_env *env, 2739 - const struct bpf_reg_state *reg, int regno) 2741 + int check_ctx_reg(struct bpf_verifier_env *env, 2742 + const struct bpf_reg_state *reg, int regno) 2740 2743 { 2741 2744 /* Access to ctx or passing it to a helper is only allowed in 2742 2745 * its original, unmodified form. ··· 2855 2858 u32 btf_id; 2856 2859 int ret; 2857 2860 2858 - if (atype != BPF_READ) { 2859 - verbose(env, "only read is supported\n"); 2860 - return -EACCES; 2861 - } 2862 - 2863 2861 if (off < 0) { 2864 2862 verbose(env, 2865 2863 "R%d is ptr_%s invalid negative access: off=%d\n", ··· 2871 2879 return -EACCES; 2872 2880 } 2873 2881 2874 - ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); 2882 + if (env->ops->btf_struct_access) { 2883 + ret = env->ops->btf_struct_access(&env->log, t, off, size, 2884 + atype, &btf_id); 2885 + } else { 2886 + if (atype != BPF_READ) { 2887 + verbose(env, "only read is supported\n"); 2888 + return -EACCES; 2889 + } 2890 + 2891 + ret = btf_struct_access(&env->log, t, off, size, atype, 2892 + &btf_id); 2893 + } 2894 + 2875 2895 if (ret < 0) 2876 2896 return ret; 2877 2897 2878 - if (ret == SCALAR_VALUE) { 2879 - mark_reg_unknown(env, regs, value_regno); 2880 - return 0; 2898 + if (atype == BPF_READ) { 2899 + if (ret == SCALAR_VALUE) { 2900 + mark_reg_unknown(env, regs, value_regno); 2901 + return 
0; 2902 + } 2903 + mark_reg_known_zero(env, regs, value_regno); 2904 + regs[value_regno].type = PTR_TO_BTF_ID; 2905 + regs[value_regno].btf_id = btf_id; 2881 2906 } 2882 - mark_reg_known_zero(env, regs, value_regno); 2883 - regs[value_regno].type = PTR_TO_BTF_ID; 2884 - regs[value_regno].btf_id = btf_id; 2907 + 2885 2908 return 0; 2886 2909 } 2887 2910 ··· 3952 3945 return 0; 3953 3946 } 3954 3947 3948 + static void clear_caller_saved_regs(struct bpf_verifier_env *env, 3949 + struct bpf_reg_state *regs) 3950 + { 3951 + int i; 3952 + 3953 + /* after the call registers r0 - r5 were scratched */ 3954 + for (i = 0; i < CALLER_SAVED_REGS; i++) { 3955 + mark_reg_not_init(env, regs, caller_saved[i]); 3956 + check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 3957 + } 3958 + } 3959 + 3955 3960 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 3956 3961 int *insn_idx) 3957 3962 { 3958 3963 struct bpf_verifier_state *state = env->cur_state; 3964 + struct bpf_func_info_aux *func_info_aux; 3959 3965 struct bpf_func_state *caller, *callee; 3960 3966 int i, err, subprog, target_insn; 3967 + bool is_global = false; 3961 3968 3962 3969 if (state->curframe + 1 >= MAX_CALL_FRAMES) { 3963 3970 verbose(env, "the call stack of %d frames is too deep\n", ··· 3992 3971 verbose(env, "verifier bug. Frame %d already allocated\n", 3993 3972 state->curframe + 1); 3994 3973 return -EFAULT; 3974 + } 3975 + 3976 + func_info_aux = env->prog->aux->func_info_aux; 3977 + if (func_info_aux) 3978 + is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 3979 + err = btf_check_func_arg_match(env, subprog, caller->regs); 3980 + if (err == -EFAULT) 3981 + return err; 3982 + if (is_global) { 3983 + if (err) { 3984 + verbose(env, "Caller passes invalid args into func#%d\n", 3985 + subprog); 3986 + return err; 3987 + } else { 3988 + if (env->log.level & BPF_LOG_LEVEL) 3989 + verbose(env, 3990 + "Func#%d is global and valid. 
Skipping.\n", 3991 + subprog); 3992 + clear_caller_saved_regs(env, caller->regs); 3993 + 3994 + /* All global functions return SCALAR_VALUE */ 3995 + mark_reg_unknown(env, caller->regs, BPF_REG_0); 3996 + 3997 + /* continue with next insn after call */ 3998 + return 0; 3999 + } 3995 4000 } 3996 4001 3997 4002 callee = kzalloc(sizeof(*callee), GFP_KERNEL); ··· 4046 3999 for (i = BPF_REG_1; i <= BPF_REG_5; i++) 4047 4000 callee->regs[i] = caller->regs[i]; 4048 4001 4049 - /* after the call registers r0 - r5 were scratched */ 4050 - for (i = 0; i < CALLER_SAVED_REGS; i++) { 4051 - mark_reg_not_init(env, caller->regs, caller_saved[i]); 4052 - check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); 4053 - } 4002 + clear_caller_saved_regs(env, caller->regs); 4054 4003 4055 4004 /* only increment it after check_reg_arg() finished */ 4056 4005 state->curframe++; 4057 - 4058 - if (btf_check_func_arg_match(env, subprog)) 4059 - return -EINVAL; 4060 4006 4061 4007 /* and go analyze first insn of the callee */ 4062 4008 *insn_idx = target_insn; ··· 6400 6360 static int check_return_code(struct bpf_verifier_env *env) 6401 6361 { 6402 6362 struct tnum enforce_attach_type_range = tnum_unknown; 6363 + const struct bpf_prog *prog = env->prog; 6403 6364 struct bpf_reg_state *reg; 6404 6365 struct tnum range = tnum_range(0, 1); 6366 + int err; 6367 + 6368 + /* The struct_ops func-ptr's return type could be "void" */ 6369 + if (env->prog->type == BPF_PROG_TYPE_STRUCT_OPS && 6370 + !prog->aux->attach_func_proto->type) 6371 + return 0; 6372 + 6373 + /* eBPF calling convetion is such that R0 is used 6374 + * to return the value from eBPF program. 
6375 + * Make sure that it's readable at this time 6376 + * of bpf_exit, which means that program wrote 6377 + * something into it earlier 6378 + */ 6379 + err = check_reg_arg(env, BPF_REG_0, SRC_OP); 6380 + if (err) 6381 + return err; 6382 + 6383 + if (is_pointer_value(env, BPF_REG_0)) { 6384 + verbose(env, "R0 leaks addr as return value\n"); 6385 + return -EACCES; 6386 + } 6405 6387 6406 6388 switch (env->prog->type) { 6407 6389 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: ··· 6812 6750 6813 6751 /* check type_id */ 6814 6752 type = btf_type_by_id(btf, krecord[i].type_id); 6815 - if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { 6753 + if (!type || !btf_type_is_func(type)) { 6816 6754 verbose(env, "invalid type id %d in func info", 6817 6755 krecord[i].type_id); 6818 6756 ret = -EINVAL; 6819 6757 goto err_free; 6820 6758 } 6759 + info_aux[i].linkage = BTF_INFO_VLEN(type->info); 6821 6760 prev_offset = krecord[i].insn_off; 6822 6761 urecord += urec_size; 6823 6762 } ··· 7798 7735 7799 7736 static int do_check(struct bpf_verifier_env *env) 7800 7737 { 7801 - struct bpf_verifier_state *state; 7738 + struct bpf_verifier_state *state = env->cur_state; 7802 7739 struct bpf_insn *insns = env->prog->insnsi; 7803 7740 struct bpf_reg_state *regs; 7804 7741 int insn_cnt = env->prog->len; 7805 7742 bool do_print_state = false; 7806 7743 int prev_insn_idx = -1; 7807 - 7808 - env->prev_linfo = NULL; 7809 - 7810 - state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 7811 - if (!state) 7812 - return -ENOMEM; 7813 - state->curframe = 0; 7814 - state->speculative = false; 7815 - state->branches = 1; 7816 - state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 7817 - if (!state->frame[0]) { 7818 - kfree(state); 7819 - return -ENOMEM; 7820 - } 7821 - env->cur_state = state; 7822 - init_func_state(env, state->frame[0], 7823 - BPF_MAIN_FUNC /* callsite */, 7824 - 0 /* frameno */, 7825 - 0 /* subprogno, zero == main subprog */); 7826 - 7827 - if 
(btf_check_func_arg_match(env, 0)) 7828 - return -EINVAL; 7829 7744 7830 7745 for (;;) { 7831 7746 struct bpf_insn *insn; ··· 7882 7841 } 7883 7842 7884 7843 regs = cur_regs(env); 7885 - env->insn_aux_data[env->insn_idx].seen = true; 7844 + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 7886 7845 prev_insn_idx = env->insn_idx; 7887 7846 7888 7847 if (class == BPF_ALU || class == BPF_ALU64) { ··· 8068 8027 if (err) 8069 8028 return err; 8070 8029 8071 - /* eBPF calling convetion is such that R0 is used 8072 - * to return the value from eBPF program. 8073 - * Make sure that it's readable at this time 8074 - * of bpf_exit, which means that program wrote 8075 - * something into it earlier 8076 - */ 8077 - err = check_reg_arg(env, BPF_REG_0, SRC_OP); 8078 - if (err) 8079 - return err; 8080 - 8081 - if (is_pointer_value(env, BPF_REG_0)) { 8082 - verbose(env, "R0 leaks addr as return value\n"); 8083 - return -EACCES; 8084 - } 8085 - 8086 8030 err = check_return_code(env); 8087 8031 if (err) 8088 8032 return err; ··· 8102 8076 return err; 8103 8077 8104 8078 env->insn_idx++; 8105 - env->insn_aux_data[env->insn_idx].seen = true; 8079 + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 8106 8080 } else { 8107 8081 verbose(env, "invalid BPF_LD mode\n"); 8108 8082 return -EINVAL; ··· 8115 8089 env->insn_idx++; 8116 8090 } 8117 8091 8118 - env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 8119 8092 return 0; 8120 8093 } 8121 8094 ··· 8171 8146 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && 8172 8147 !bpf_offload_prog_map_match(prog, map)) { 8173 8148 verbose(env, "offload device mismatch between prog and map\n"); 8149 + return -EINVAL; 8150 + } 8151 + 8152 + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 8153 + verbose(env, "bpf_struct_ops map cannot be used in prog\n"); 8174 8154 return -EINVAL; 8175 8155 } 8176 8156 ··· 8391 8361 memcpy(new_data + off + cnt - 1, old_data + off, 8392 8362 sizeof(struct bpf_insn_aux_data) * 
(prog_len - off - cnt + 1)); 8393 8363 for (i = off; i < off + cnt - 1; i++) { 8394 - new_data[i].seen = true; 8364 + new_data[i].seen = env->pass_cnt; 8395 8365 new_data[i].zext_dst = insn_has_def32(env, insn + i); 8396 8366 } 8397 8367 env->insn_aux_data = new_data; ··· 8870 8840 convert_ctx_access = bpf_xdp_sock_convert_ctx_access; 8871 8841 break; 8872 8842 case PTR_TO_BTF_ID: 8873 - if (type == BPF_WRITE) { 8843 + if (type == BPF_READ) { 8844 + insn->code = BPF_LDX | BPF_PROBE_MEM | 8845 + BPF_SIZE((insn)->code); 8846 + env->prog->aux->num_exentries++; 8847 + } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 8874 8848 verbose(env, "Writes through BTF pointers are not allowed\n"); 8875 8849 return -EINVAL; 8876 8850 } 8877 - insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); 8878 - env->prog->aux->num_exentries++; 8879 8851 continue; 8880 8852 default: 8881 8853 continue; ··· 9457 9425 goto patch_call_imm; 9458 9426 } 9459 9427 9428 + if (prog->jit_requested && BITS_PER_LONG == 64 && 9429 + insn->imm == BPF_FUNC_jiffies64) { 9430 + struct bpf_insn ld_jiffies_addr[2] = { 9431 + BPF_LD_IMM64(BPF_REG_0, 9432 + (unsigned long)&jiffies), 9433 + }; 9434 + 9435 + insn_buf[0] = ld_jiffies_addr[0]; 9436 + insn_buf[1] = ld_jiffies_addr[1]; 9437 + insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, 9438 + BPF_REG_0, 0); 9439 + cnt = 3; 9440 + 9441 + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 9442 + cnt); 9443 + if (!new_prog) 9444 + return -ENOMEM; 9445 + 9446 + delta += cnt - 1; 9447 + env->prog = prog = new_prog; 9448 + insn = new_prog->insnsi + i + delta; 9449 + continue; 9450 + } 9451 + 9460 9452 patch_call_imm: 9461 9453 fn = env->ops->get_func_proto(insn->imm, env->prog); 9462 9454 /* all functions that have prototype and verifier allowed ··· 9527 9471 kfree(sl); 9528 9472 sl = sln; 9529 9473 } 9474 + env->free_list = NULL; 9530 9475 9531 9476 if (!env->explored_states) 9532 9477 return; ··· 9541 9484 kfree(sl); 9542 9485 sl = sln; 9543 
9486 } 9487 + env->explored_states[i] = NULL; 9488 + } 9489 + } 9490 + 9491 + /* The verifier is using insn_aux_data[] to store temporary data during 9492 + * verification and to store information for passes that run after the 9493 + * verification like dead code sanitization. do_check_common() for subprogram N 9494 + * may analyze many other subprograms. sanitize_insn_aux_data() clears all 9495 + * temporary data after do_check_common() finds that subprogram N cannot be 9496 + * verified independently. pass_cnt counts the number of times 9497 + * do_check_common() was run and insn->aux->seen tells the pass number 9498 + * insn_aux_data was touched. These variables are compared to clear temporary 9499 + * data from failed pass. For testing and experiments do_check_common() can be 9500 + * run multiple times even when prior attempt to verify is unsuccessful. 9501 + */ 9502 + static void sanitize_insn_aux_data(struct bpf_verifier_env *env) 9503 + { 9504 + struct bpf_insn *insn = env->prog->insnsi; 9505 + struct bpf_insn_aux_data *aux; 9506 + int i, class; 9507 + 9508 + for (i = 0; i < env->prog->len; i++) { 9509 + class = BPF_CLASS(insn[i].code); 9510 + if (class != BPF_LDX && class != BPF_STX) 9511 + continue; 9512 + aux = &env->insn_aux_data[i]; 9513 + if (aux->seen != env->pass_cnt) 9514 + continue; 9515 + memset(aux, 0, offsetof(typeof(*aux), orig_idx)); 9516 + } 9517 + } 9518 + 9519 + static int do_check_common(struct bpf_verifier_env *env, int subprog) 9520 + { 9521 + struct bpf_verifier_state *state; 9522 + struct bpf_reg_state *regs; 9523 + int ret, i; 9524 + 9525 + env->prev_linfo = NULL; 9526 + env->pass_cnt++; 9527 + 9528 + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); 9529 + if (!state) 9530 + return -ENOMEM; 9531 + state->curframe = 0; 9532 + state->speculative = false; 9533 + state->branches = 1; 9534 + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); 9535 + if (!state->frame[0]) { 9536 + kfree(state); 9537 + 
return -ENOMEM; 9538 + } 9539 + env->cur_state = state; 9540 + init_func_state(env, state->frame[0], 9541 + BPF_MAIN_FUNC /* callsite */, 9542 + 0 /* frameno */, 9543 + subprog); 9544 + 9545 + regs = state->frame[state->curframe]->regs; 9546 + if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { 9547 + ret = btf_prepare_func_args(env, subprog, regs); 9548 + if (ret) 9549 + goto out; 9550 + for (i = BPF_REG_1; i <= BPF_REG_5; i++) { 9551 + if (regs[i].type == PTR_TO_CTX) 9552 + mark_reg_known_zero(env, regs, i); 9553 + else if (regs[i].type == SCALAR_VALUE) 9554 + mark_reg_unknown(env, regs, i); 9555 + } 9556 + } else { 9557 + /* 1st arg to a function */ 9558 + regs[BPF_REG_1].type = PTR_TO_CTX; 9559 + mark_reg_known_zero(env, regs, BPF_REG_1); 9560 + ret = btf_check_func_arg_match(env, subprog, regs); 9561 + if (ret == -EFAULT) 9562 + /* unlikely verifier bug. abort. 9563 + * ret == 0 and ret < 0 are sadly acceptable for 9564 + * main() function due to backward compatibility. 9565 + * Like socket filter program may be written as: 9566 + * int bpf_prog(struct pt_regs *ctx) 9567 + * and never dereference that ctx in the program. 9568 + * 'struct pt_regs' is a type mismatch for socket 9569 + * filter that should be using 'struct __sk_buff'. 9570 + */ 9571 + goto out; 9544 9572 } 9545 9573 9546 - kvfree(env->explored_states); 9574 + ret = do_check(env); 9575 + out: 9576 + /* check for NULL is necessary, since cur_state can be freed inside 9577 + * do_check() under memory pressure. 9578 + */ 9579 + if (env->cur_state) { 9580 + free_verifier_state(env->cur_state, true); 9581 + env->cur_state = NULL; 9582 + } 9583 + while (!pop_stack(env, NULL, NULL)); 9584 + free_states(env); 9585 + if (ret) 9586 + /* clean aux data in case subprog was rejected */ 9587 + sanitize_insn_aux_data(env); 9588 + return ret; 9547 9589 } 9590 + 9591 + /* Verify all global functions in a BPF program one by one based on their BTF. 9592 + * All global functions must pass verification. 
Otherwise the whole program is rejected. 9593 + * Consider: 9594 + * int bar(int); 9595 + * int foo(int f) 9596 + * { 9597 + * return bar(f); 9598 + * } 9599 + * int bar(int b) 9600 + * { 9601 + * ... 9602 + * } 9603 + * foo() will be verified first for R1=any_scalar_value. During verification it 9604 + * will be assumed that bar() already verified successfully and call to bar() 9605 + * from foo() will be checked for type match only. Later bar() will be verified 9606 + * independently to check that it's safe for R1=any_scalar_value. 9607 + */ 9608 + static int do_check_subprogs(struct bpf_verifier_env *env) 9609 + { 9610 + struct bpf_prog_aux *aux = env->prog->aux; 9611 + int i, ret; 9612 + 9613 + if (!aux->func_info) 9614 + return 0; 9615 + 9616 + for (i = 1; i < env->subprog_cnt; i++) { 9617 + if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) 9618 + continue; 9619 + env->insn_idx = env->subprog_info[i].start; 9620 + WARN_ON_ONCE(env->insn_idx == 0); 9621 + ret = do_check_common(env, i); 9622 + if (ret) { 9623 + return ret; 9624 + } else if (env->log.level & BPF_LOG_LEVEL) { 9625 + verbose(env, 9626 + "Func#%d is safe for any args that match its prototype\n", 9627 + i); 9628 + } 9629 + } 9630 + return 0; 9631 + } 9632 + 9633 + static int do_check_main(struct bpf_verifier_env *env) 9634 + { 9635 + int ret; 9636 + 9637 + env->insn_idx = 0; 9638 + ret = do_check_common(env, 0); 9639 + if (!ret) 9640 + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; 9641 + return ret; 9642 + } 9643 + 9548 9644 9549 9645 static void print_verification_stats(struct bpf_verifier_env *env) 9550 9646 { ··· 9723 9513 env->peak_states, env->longest_mark_read_walk); 9724 9514 } 9725 9515 9516 + static int check_struct_ops_btf_id(struct bpf_verifier_env *env) 9517 + { 9518 + const struct btf_type *t, *func_proto; 9519 + const struct bpf_struct_ops *st_ops; 9520 + const struct btf_member *member; 9521 + struct bpf_prog *prog = env->prog; 9522 + u32 btf_id, member_idx; 9523 
+ const char *mname; 9524 + 9525 + btf_id = prog->aux->attach_btf_id; 9526 + st_ops = bpf_struct_ops_find(btf_id); 9527 + if (!st_ops) { 9528 + verbose(env, "attach_btf_id %u is not a supported struct\n", 9529 + btf_id); 9530 + return -ENOTSUPP; 9531 + } 9532 + 9533 + t = st_ops->type; 9534 + member_idx = prog->expected_attach_type; 9535 + if (member_idx >= btf_type_vlen(t)) { 9536 + verbose(env, "attach to invalid member idx %u of struct %s\n", 9537 + member_idx, st_ops->name); 9538 + return -EINVAL; 9539 + } 9540 + 9541 + member = &btf_type_member(t)[member_idx]; 9542 + mname = btf_name_by_offset(btf_vmlinux, member->name_off); 9543 + func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, 9544 + NULL); 9545 + if (!func_proto) { 9546 + verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", 9547 + mname, member_idx, st_ops->name); 9548 + return -EINVAL; 9549 + } 9550 + 9551 + if (st_ops->check_member) { 9552 + int err = st_ops->check_member(t, member); 9553 + 9554 + if (err) { 9555 + verbose(env, "attach to unsupported member %s of struct %s\n", 9556 + mname, st_ops->name); 9557 + return err; 9558 + } 9559 + } 9560 + 9561 + prog->aux->attach_func_proto = func_proto; 9562 + prog->aux->attach_func_name = mname; 9563 + env->ops = st_ops->verifier_ops; 9564 + 9565 + return 0; 9566 + } 9567 + 9726 9568 static int check_attach_btf_id(struct bpf_verifier_env *env) 9727 9569 { 9728 9570 struct bpf_prog *prog = env->prog; 9571 + bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; 9729 9572 struct bpf_prog *tgt_prog = prog->aux->linked_prog; 9730 9573 u32 btf_id = prog->aux->attach_btf_id; 9731 9574 const char prefix[] = "btf_trace_"; ··· 9791 9528 long addr; 9792 9529 u64 key; 9793 9530 9794 - if (prog->type != BPF_PROG_TYPE_TRACING) 9531 + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 9532 + return check_struct_ops_btf_id(env); 9533 + 9534 + if (prog->type != BPF_PROG_TYPE_TRACING && !prog_extension) 9795 9535 return 0; 9796 9536 9797 9537 if 
(!btf_id) { ··· 9830 9564 return -EINVAL; 9831 9565 } 9832 9566 conservative = aux->func_info_aux[subprog].unreliable; 9567 + if (prog_extension) { 9568 + if (conservative) { 9569 + verbose(env, 9570 + "Cannot replace static functions\n"); 9571 + return -EINVAL; 9572 + } 9573 + if (!prog->jit_requested) { 9574 + verbose(env, 9575 + "Extension programs should be JITed\n"); 9576 + return -EINVAL; 9577 + } 9578 + env->ops = bpf_verifier_ops[tgt_prog->type]; 9579 + } 9580 + if (!tgt_prog->jited) { 9581 + verbose(env, "Can attach to only JITed progs\n"); 9582 + return -EINVAL; 9583 + } 9584 + if (tgt_prog->type == prog->type) { 9585 + /* Cannot fentry/fexit another fentry/fexit program. 9586 + * Cannot attach program extension to another extension. 9587 + * It's ok to attach fentry/fexit to extension program. 9588 + */ 9589 + verbose(env, "Cannot recursively attach\n"); 9590 + return -EINVAL; 9591 + } 9592 + if (tgt_prog->type == BPF_PROG_TYPE_TRACING && 9593 + prog_extension && 9594 + (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || 9595 + tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { 9596 + /* Program extensions can extend all program types 9597 + * except fentry/fexit. The reason is the following. 9598 + * The fentry/fexit programs are used for performance 9599 + * analysis, stats and can be attached to any program 9600 + * type except themselves. When extension program is 9601 + * replacing XDP function it is necessary to allow 9602 + * performance analysis of all functions. Both original 9603 + * XDP program and its program extension. Hence 9604 + * attaching fentry/fexit to BPF_PROG_TYPE_EXT is 9605 + * allowed. If extending of fentry/fexit was allowed it 9606 + * would be possible to create long call chain 9607 + * fentry->extension->fentry->extension beyond 9608 + * reasonable stack size. Hence extending fentry is not 9609 + * allowed. 
9610 + */ 9611 + verbose(env, "Cannot extend fentry/fexit\n"); 9612 + return -EINVAL; 9613 + } 9833 9614 key = ((u64)aux->id) << 32 | btf_id; 9834 9615 } else { 9616 + if (prog_extension) { 9617 + verbose(env, "Cannot replace kernel functions\n"); 9618 + return -EINVAL; 9619 + } 9835 9620 key = btf_id; 9836 9621 } 9837 9622 ··· 9920 9603 prog->aux->attach_func_proto = t; 9921 9604 prog->aux->attach_btf_trace = true; 9922 9605 return 0; 9606 + default: 9607 + if (!prog_extension) 9608 + return -EINVAL; 9609 + /* fallthrough */ 9923 9610 case BPF_TRACE_FENTRY: 9924 9611 case BPF_TRACE_FEXIT: 9925 9612 if (!btf_type_is_func(t)) { ··· 9931 9610 btf_id); 9932 9611 return -EINVAL; 9933 9612 } 9613 + if (prog_extension && 9614 + btf_check_type_match(env, prog, btf, t)) 9615 + return -EINVAL; 9934 9616 t = btf_type_by_id(btf, t->type); 9935 9617 if (!btf_type_is_func_proto(t)) 9936 9618 return -EINVAL; ··· 9957 9633 if (ret < 0) 9958 9634 goto out; 9959 9635 if (tgt_prog) { 9960 - if (!tgt_prog->jited) { 9961 - /* for now */ 9962 - verbose(env, "Can trace only JITed BPF progs\n"); 9963 - ret = -EINVAL; 9964 - goto out; 9965 - } 9966 - if (tgt_prog->type == BPF_PROG_TYPE_TRACING) { 9967 - /* prevent cycles */ 9968 - verbose(env, "Cannot recursively attach\n"); 9969 - ret = -EINVAL; 9970 - goto out; 9971 - } 9972 9636 if (subprog == 0) 9973 9637 addr = (long) tgt_prog->bpf_func; 9974 9638 else ··· 9978 9666 if (ret) 9979 9667 bpf_trampoline_put(tr); 9980 9668 return ret; 9981 - default: 9982 - return -EINVAL; 9983 9669 } 9984 9670 } 9985 9671 ··· 10047 9737 goto skip_full_check; 10048 9738 } 10049 9739 10050 - ret = check_attach_btf_id(env); 10051 - if (ret) 10052 - goto skip_full_check; 10053 - 10054 9740 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); 10055 9741 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 10056 9742 env->strict_alignment = true; ··· 10083 9777 if (ret < 0) 10084 9778 goto skip_full_check; 10085 9779 9780 + ret = 
check_attach_btf_id(env); 9781 + if (ret) 9782 + goto skip_full_check; 9783 + 10086 9784 ret = check_cfg(env); 10087 9785 if (ret < 0) 10088 9786 goto skip_full_check; 10089 9787 10090 - ret = do_check(env); 10091 - if (env->cur_state) { 10092 - free_verifier_state(env->cur_state, true); 10093 - env->cur_state = NULL; 10094 - } 9788 + ret = do_check_subprogs(env); 9789 + ret = ret ?: do_check_main(env); 10095 9790 10096 9791 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) 10097 9792 ret = bpf_prog_offload_finalize(env); 10098 9793 10099 9794 skip_full_check: 10100 - while (!pop_stack(env, NULL, NULL)); 10101 - free_states(env); 9795 + kvfree(env->explored_states); 10102 9796 10103 9797 if (ret == 0) 10104 9798 ret = check_max_stack_depth(env);
+24 -3
kernel/trace/bpf_trace.c
··· 703 703 struct irq_work irq_work; 704 704 struct task_struct *task; 705 705 u32 sig; 706 + enum pid_type type; 706 707 }; 707 708 708 709 static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); ··· 713 712 struct send_signal_irq_work *work; 714 713 715 714 work = container_of(entry, struct send_signal_irq_work, irq_work); 716 - group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID); 715 + group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); 717 716 } 718 717 719 - BPF_CALL_1(bpf_send_signal, u32, sig) 718 + static int bpf_send_signal_common(u32 sig, enum pid_type type) 720 719 { 721 720 struct send_signal_irq_work *work = NULL; 722 721 ··· 749 748 */ 750 749 work->task = current; 751 750 work->sig = sig; 751 + work->type = type; 752 752 irq_work_queue(&work->irq_work); 753 753 return 0; 754 754 } 755 755 756 - return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID); 756 + return group_send_sig_info(sig, SEND_SIG_PRIV, current, type); 757 + } 758 + 759 + BPF_CALL_1(bpf_send_signal, u32, sig) 760 + { 761 + return bpf_send_signal_common(sig, PIDTYPE_TGID); 757 762 } 758 763 759 764 static const struct bpf_func_proto bpf_send_signal_proto = { 760 765 .func = bpf_send_signal, 766 + .gpl_only = false, 767 + .ret_type = RET_INTEGER, 768 + .arg1_type = ARG_ANYTHING, 769 + }; 770 + 771 + BPF_CALL_1(bpf_send_signal_thread, u32, sig) 772 + { 773 + return bpf_send_signal_common(sig, PIDTYPE_PID); 774 + } 775 + 776 + static const struct bpf_func_proto bpf_send_signal_thread_proto = { 777 + .func = bpf_send_signal_thread, 761 778 .gpl_only = false, 762 779 .ret_type = RET_INTEGER, 763 780 .arg1_type = ARG_ANYTHING, ··· 841 822 #endif 842 823 case BPF_FUNC_send_signal: 843 824 return &bpf_send_signal_proto; 825 + case BPF_FUNC_send_signal_thread: 826 + return &bpf_send_signal_thread_proto; 844 827 default: 845 828 return NULL; 846 829 }
+2
net/core/dev.c
··· 9835 9835 9836 9836 free_percpu(dev->pcpu_refcnt); 9837 9837 dev->pcpu_refcnt = NULL; 9838 + free_percpu(dev->xdp_bulkq); 9839 + dev->xdp_bulkq = NULL; 9838 9840 9839 9841 netdev_unregister_lockdep_key(dev); 9840 9842
+23 -73
net/core/filter.c
··· 3459 3459 .arg2_type = ARG_ANYTHING, 3460 3460 }; 3461 3461 3462 - static int __bpf_tx_xdp(struct net_device *dev, 3463 - struct bpf_map *map, 3464 - struct xdp_buff *xdp, 3465 - u32 index) 3466 - { 3467 - struct xdp_frame *xdpf; 3468 - int err, sent; 3469 - 3470 - if (!dev->netdev_ops->ndo_xdp_xmit) { 3471 - return -EOPNOTSUPP; 3472 - } 3473 - 3474 - err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data); 3475 - if (unlikely(err)) 3476 - return err; 3477 - 3478 - xdpf = convert_to_xdp_frame(xdp); 3479 - if (unlikely(!xdpf)) 3480 - return -EOVERFLOW; 3481 - 3482 - sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH); 3483 - if (sent <= 0) 3484 - return sent; 3485 - return 0; 3486 - } 3487 - 3488 - static noinline int 3489 - xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp, 3490 - struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri) 3491 - { 3492 - struct net_device *fwd; 3493 - u32 index = ri->tgt_index; 3494 - int err; 3495 - 3496 - fwd = dev_get_by_index_rcu(dev_net(dev), index); 3497 - ri->tgt_index = 0; 3498 - if (unlikely(!fwd)) { 3499 - err = -EINVAL; 3500 - goto err; 3501 - } 3502 - 3503 - err = __bpf_tx_xdp(fwd, NULL, xdp, 0); 3504 - if (unlikely(err)) 3505 - goto err; 3506 - 3507 - _trace_xdp_redirect(dev, xdp_prog, index); 3508 - return 0; 3509 - err: 3510 - _trace_xdp_redirect_err(dev, xdp_prog, index, err); 3511 - return err; 3512 - } 3513 - 3514 3462 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, 3515 3463 struct bpf_map *map, struct xdp_buff *xdp) 3516 3464 { ··· 3471 3523 case BPF_MAP_TYPE_XSKMAP: 3472 3524 return __xsk_map_redirect(fwd, xdp); 3473 3525 default: 3474 - break; 3526 + return -EBADRQC; 3475 3527 } 3476 3528 return 0; 3477 3529 } 3478 3530 3479 - void xdp_do_flush_map(void) 3531 + void xdp_do_flush(void) 3480 3532 { 3481 - __dev_map_flush(); 3533 + __dev_flush(); 3482 3534 __cpu_map_flush(); 3483 3535 __xsk_map_flush(); 3484 3536 } 3485 - EXPORT_SYMBOL_GPL(xdp_do_flush_map); 3537 + 
EXPORT_SYMBOL_GPL(xdp_do_flush); 3486 3538 3487 3539 static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) 3488 3540 { ··· 3517 3569 } 3518 3570 } 3519 3571 3520 - static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, 3521 - struct bpf_prog *xdp_prog, struct bpf_map *map, 3522 - struct bpf_redirect_info *ri) 3572 + int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 3573 + struct bpf_prog *xdp_prog) 3523 3574 { 3575 + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3576 + struct bpf_map *map = READ_ONCE(ri->map); 3524 3577 u32 index = ri->tgt_index; 3525 3578 void *fwd = ri->tgt_value; 3526 3579 int err; ··· 3530 3581 ri->tgt_value = NULL; 3531 3582 WRITE_ONCE(ri->map, NULL); 3532 3583 3533 - err = __bpf_tx_xdp_map(dev, fwd, map, xdp); 3584 + if (unlikely(!map)) { 3585 + fwd = dev_get_by_index_rcu(dev_net(dev), index); 3586 + if (unlikely(!fwd)) { 3587 + err = -EINVAL; 3588 + goto err; 3589 + } 3590 + 3591 + err = dev_xdp_enqueue(fwd, xdp, dev); 3592 + } else { 3593 + err = __bpf_tx_xdp_map(dev, fwd, map, xdp); 3594 + } 3595 + 3534 3596 if (unlikely(err)) 3535 3597 goto err; 3536 3598 ··· 3550 3590 err: 3551 3591 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); 3552 3592 return err; 3553 - } 3554 - 3555 - int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, 3556 - struct bpf_prog *xdp_prog) 3557 - { 3558 - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); 3559 - struct bpf_map *map = READ_ONCE(ri->map); 3560 - 3561 - if (likely(map)) 3562 - return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri); 3563 - 3564 - return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri); 3565 3593 } 3566 3594 EXPORT_SYMBOL_GPL(xdp_do_redirect); 3567 3595 ··· 5883 5935 return false; 5884 5936 } 5885 5937 5886 - static const struct bpf_func_proto * 5938 + const struct bpf_func_proto * 5887 5939 bpf_base_func_proto(enum bpf_func_id func_id) 5888 5940 { 5889 5941 switch 
(func_id) { ··· 5923 5975 return &bpf_spin_unlock_proto; 5924 5976 case BPF_FUNC_trace_printk: 5925 5977 return bpf_get_trace_printk_proto(); 5978 + case BPF_FUNC_jiffies64: 5979 + return &bpf_jiffies64_proto; 5926 5980 default: 5927 5981 return NULL; 5928 5982 }
+1 -1
net/core/sock.c
··· 2786 2786 rcu_read_unlock(); 2787 2787 } 2788 2788 2789 - static void sock_def_readable(struct sock *sk) 2789 + void sock_def_readable(struct sock *sk) 2790 2790 { 2791 2791 struct socket_wq *wq; 2792 2792
+4
net/ipv4/Makefile
··· 65 65 66 66 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 67 67 xfrm4_output.o xfrm4_protocol.o 68 + 69 + ifeq ($(CONFIG_BPF_JIT),y) 70 + obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o 71 + endif
+252
net/ipv4/bpf_tcp_ca.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <linux/types.h> 5 + #include <linux/bpf_verifier.h> 6 + #include <linux/bpf.h> 7 + #include <linux/btf.h> 8 + #include <linux/filter.h> 9 + #include <net/tcp.h> 10 + 11 + static u32 optional_ops[] = { 12 + offsetof(struct tcp_congestion_ops, init), 13 + offsetof(struct tcp_congestion_ops, release), 14 + offsetof(struct tcp_congestion_ops, set_state), 15 + offsetof(struct tcp_congestion_ops, cwnd_event), 16 + offsetof(struct tcp_congestion_ops, in_ack_event), 17 + offsetof(struct tcp_congestion_ops, pkts_acked), 18 + offsetof(struct tcp_congestion_ops, min_tso_segs), 19 + offsetof(struct tcp_congestion_ops, sndbuf_expand), 20 + offsetof(struct tcp_congestion_ops, cong_control), 21 + }; 22 + 23 + static u32 unsupported_ops[] = { 24 + offsetof(struct tcp_congestion_ops, get_info), 25 + }; 26 + 27 + static const struct btf_type *tcp_sock_type; 28 + static u32 tcp_sock_id, sock_id; 29 + 30 + static int bpf_tcp_ca_init(struct btf *btf) 31 + { 32 + s32 type_id; 33 + 34 + type_id = btf_find_by_name_kind(btf, "sock", BTF_KIND_STRUCT); 35 + if (type_id < 0) 36 + return -EINVAL; 37 + sock_id = type_id; 38 + 39 + type_id = btf_find_by_name_kind(btf, "tcp_sock", BTF_KIND_STRUCT); 40 + if (type_id < 0) 41 + return -EINVAL; 42 + tcp_sock_id = type_id; 43 + tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); 44 + 45 + return 0; 46 + } 47 + 48 + static bool is_optional(u32 member_offset) 49 + { 50 + unsigned int i; 51 + 52 + for (i = 0; i < ARRAY_SIZE(optional_ops); i++) { 53 + if (member_offset == optional_ops[i]) 54 + return true; 55 + } 56 + 57 + return false; 58 + } 59 + 60 + static bool is_unsupported(u32 member_offset) 61 + { 62 + unsigned int i; 63 + 64 + for (i = 0; i < ARRAY_SIZE(unsupported_ops); i++) { 65 + if (member_offset == unsupported_ops[i]) 66 + return true; 67 + } 68 + 69 + return false; 70 + } 71 + 72 + extern struct btf *btf_vmlinux; 73 + 74 + static bool 
bpf_tcp_ca_is_valid_access(int off, int size, 75 + enum bpf_access_type type, 76 + const struct bpf_prog *prog, 77 + struct bpf_insn_access_aux *info) 78 + { 79 + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) 80 + return false; 81 + if (type != BPF_READ) 82 + return false; 83 + if (off % size != 0) 84 + return false; 85 + 86 + if (!btf_ctx_access(off, size, type, prog, info)) 87 + return false; 88 + 89 + if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) 90 + /* promote it to tcp_sock */ 91 + info->btf_id = tcp_sock_id; 92 + 93 + return true; 94 + } 95 + 96 + static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, 97 + const struct btf_type *t, int off, 98 + int size, enum bpf_access_type atype, 99 + u32 *next_btf_id) 100 + { 101 + size_t end; 102 + 103 + if (atype == BPF_READ) 104 + return btf_struct_access(log, t, off, size, atype, next_btf_id); 105 + 106 + if (t != tcp_sock_type) { 107 + bpf_log(log, "only read is supported\n"); 108 + return -EACCES; 109 + } 110 + 111 + switch (off) { 112 + case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv): 113 + end = offsetofend(struct inet_connection_sock, icsk_ca_priv); 114 + break; 115 + case offsetof(struct inet_connection_sock, icsk_ack.pending): 116 + end = offsetofend(struct inet_connection_sock, 117 + icsk_ack.pending); 118 + break; 119 + case offsetof(struct tcp_sock, snd_cwnd): 120 + end = offsetofend(struct tcp_sock, snd_cwnd); 121 + break; 122 + case offsetof(struct tcp_sock, snd_cwnd_cnt): 123 + end = offsetofend(struct tcp_sock, snd_cwnd_cnt); 124 + break; 125 + case offsetof(struct tcp_sock, snd_ssthresh): 126 + end = offsetofend(struct tcp_sock, snd_ssthresh); 127 + break; 128 + case offsetof(struct tcp_sock, ecn_flags): 129 + end = offsetofend(struct tcp_sock, ecn_flags); 130 + break; 131 + default: 132 + bpf_log(log, "no write support to tcp_sock at off %d\n", off); 133 + return -EACCES; 134 + } 135 + 136 + if (off + size > end) { 137 + bpf_log(log, 138 + 
"write access at off %d with size %d beyond the member of tcp_sock ended at %zu\n", 139 + off, size, end); 140 + return -EACCES; 141 + } 142 + 143 + return NOT_INIT; 144 + } 145 + 146 + BPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt) 147 + { 148 + /* bpf_tcp_ca prog cannot have NULL tp */ 149 + __tcp_send_ack((struct sock *)tp, rcv_nxt); 150 + return 0; 151 + } 152 + 153 + static const struct bpf_func_proto bpf_tcp_send_ack_proto = { 154 + .func = bpf_tcp_send_ack, 155 + .gpl_only = false, 156 + /* In case we want to report error later */ 157 + .ret_type = RET_INTEGER, 158 + .arg1_type = ARG_PTR_TO_BTF_ID, 159 + .arg2_type = ARG_ANYTHING, 160 + .btf_id = &tcp_sock_id, 161 + }; 162 + 163 + static const struct bpf_func_proto * 164 + bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, 165 + const struct bpf_prog *prog) 166 + { 167 + switch (func_id) { 168 + case BPF_FUNC_tcp_send_ack: 169 + return &bpf_tcp_send_ack_proto; 170 + default: 171 + return bpf_base_func_proto(func_id); 172 + } 173 + } 174 + 175 + static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { 176 + .get_func_proto = bpf_tcp_ca_get_func_proto, 177 + .is_valid_access = bpf_tcp_ca_is_valid_access, 178 + .btf_struct_access = bpf_tcp_ca_btf_struct_access, 179 + }; 180 + 181 + static int bpf_tcp_ca_init_member(const struct btf_type *t, 182 + const struct btf_member *member, 183 + void *kdata, const void *udata) 184 + { 185 + const struct tcp_congestion_ops *utcp_ca; 186 + struct tcp_congestion_ops *tcp_ca; 187 + size_t tcp_ca_name_len; 188 + int prog_fd; 189 + u32 moff; 190 + 191 + utcp_ca = (const struct tcp_congestion_ops *)udata; 192 + tcp_ca = (struct tcp_congestion_ops *)kdata; 193 + 194 + moff = btf_member_bit_offset(t, member) / 8; 195 + switch (moff) { 196 + case offsetof(struct tcp_congestion_ops, flags): 197 + if (utcp_ca->flags & ~TCP_CONG_MASK) 198 + return -EINVAL; 199 + tcp_ca->flags = utcp_ca->flags; 200 + return 1; 201 + case offsetof(struct tcp_congestion_ops, 
name): 202 + tcp_ca_name_len = strnlen(utcp_ca->name, sizeof(utcp_ca->name)); 203 + if (!tcp_ca_name_len || 204 + tcp_ca_name_len == sizeof(utcp_ca->name)) 205 + return -EINVAL; 206 + if (tcp_ca_find(utcp_ca->name)) 207 + return -EEXIST; 208 + memcpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)); 209 + return 1; 210 + } 211 + 212 + if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL)) 213 + return 0; 214 + 215 + /* Ensure bpf_prog is provided for compulsory func ptr */ 216 + prog_fd = (int)(*(unsigned long *)(udata + moff)); 217 + if (!prog_fd && !is_optional(moff) && !is_unsupported(moff)) 218 + return -EINVAL; 219 + 220 + return 0; 221 + } 222 + 223 + static int bpf_tcp_ca_check_member(const struct btf_type *t, 224 + const struct btf_member *member) 225 + { 226 + if (is_unsupported(btf_member_bit_offset(t, member) / 8)) 227 + return -ENOTSUPP; 228 + return 0; 229 + } 230 + 231 + static int bpf_tcp_ca_reg(void *kdata) 232 + { 233 + return tcp_register_congestion_control(kdata); 234 + } 235 + 236 + static void bpf_tcp_ca_unreg(void *kdata) 237 + { 238 + tcp_unregister_congestion_control(kdata); 239 + } 240 + 241 + /* Avoid sparse warning. It is only used in bpf_struct_ops.c. */ 242 + extern struct bpf_struct_ops bpf_tcp_congestion_ops; 243 + 244 + struct bpf_struct_ops bpf_tcp_congestion_ops = { 245 + .verifier_ops = &bpf_tcp_ca_verifier_ops, 246 + .reg = bpf_tcp_ca_reg, 247 + .unreg = bpf_tcp_ca_unreg, 248 + .check_member = bpf_tcp_ca_check_member, 249 + .init_member = bpf_tcp_ca_init_member, 250 + .init = bpf_tcp_ca_init, 251 + .name = "tcp_congestion_ops", 252 + };
+8 -8
net/ipv4/tcp_cong.c
··· 21 21 static LIST_HEAD(tcp_cong_list); 22 22 23 23 /* Simple linear search, don't expect many entries! */ 24 - static struct tcp_congestion_ops *tcp_ca_find(const char *name) 24 + struct tcp_congestion_ops *tcp_ca_find(const char *name) 25 25 { 26 26 struct tcp_congestion_ops *e; 27 27 ··· 162 162 163 163 rcu_read_lock(); 164 164 ca = rcu_dereference(net->ipv4.tcp_congestion_control); 165 - if (unlikely(!try_module_get(ca->owner))) 165 + if (unlikely(!bpf_try_module_get(ca, ca->owner))) 166 166 ca = &tcp_reno; 167 167 icsk->icsk_ca_ops = ca; 168 168 rcu_read_unlock(); ··· 208 208 209 209 if (icsk->icsk_ca_ops->release) 210 210 icsk->icsk_ca_ops->release(sk); 211 - module_put(icsk->icsk_ca_ops->owner); 211 + bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); 212 212 } 213 213 214 214 /* Used by sysctl to change default congestion control */ ··· 222 222 ca = tcp_ca_find_autoload(net, name); 223 223 if (!ca) { 224 224 ret = -ENOENT; 225 - } else if (!try_module_get(ca->owner)) { 225 + } else if (!bpf_try_module_get(ca, ca->owner)) { 226 226 ret = -EBUSY; 227 227 } else { 228 228 prev = xchg(&net->ipv4.tcp_congestion_control, ca); 229 229 if (prev) 230 - module_put(prev->owner); 230 + bpf_module_put(prev, prev->owner); 231 231 232 232 ca->flags |= TCP_CONG_NON_RESTRICTED; 233 233 ret = 0; ··· 366 366 } else if (!load) { 367 367 const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops; 368 368 369 - if (try_module_get(ca->owner)) { 369 + if (bpf_try_module_get(ca, ca->owner)) { 370 370 if (reinit) { 371 371 tcp_reinit_congestion_control(sk, ca); 372 372 } else { 373 373 icsk->icsk_ca_ops = ca; 374 - module_put(old_ca->owner); 374 + bpf_module_put(old_ca, old_ca->owner); 375 375 } 376 376 } else { 377 377 err = -EBUSY; 378 378 } 379 379 } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) { 380 380 err = -EPERM; 381 - } else if (!try_module_get(ca->owner)) { 381 + } else if (!bpf_try_module_get(ca, ca->owner)) { 382 382 err = -EBUSY; 383 
383 } else { 384 384 tcp_reinit_congestion_control(sk, ca);
+4 -2
net/ipv4/tcp_ipv4.c
··· 2678 2678 int cpu; 2679 2679 2680 2680 if (net->ipv4.tcp_congestion_control) 2681 - module_put(net->ipv4.tcp_congestion_control->owner); 2681 + bpf_module_put(net->ipv4.tcp_congestion_control, 2682 + net->ipv4.tcp_congestion_control->owner); 2682 2683 2683 2684 for_each_possible_cpu(cpu) 2684 2685 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); ··· 2786 2785 2787 2786 /* Reno is always built in */ 2788 2787 if (!net_eq(net, &init_net) && 2789 - try_module_get(init_net.ipv4.tcp_congestion_control->owner)) 2788 + bpf_try_module_get(init_net.ipv4.tcp_congestion_control, 2789 + init_net.ipv4.tcp_congestion_control->owner)) 2790 2790 net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control; 2791 2791 else 2792 2792 net->ipv4.tcp_congestion_control = &tcp_reno;
+2 -2
net/ipv4/tcp_minisocks.c
··· 414 414 415 415 rcu_read_lock(); 416 416 ca = tcp_ca_find_key(ca_key); 417 - if (likely(ca && try_module_get(ca->owner))) { 417 + if (likely(ca && bpf_try_module_get(ca, ca->owner))) { 418 418 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); 419 419 icsk->icsk_ca_ops = ca; 420 420 ca_got_dst = true; ··· 425 425 /* If no valid choice made yet, assign current system default ca. */ 426 426 if (!ca_got_dst && 427 427 (!icsk->icsk_ca_setsockopt || 428 - !try_module_get(icsk->icsk_ca_ops->owner))) 428 + !bpf_try_module_get(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner))) 429 429 tcp_assign_congestion_control(sk); 430 430 431 431 tcp_set_ca_state(sk, TCP_CA_Open);
+2 -2
net/ipv4/tcp_output.c
··· 3372 3372 3373 3373 rcu_read_lock(); 3374 3374 ca = tcp_ca_find_key(ca_key); 3375 - if (likely(ca && try_module_get(ca->owner))) { 3376 - module_put(icsk->icsk_ca_ops->owner); 3375 + if (likely(ca && bpf_try_module_get(ca, ca->owner))) { 3376 + bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); 3377 3377 icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); 3378 3378 icsk->icsk_ca_ops = ca; 3379 3379 }
+4 -3
net/xdp/xdp_umem.c
··· 249 249 xdp_umem_unmap_pages(umem); 250 250 xdp_umem_unpin_pages(umem); 251 251 252 - kfree(umem->pages); 252 + kvfree(umem->pages); 253 253 umem->pages = NULL; 254 254 255 255 xdp_umem_unaccount_pages(umem); ··· 409 409 if (err) 410 410 goto out_account; 411 411 412 - umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL); 412 + umem->pages = kvcalloc(umem->npgs, sizeof(*umem->pages), 413 + GFP_KERNEL_ACCOUNT); 413 414 if (!umem->pages) { 414 415 err = -ENOMEM; 415 416 goto out_pin; ··· 420 419 if (!err) 421 420 return 0; 422 421 423 - kfree(umem->pages); 422 + kvfree(umem->pages); 424 423 425 424 out_pin: 426 425 xdp_umem_unpin_pages(umem);
+1 -1
net/xdp/xsk.c
··· 217 217 static void xsk_flush(struct xdp_sock *xs) 218 218 { 219 219 xskq_prod_submit(xs->rx); 220 - xs->sk.sk_data_ready(&xs->sk); 220 + sock_def_readable(&xs->sk); 221 221 } 222 222 223 223 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+2 -3
samples/bpf/Makefile
··· 184 184 TPROGS_CFLAGS += -Wstrict-prototypes 185 185 186 186 TPROGS_CFLAGS += -I$(objtree)/usr/include 187 - TPROGS_CFLAGS += -I$(srctree)/tools/lib/bpf/ 188 187 TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ 189 188 TPROGS_CFLAGS += -I$(srctree)/tools/lib/ 190 189 TPROGS_CFLAGS += -I$(srctree)/tools/include ··· 253 254 254 255 clean: 255 256 $(MAKE) -C ../../ M=$(CURDIR) clean 256 - @rm -f *~ 257 + @find $(CURDIR) -type f -name '*~' -delete 257 258 258 259 $(LIBBPF): FORCE 259 260 # Fix up variables inherited from Kbuild that tools/ build system won't like ··· 304 305 @echo " CLANG-bpf " $@ 305 306 $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ 306 307 -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ 307 - -I$(srctree)/tools/lib/bpf/ \ 308 + -I$(srctree)/tools/lib/ \ 308 309 -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ 309 310 -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ 310 311 -Wno-gnu-variable-sized-type-not-at-end \
+1 -1
samples/bpf/cpustat_kern.c
··· 3 3 #include <linux/version.h> 4 4 #include <linux/ptrace.h> 5 5 #include <uapi/linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 /* 9 9 * The CPU number, cstate number and pstate number are based
+1 -1
samples/bpf/fds_example.c
··· 14 14 15 15 #include <bpf/bpf.h> 16 16 17 - #include "libbpf.h" 17 + #include <bpf/libbpf.h> 18 18 #include "bpf_insn.h" 19 19 #include "sock_example.h" 20 20
+2 -2
samples/bpf/hbm.c
··· 50 50 #include "cgroup_helpers.h" 51 51 #include "hbm.h" 52 52 #include "bpf_util.h" 53 - #include "bpf.h" 54 - #include "libbpf.h" 53 + #include <bpf/bpf.h> 54 + #include <bpf/libbpf.h> 55 55 56 56 bool outFlag = true; 57 57 int minRate = 1000; /* cgroup rate limit in Mbps */
+2 -2
samples/bpf/hbm_kern.h
··· 22 22 #include <uapi/linux/pkt_cls.h> 23 23 #include <net/ipv6.h> 24 24 #include <net/inet_ecn.h> 25 - #include "bpf_endian.h" 26 - #include "bpf_helpers.h" 25 + #include <bpf/bpf_endian.h> 26 + #include <bpf/bpf_helpers.h> 27 27 #include "hbm.h" 28 28 29 29 #define DROP_PKT 0
+1 -1
samples/bpf/ibumad_kern.c
··· 13 13 #define KBUILD_MODNAME "ibumad_count_pkts_by_class" 14 14 #include <uapi/linux/bpf.h> 15 15 16 - #include "bpf_helpers.h" 16 + #include <bpf/bpf_helpers.h> 17 17 18 18 19 19 struct bpf_map_def SEC("maps") read_count = {
+1 -1
samples/bpf/ibumad_user.c
··· 25 25 26 26 #include "bpf_load.h" 27 27 #include "bpf_util.h" 28 - #include "libbpf.h" 28 + #include <bpf/libbpf.h> 29 29 30 30 static void dump_counts(int fd) 31 31 {
+1 -1
samples/bpf/lathist_kern.c
··· 8 8 #include <linux/version.h> 9 9 #include <linux/ptrace.h> 10 10 #include <uapi/linux/bpf.h> 11 - #include "bpf_helpers.h" 11 + #include <bpf/bpf_helpers.h> 12 12 13 13 #define MAX_ENTRIES 20 14 14 #define MAX_CPU 4
+1 -1
samples/bpf/lwt_len_hist_kern.c
··· 14 14 #include <uapi/linux/if_ether.h> 15 15 #include <uapi/linux/ip.h> 16 16 #include <uapi/linux/in.h> 17 - #include "bpf_helpers.h" 17 + #include <bpf/bpf_helpers.h> 18 18 19 19 # define printk(fmt, ...) \ 20 20 ({ \
+2 -2
samples/bpf/map_perf_test_kern.c
··· 8 8 #include <linux/netdevice.h> 9 9 #include <linux/version.h> 10 10 #include <uapi/linux/bpf.h> 11 - #include "bpf_helpers.h" 11 + #include <bpf/bpf_helpers.h> 12 12 #include "bpf_legacy.h" 13 - #include "bpf_tracing.h" 13 + #include <bpf/bpf_tracing.h> 14 14 15 15 #define MAX_ENTRIES 1000 16 16 #define MAX_NR_CPUS 1024
+2 -2
samples/bpf/offwaketime_kern.c
··· 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 7 #include <uapi/linux/bpf.h> 8 - #include "bpf_helpers.h" 9 - #include "bpf_tracing.h" 8 + #include <bpf/bpf_helpers.h> 9 + #include <bpf/bpf_tracing.h> 10 10 #include <uapi/linux/ptrace.h> 11 11 #include <uapi/linux/perf_event.h> 12 12 #include <linux/version.h>
+1 -1
samples/bpf/offwaketime_user.c
··· 12 12 #include <assert.h> 13 13 #include <stdbool.h> 14 14 #include <sys/resource.h> 15 - #include "libbpf.h" 15 + #include <bpf/libbpf.h> 16 16 #include "bpf_load.h" 17 17 #include "trace_helpers.h" 18 18
+1 -1
samples/bpf/parse_ldabs.c
··· 11 11 #include <linux/tcp.h> 12 12 #include <linux/udp.h> 13 13 #include <uapi/linux/bpf.h> 14 - #include "bpf_helpers.h" 14 + #include <bpf/bpf_helpers.h> 15 15 #include "bpf_legacy.h" 16 16 17 17 #define DEFAULT_PKTGEN_UDP_PORT 9
+1 -1
samples/bpf/parse_simple.c
··· 12 12 #include <linux/udp.h> 13 13 #include <uapi/linux/bpf.h> 14 14 #include <net/ip.h> 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 17 17 #define DEFAULT_PKTGEN_UDP_PORT 9 18 18
+1 -1
samples/bpf/parse_varlen.c
··· 14 14 #include <linux/udp.h> 15 15 #include <uapi/linux/bpf.h> 16 16 #include <net/ip.h> 17 - #include "bpf_helpers.h" 17 + #include <bpf/bpf_helpers.h> 18 18 19 19 #define DEFAULT_PKTGEN_UDP_PORT 9 20 20 #define DEBUG 0
+2 -2
samples/bpf/sampleip_kern.c
··· 8 8 #include <linux/ptrace.h> 9 9 #include <uapi/linux/bpf.h> 10 10 #include <uapi/linux/bpf_perf_event.h> 11 - #include "bpf_helpers.h" 12 - #include "bpf_tracing.h" 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 13 14 14 #define MAX_IPS 8192 15 15
+1 -1
samples/bpf/sampleip_user.c
··· 15 15 #include <linux/ptrace.h> 16 16 #include <linux/bpf.h> 17 17 #include <sys/ioctl.h> 18 - #include "libbpf.h" 18 + #include <bpf/libbpf.h> 19 19 #include "bpf_load.h" 20 20 #include "perf-sys.h" 21 21 #include "trace_helpers.h"
+1 -1
samples/bpf/sock_flags_kern.c
··· 3 3 #include <linux/net.h> 4 4 #include <uapi/linux/in.h> 5 5 #include <uapi/linux/in6.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 SEC("cgroup/sock1") 9 9 int bpf_prog1(struct bpf_sock *sk)
+1 -1
samples/bpf/sockex1_kern.c
··· 2 2 #include <uapi/linux/if_ether.h> 3 3 #include <uapi/linux/if_packet.h> 4 4 #include <uapi/linux/ip.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 #include "bpf_legacy.h" 7 7 8 8 struct {
+1 -1
samples/bpf/sockex1_user.c
··· 3 3 #include <assert.h> 4 4 #include <linux/bpf.h> 5 5 #include <bpf/bpf.h> 6 - #include "libbpf.h" 6 + #include <bpf/libbpf.h> 7 7 #include "sock_example.h" 8 8 #include <unistd.h> 9 9 #include <arpa/inet.h>
+1 -1
samples/bpf/sockex2_kern.c
··· 1 1 #include <uapi/linux/bpf.h> 2 - #include "bpf_helpers.h" 2 + #include <bpf/bpf_helpers.h> 3 3 #include "bpf_legacy.h" 4 4 #include <uapi/linux/in.h> 5 5 #include <uapi/linux/if.h>
+1 -1
samples/bpf/sockex2_user.c
··· 3 3 #include <assert.h> 4 4 #include <linux/bpf.h> 5 5 #include <bpf/bpf.h> 6 - #include "libbpf.h" 6 + #include <bpf/libbpf.h> 7 7 #include "sock_example.h" 8 8 #include <unistd.h> 9 9 #include <arpa/inet.h>
+1 -1
samples/bpf/sockex3_kern.c
··· 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 7 #include <uapi/linux/bpf.h> 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 #include "bpf_legacy.h" 10 10 #include <uapi/linux/in.h> 11 11 #include <uapi/linux/if.h>
+2 -2
samples/bpf/spintest_kern.c
··· 9 9 #include <linux/version.h> 10 10 #include <uapi/linux/bpf.h> 11 11 #include <uapi/linux/perf_event.h> 12 - #include "bpf_helpers.h" 13 - #include "bpf_tracing.h" 12 + #include <bpf/bpf_helpers.h> 13 + #include <bpf/bpf_tracing.h> 14 14 15 15 struct bpf_map_def SEC("maps") my_map = { 16 16 .type = BPF_MAP_TYPE_HASH,
+1 -1
samples/bpf/spintest_user.c
··· 5 5 #include <string.h> 6 6 #include <assert.h> 7 7 #include <sys/resource.h> 8 - #include "libbpf.h" 8 + #include <bpf/libbpf.h> 9 9 #include "bpf_load.h" 10 10 #include "trace_helpers.h" 11 11
+1 -1
samples/bpf/syscall_tp_kern.c
··· 2 2 /* Copyright (c) 2017 Facebook 3 3 */ 4 4 #include <uapi/linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 struct syscalls_enter_open_args { 8 8 unsigned long long unused;
+1 -1
samples/bpf/task_fd_query_kern.c
··· 2 2 #include <linux/version.h> 3 3 #include <linux/ptrace.h> 4 4 #include <uapi/linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 SEC("kprobe/blk_mq_start_request") 8 8 int bpf_prog1(struct pt_regs *ctx)
+1 -1
samples/bpf/task_fd_query_user.c
··· 15 15 #include <sys/stat.h> 16 16 #include <linux/perf_event.h> 17 17 18 - #include "libbpf.h" 18 + #include <bpf/libbpf.h> 19 19 #include "bpf_load.h" 20 20 #include "bpf_util.h" 21 21 #include "perf-sys.h"
+1 -1
samples/bpf/tc_l2_redirect_kern.c
··· 15 15 #include <uapi/linux/filter.h> 16 16 #include <uapi/linux/pkt_cls.h> 17 17 #include <net/ipv6.h> 18 - #include "bpf_helpers.h" 18 + #include <bpf/bpf_helpers.h> 19 19 20 20 #define _htonl __builtin_bswap32 21 21
+1 -1
samples/bpf/tcbpf1_kern.c
··· 7 7 #include <uapi/linux/tcp.h> 8 8 #include <uapi/linux/filter.h> 9 9 #include <uapi/linux/pkt_cls.h> 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 #include "bpf_legacy.h" 12 12 13 13 /* compiler workaround */
+2 -2
samples/bpf/tcp_basertt_kern.c
··· 16 16 #include <uapi/linux/if_packet.h> 17 17 #include <uapi/linux/ip.h> 18 18 #include <linux/socket.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 22 22 #define DEBUG 1 23 23
+2 -2
samples/bpf/tcp_bufs_kern.c
··· 17 17 #include <uapi/linux/if_packet.h> 18 18 #include <uapi/linux/ip.h> 19 19 #include <linux/socket.h> 20 - #include "bpf_helpers.h" 21 - #include "bpf_endian.h" 20 + #include <bpf/bpf_helpers.h> 21 + #include <bpf/bpf_endian.h> 22 22 23 23 #define DEBUG 1 24 24
+2 -2
samples/bpf/tcp_clamp_kern.c
··· 17 17 #include <uapi/linux/if_packet.h> 18 18 #include <uapi/linux/ip.h> 19 19 #include <linux/socket.h> 20 - #include "bpf_helpers.h" 21 - #include "bpf_endian.h" 20 + #include <bpf/bpf_helpers.h> 21 + #include <bpf/bpf_endian.h> 22 22 23 23 #define DEBUG 1 24 24
+2 -2
samples/bpf/tcp_cong_kern.c
··· 16 16 #include <uapi/linux/if_packet.h> 17 17 #include <uapi/linux/ip.h> 18 18 #include <linux/socket.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 22 22 #define DEBUG 1 23 23
+2 -2
samples/bpf/tcp_dumpstats_kern.c
··· 4 4 */ 5 5 #include <linux/bpf.h> 6 6 7 - #include "bpf_helpers.h" 8 - #include "bpf_endian.h" 7 + #include <bpf/bpf_helpers.h> 8 + #include <bpf/bpf_endian.h> 9 9 10 10 #define INTERVAL 1000000000ULL 11 11
+2 -2
samples/bpf/tcp_iw_kern.c
··· 17 17 #include <uapi/linux/if_packet.h> 18 18 #include <uapi/linux/ip.h> 19 19 #include <linux/socket.h> 20 - #include "bpf_helpers.h" 21 - #include "bpf_endian.h" 20 + #include <bpf/bpf_helpers.h> 21 + #include <bpf/bpf_endian.h> 22 22 23 23 #define DEBUG 1 24 24
+2 -2
samples/bpf/tcp_rwnd_kern.c
··· 16 16 #include <uapi/linux/if_packet.h> 17 17 #include <uapi/linux/ip.h> 18 18 #include <linux/socket.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 22 22 #define DEBUG 1 23 23
+2 -2
samples/bpf/tcp_synrto_kern.c
··· 16 16 #include <uapi/linux/if_packet.h> 17 17 #include <uapi/linux/ip.h> 18 18 #include <linux/socket.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 22 22 #define DEBUG 1 23 23
+2 -2
samples/bpf/tcp_tos_reflect_kern.c
··· 15 15 #include <uapi/linux/ipv6.h> 16 16 #include <uapi/linux/in.h> 17 17 #include <linux/socket.h> 18 - #include "bpf_helpers.h" 19 - #include "bpf_endian.h" 18 + #include <bpf/bpf_helpers.h> 19 + #include <bpf/bpf_endian.h> 20 20 21 21 #define DEBUG 1 22 22
+1 -1
samples/bpf/test_cgrp2_tc_kern.c
··· 10 10 #include <uapi/linux/ipv6.h> 11 11 #include <uapi/linux/pkt_cls.h> 12 12 #include <uapi/linux/bpf.h> 13 - #include "bpf_helpers.h" 13 + #include <bpf/bpf_helpers.h> 14 14 15 15 /* copy of 'struct ethhdr' without __packed */ 16 16 struct eth_hdr {
+1 -1
samples/bpf/test_current_task_under_cgroup_kern.c
··· 8 8 #include <linux/ptrace.h> 9 9 #include <uapi/linux/bpf.h> 10 10 #include <linux/version.h> 11 - #include "bpf_helpers.h" 11 + #include <bpf/bpf_helpers.h> 12 12 #include <uapi/linux/utsname.h> 13 13 14 14 struct bpf_map_def SEC("maps") cgroup_map = {
+1 -1
samples/bpf/test_lwt_bpf.c
··· 20 20 #include <linux/udp.h> 21 21 #include <linux/icmpv6.h> 22 22 #include <linux/if_ether.h> 23 - #include "bpf_helpers.h" 23 + #include <bpf/bpf_helpers.h> 24 24 #include <string.h> 25 25 26 26 # define printk(fmt, ...) \
+2 -2
samples/bpf/test_map_in_map_kern.c
··· 10 10 #include <linux/version.h> 11 11 #include <uapi/linux/bpf.h> 12 12 #include <uapi/linux/in6.h> 13 - #include "bpf_helpers.h" 13 + #include <bpf/bpf_helpers.h> 14 14 #include "bpf_legacy.h" 15 - #include "bpf_tracing.h" 15 + #include <bpf/bpf_tracing.h> 16 16 17 17 #define MAX_NR_PORTS 65536 18 18
+2 -2
samples/bpf/test_overhead_kprobe_kern.c
··· 7 7 #include <linux/version.h> 8 8 #include <linux/ptrace.h> 9 9 #include <uapi/linux/bpf.h> 10 - #include "bpf_helpers.h" 11 - #include "bpf_tracing.h" 10 + #include <bpf/bpf_helpers.h> 11 + #include <bpf/bpf_tracing.h> 12 12 13 13 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 14 14
+1 -1
samples/bpf/test_overhead_raw_tp_kern.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2018 Facebook */ 3 3 #include <uapi/linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 SEC("raw_tracepoint/task_rename") 7 7 int prog(struct bpf_raw_tracepoint_args *ctx)
+1 -1
samples/bpf/test_overhead_tp_kern.c
··· 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 7 #include <uapi/linux/bpf.h> 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 10 10 /* from /sys/kernel/debug/tracing/events/task/task_rename/format */ 11 11 struct task_rename {
+2 -2
samples/bpf/test_probe_write_user_kern.c
··· 8 8 #include <linux/netdevice.h> 9 9 #include <uapi/linux/bpf.h> 10 10 #include <linux/version.h> 11 - #include "bpf_helpers.h" 12 - #include "bpf_tracing.h" 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 13 14 14 struct bpf_map_def SEC("maps") dnat_map = { 15 15 .type = BPF_MAP_TYPE_HASH,
+2 -2
samples/bpf/trace_event_kern.c
··· 9 9 #include <uapi/linux/bpf.h> 10 10 #include <uapi/linux/bpf_perf_event.h> 11 11 #include <uapi/linux/perf_event.h> 12 - #include "bpf_helpers.h" 13 - #include "bpf_tracing.h" 12 + #include <bpf/bpf_helpers.h> 13 + #include <bpf/bpf_tracing.h> 14 14 15 15 struct key_t { 16 16 char comm[TASK_COMM_LEN];
+1 -1
samples/bpf/trace_event_user.c
··· 15 15 #include <assert.h> 16 16 #include <errno.h> 17 17 #include <sys/resource.h> 18 - #include "libbpf.h" 18 + #include <bpf/libbpf.h> 19 19 #include "bpf_load.h" 20 20 #include "perf-sys.h" 21 21 #include "trace_helpers.h"
+1 -1
samples/bpf/trace_output_kern.c
··· 1 1 #include <linux/ptrace.h> 2 2 #include <linux/version.h> 3 3 #include <uapi/linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct bpf_map_def SEC("maps") my_map = { 7 7 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+1 -1
samples/bpf/trace_output_user.c
··· 15 15 #include <sys/mman.h> 16 16 #include <time.h> 17 17 #include <signal.h> 18 - #include <libbpf.h> 18 + #include <bpf/libbpf.h> 19 19 #include "bpf_load.h" 20 20 #include "perf-sys.h" 21 21
+2 -2
samples/bpf/tracex1_kern.c
··· 8 8 #include <linux/netdevice.h> 9 9 #include <uapi/linux/bpf.h> 10 10 #include <linux/version.h> 11 - #include "bpf_helpers.h" 12 - #include "bpf_tracing.h" 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 13 14 14 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) 15 15
+2 -2
samples/bpf/tracex2_kern.c
··· 8 8 #include <linux/netdevice.h> 9 9 #include <linux/version.h> 10 10 #include <uapi/linux/bpf.h> 11 - #include "bpf_helpers.h" 12 - #include "bpf_tracing.h" 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 13 14 14 struct bpf_map_def SEC("maps") my_map = { 15 15 .type = BPF_MAP_TYPE_HASH,
+2 -2
samples/bpf/tracex3_kern.c
··· 8 8 #include <linux/netdevice.h> 9 9 #include <linux/version.h> 10 10 #include <uapi/linux/bpf.h> 11 - #include "bpf_helpers.h" 12 - #include "bpf_tracing.h" 11 + #include <bpf/bpf_helpers.h> 12 + #include <bpf/bpf_tracing.h> 13 13 14 14 struct bpf_map_def SEC("maps") my_map = { 15 15 .type = BPF_MAP_TYPE_HASH,
+2 -2
samples/bpf/tracex4_kern.c
··· 7 7 #include <linux/ptrace.h> 8 8 #include <linux/version.h> 9 9 #include <uapi/linux/bpf.h> 10 - #include "bpf_helpers.h" 11 - #include "bpf_tracing.h" 10 + #include <bpf/bpf_helpers.h> 11 + #include <bpf/bpf_tracing.h> 12 12 13 13 struct pair { 14 14 u64 val;
+2 -2
samples/bpf/tracex5_kern.c
··· 10 10 #include <uapi/linux/seccomp.h> 11 11 #include <uapi/linux/unistd.h> 12 12 #include "syscall_nrs.h" 13 - #include "bpf_helpers.h" 14 - #include "bpf_tracing.h" 13 + #include <bpf/bpf_helpers.h> 14 + #include <bpf/bpf_tracing.h> 15 15 16 16 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F 17 17
+1 -1
samples/bpf/tracex6_kern.c
··· 1 1 #include <linux/ptrace.h> 2 2 #include <linux/version.h> 3 3 #include <uapi/linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct bpf_map_def SEC("maps") counters = { 7 7 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+1 -1
samples/bpf/tracex7_kern.c
··· 1 1 #include <uapi/linux/ptrace.h> 2 2 #include <uapi/linux/bpf.h> 3 3 #include <linux/version.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 SEC("kprobe/open_ctree") 7 7 int bpf_prog1(struct pt_regs *ctx)
+1 -1
samples/bpf/xdp1_kern.c
··· 12 12 #include <linux/if_vlan.h> 13 13 #include <linux/ip.h> 14 14 #include <linux/ipv6.h> 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 17 17 struct { 18 18 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+2 -2
samples/bpf/xdp1_user.c
··· 15 15 #include <net/if.h> 16 16 17 17 #include "bpf_util.h" 18 - #include "bpf.h" 19 - #include "libbpf.h" 18 + #include <bpf/bpf.h> 19 + #include <bpf/libbpf.h> 20 20 21 21 static int ifindex; 22 22 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+1 -1
samples/bpf/xdp2_kern.c
··· 12 12 #include <linux/if_vlan.h> 13 13 #include <linux/ip.h> 14 14 #include <linux/ipv6.h> 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 17 17 struct { 18 18 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+1 -1
samples/bpf/xdp2skb_meta_kern.c
··· 12 12 #include <uapi/linux/bpf.h> 13 13 #include <uapi/linux/pkt_cls.h> 14 14 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 17 17 /* 18 18 * This struct is stored in the XDP 'data_meta' area, which is located
+1 -1
samples/bpf/xdp_adjust_tail_kern.c
··· 18 18 #include <linux/if_vlan.h> 19 19 #include <linux/ip.h> 20 20 #include <linux/icmp.h> 21 - #include "bpf_helpers.h" 21 + #include <bpf/bpf_helpers.h> 22 22 23 23 #define DEFAULT_TTL 64 24 24 #define MAX_PCKT_SIZE 600
+2 -2
samples/bpf/xdp_adjust_tail_user.c
··· 19 19 #include <netinet/ether.h> 20 20 #include <unistd.h> 21 21 #include <time.h> 22 - #include "bpf.h" 23 - #include "libbpf.h" 22 + #include <bpf/bpf.h> 23 + #include <bpf/libbpf.h> 24 24 25 25 #define STATS_INTERVAL_S 2U 26 26 #define MAX_PCKT_SIZE 600
+1 -1
samples/bpf/xdp_fwd_kern.c
··· 19 19 #include <linux/ip.h> 20 20 #include <linux/ipv6.h> 21 21 22 - #include "bpf_helpers.h" 22 + #include <bpf/bpf_helpers.h> 23 23 24 24 #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) 25 25
+1 -1
samples/bpf/xdp_fwd_user.c
··· 24 24 #include <fcntl.h> 25 25 #include <libgen.h> 26 26 27 - #include "libbpf.h" 27 + #include <bpf/libbpf.h> 28 28 #include <bpf/bpf.h> 29 29 30 30 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+4 -6
samples/bpf/xdp_monitor_kern.c
··· 4 4 * XDP monitor tool, based on tracepoints 5 5 */ 6 6 #include <uapi/linux/bpf.h> 7 - #include "bpf_helpers.h" 7 + #include <bpf/bpf_helpers.h> 8 8 9 9 struct bpf_map_def SEC("maps") redirect_err_cnt = { 10 10 .type = BPF_MAP_TYPE_PERCPU_ARRAY, ··· 222 222 */ 223 223 struct devmap_xmit_ctx { 224 224 u64 __pad; // First 8 bytes are not accessible by bpf code 225 - int map_id; // offset:8; size:4; signed:1; 225 + int from_ifindex; // offset:8; size:4; signed:1; 226 226 u32 act; // offset:12; size:4; signed:0; 227 - u32 map_index; // offset:16; size:4; signed:0; 227 + int to_ifindex; // offset:16; size:4; signed:1; 228 228 int drops; // offset:20; size:4; signed:1; 229 229 int sent; // offset:24; size:4; signed:1; 230 - int from_ifindex; // offset:28; size:4; signed:1; 231 - int to_ifindex; // offset:32; size:4; signed:1; 232 - int err; // offset:36; size:4; signed:1; 230 + int err; // offset:28; size:4; signed:1; 233 231 }; 234 232 235 233 SEC("tracepoint/xdp/xdp_devmap_xmit")
+1 -1
samples/bpf/xdp_redirect_cpu_kern.c
··· 12 12 #include <uapi/linux/udp.h> 13 13 14 14 #include <uapi/linux/bpf.h> 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 #include "hash_func01.h" 17 17 18 18 #define MAX_CPUS 64 /* WARNING - sync with _user.c */
+1 -1
samples/bpf/xdp_redirect_cpu_user.c
··· 30 30 #define MAX_PROG 6 31 31 32 32 #include <bpf/bpf.h> 33 - #include "libbpf.h" 33 + #include <bpf/libbpf.h> 34 34 35 35 #include "bpf_util.h" 36 36
+1 -1
samples/bpf/xdp_redirect_kern.c
··· 17 17 #include <linux/if_vlan.h> 18 18 #include <linux/ip.h> 19 19 #include <linux/ipv6.h> 20 - #include "bpf_helpers.h" 20 + #include <bpf/bpf_helpers.h> 21 21 22 22 struct { 23 23 __uint(type, BPF_MAP_TYPE_ARRAY);
+1 -1
samples/bpf/xdp_redirect_map_kern.c
··· 17 17 #include <linux/if_vlan.h> 18 18 #include <linux/ip.h> 19 19 #include <linux/ipv6.h> 20 - #include "bpf_helpers.h" 20 + #include <bpf/bpf_helpers.h> 21 21 22 22 struct { 23 23 __uint(type, BPF_MAP_TYPE_DEVMAP);
+1 -1
samples/bpf/xdp_redirect_map_user.c
··· 17 17 18 18 #include "bpf_util.h" 19 19 #include <bpf/bpf.h> 20 - #include "libbpf.h" 20 + #include <bpf/libbpf.h> 21 21 22 22 static int ifindex_in; 23 23 static int ifindex_out;
+1 -1
samples/bpf/xdp_redirect_user.c
··· 17 17 18 18 #include "bpf_util.h" 19 19 #include <bpf/bpf.h> 20 - #include "libbpf.h" 20 + #include <bpf/libbpf.h> 21 21 22 22 static int ifindex_in; 23 23 static int ifindex_out;
+1 -1
samples/bpf/xdp_router_ipv4_kern.c
··· 12 12 #include <linux/if_vlan.h> 13 13 #include <linux/ip.h> 14 14 #include <linux/ipv6.h> 15 - #include "bpf_helpers.h" 15 + #include <bpf/bpf_helpers.h> 16 16 #include <linux/slab.h> 17 17 #include <net/ip_fib.h> 18 18
+1 -1
samples/bpf/xdp_router_ipv4_user.c
··· 21 21 #include <sys/ioctl.h> 22 22 #include <sys/syscall.h> 23 23 #include "bpf_util.h" 24 - #include "libbpf.h" 24 + #include <bpf/libbpf.h> 25 25 #include <sys/resource.h> 26 26 #include <libgen.h> 27 27
+1 -1
samples/bpf/xdp_rxq_info_kern.c
··· 6 6 #include <uapi/linux/bpf.h> 7 7 #include <uapi/linux/if_ether.h> 8 8 #include <uapi/linux/in.h> 9 - #include "bpf_helpers.h" 9 + #include <bpf/bpf_helpers.h> 10 10 11 11 /* Config setup from with userspace 12 12 *
+2 -2
samples/bpf/xdp_rxq_info_user.c
··· 22 22 #include <arpa/inet.h> 23 23 #include <linux/if_link.h> 24 24 25 - #include "bpf.h" 26 - #include "libbpf.h" 25 + #include <bpf/bpf.h> 26 + #include <bpf/libbpf.h> 27 27 #include "bpf_util.h" 28 28 29 29 static int ifindex = -1;
+1 -1
samples/bpf/xdp_sample_pkts_kern.c
··· 2 2 #include <linux/ptrace.h> 3 3 #include <linux/version.h> 4 4 #include <uapi/linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 #define SAMPLE_SIZE 64ul 8 8 #define MAX_CPUS 128
+1 -1
samples/bpf/xdp_sample_pkts_user.c
··· 10 10 #include <sys/sysinfo.h> 11 11 #include <sys/ioctl.h> 12 12 #include <signal.h> 13 - #include <libbpf.h> 13 + #include <bpf/libbpf.h> 14 14 #include <bpf/bpf.h> 15 15 #include <sys/resource.h> 16 16 #include <libgen.h>
+1 -1
samples/bpf/xdp_tx_iptunnel_kern.c
··· 16 16 #include <linux/if_vlan.h> 17 17 #include <linux/ip.h> 18 18 #include <linux/ipv6.h> 19 - #include "bpf_helpers.h" 19 + #include <bpf/bpf_helpers.h> 20 20 #include "xdp_tx_iptunnel_common.h" 21 21 22 22 struct {
+1 -1
samples/bpf/xdp_tx_iptunnel_user.c
··· 15 15 #include <netinet/ether.h> 16 16 #include <unistd.h> 17 17 #include <time.h> 18 - #include "libbpf.h" 18 + #include <bpf/libbpf.h> 19 19 #include <bpf/bpf.h> 20 20 #include "bpf_util.h" 21 21 #include "xdp_tx_iptunnel_common.h"
+1 -1
samples/bpf/xdpsock_kern.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 - #include "bpf_helpers.h" 3 + #include <bpf/bpf_helpers.h> 4 4 #include "xdpsock.h" 5 5 6 6 /* This XDP program is only needed for the XDP_SHARED_UMEM mode.
+3 -3
samples/bpf/xdpsock_user.c
··· 30 30 #include <time.h> 31 31 #include <unistd.h> 32 32 33 - #include "libbpf.h" 34 - #include "xsk.h" 35 - #include "xdpsock.h" 33 + #include <bpf/libbpf.h> 34 + #include <bpf/xsk.h> 36 35 #include <bpf/bpf.h> 36 + #include "xdpsock.h" 37 37 38 38 #ifndef SOL_XDP 39 39 #define SOL_XDP 283
-2
scripts/bpf_helpers_doc.py
··· 158 158 break 159 159 160 160 self.reader.close() 161 - print('Parsed description of %d helper function(s)' % len(self.helpers), 162 - file=sys.stderr) 163 161 164 162 ############################################################################### 165 163
+15 -5
tools/bpf/Makefile
··· 38 38 FEATURE_DISPLAY = libbfd disassembler-four-args 39 39 40 40 check_feat := 1 41 - NON_CHECK_FEAT_TARGETS := clean bpftool_clean 41 + NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean 42 42 ifdef MAKECMDGOALS 43 43 ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) 44 44 check_feat := 0 ··· 73 73 74 74 PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm 75 75 76 - all: $(PROGS) bpftool 76 + all: $(PROGS) bpftool runqslower 77 77 78 78 $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm' 79 79 $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o ··· 89 89 $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c 90 90 $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c 91 91 92 - clean: bpftool_clean 92 + clean: bpftool_clean runqslower_clean 93 93 $(call QUIET_CLEAN, bpf-progs) 94 94 $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ 95 95 $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* ··· 97 97 $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf 98 98 $(Q)$(RM) -r -- $(OUTPUT)feature 99 99 100 - install: $(PROGS) bpftool_install 100 + install: $(PROGS) bpftool_install runqslower_install 101 101 $(call QUIET_INSTALL, bpf_jit_disasm) 102 102 $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin 103 103 $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm ··· 115 115 bpftool_clean: 116 116 $(call descend,bpftool,clean) 117 117 118 - .PHONY: all install clean bpftool bpftool_install bpftool_clean 118 + runqslower: 119 + $(call descend,runqslower) 120 + 121 + runqslower_install: 122 + $(call descend,runqslower,install) 123 + 124 + runqslower_clean: 125 + $(call descend,runqslower,clean) 126 + 127 + .PHONY: all install clean bpftool bpftool_install bpftool_clean \ 128 + runqslower runqslower_install runqslower_clean
+1 -1
tools/bpf/bpftool/Documentation/bpftool-gen.rst
··· 196 196 #define __EXAMPLE_SKEL_H__ 197 197 198 198 #include <stdlib.h> 199 - #include <libbpf.h> 199 + #include <bpf/libbpf.h> 200 200 201 201 struct example { 202 202 struct bpf_object_skeleton *skeleton;
+1 -1
tools/bpf/bpftool/Makefile
··· 45 45 -I$(srctree)/kernel/bpf/ \ 46 46 -I$(srctree)/tools/include \ 47 47 -I$(srctree)/tools/include/uapi \ 48 - -I$(srctree)/tools/lib/bpf \ 48 + -I$(srctree)/tools/lib \ 49 49 -I$(srctree)/tools/perf 50 50 CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' 51 51 ifneq ($(EXTRA_CFLAGS),)
+12 -4
tools/bpf/bpftool/btf.c
··· 8 8 #include <stdio.h> 9 9 #include <string.h> 10 10 #include <unistd.h> 11 - #include <bpf.h> 12 - #include <libbpf.h> 11 + #include <bpf/bpf.h> 12 + #include <bpf/btf.h> 13 + #include <bpf/libbpf.h> 13 14 #include <linux/btf.h> 14 15 #include <linux/hashtable.h> 15 16 #include <sys/types.h> 16 17 #include <sys/stat.h> 17 18 #include <unistd.h> 18 19 19 - #include "btf.h" 20 20 #include "json_writer.h" 21 21 #include "main.h" 22 22 ··· 370 370 if (IS_ERR(d)) 371 371 return PTR_ERR(d); 372 372 373 + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); 374 + printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); 375 + printf("#endif\n\n"); 376 + 373 377 if (root_type_cnt) { 374 378 for (i = 0; i < root_type_cnt; i++) { 375 379 err = btf_dump__dump_type(d, root_type_ids[i]); ··· 389 385 goto done; 390 386 } 391 387 } 388 + 389 + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); 390 + printf("#pragma clang attribute pop\n"); 391 + printf("#endif\n"); 392 392 393 393 done: 394 394 btf_dump__free(d); ··· 532 524 if (IS_ERR(btf)) { 533 525 err = PTR_ERR(btf); 534 526 btf = NULL; 535 - p_err("failed to load BTF from %s: %s", 527 + p_err("failed to load BTF from %s: %s", 536 528 *argv, strerror(err)); 537 529 goto done; 538 530 }
+1 -1
tools/bpf/bpftool/btf_dumper.c
··· 8 8 #include <linux/bitops.h> 9 9 #include <linux/btf.h> 10 10 #include <linux/err.h> 11 + #include <bpf/btf.h> 11 12 12 - #include "btf.h" 13 13 #include "json_writer.h" 14 14 #include "main.h" 15 15
+1 -1
tools/bpf/bpftool/cgroup.c
··· 14 14 #include <sys/types.h> 15 15 #include <unistd.h> 16 16 17 - #include <bpf.h> 17 + #include <bpf/bpf.h> 18 18 19 19 #include "main.h" 20 20
+2 -2
tools/bpf/bpftool/common.c
··· 20 20 #include <sys/stat.h> 21 21 #include <sys/vfs.h> 22 22 23 - #include <bpf.h> 24 - #include <libbpf.h> /* libbpf_num_possible_cpus */ 23 + #include <bpf/bpf.h> 24 + #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ 25 25 26 26 #include "main.h" 27 27
+20 -2
tools/bpf/bpftool/feature.c
··· 12 12 #include <linux/filter.h> 13 13 #include <linux/limits.h> 14 14 15 - #include <bpf.h> 16 - #include <libbpf.h> 15 + #include <bpf/bpf.h> 16 + #include <bpf/libbpf.h> 17 17 #include <zlib.h> 18 18 19 19 #include "main.h" ··· 572 572 printf("\n"); 573 573 } 574 574 575 + static void 576 + probe_large_insn_limit(const char *define_prefix, __u32 ifindex) 577 + { 578 + bool res; 579 + 580 + res = bpf_probe_large_insn_limit(ifindex); 581 + print_bool_feature("have_large_insn_limit", 582 + "Large program size limit", 583 + "HAVE_LARGE_INSN_LIMIT", 584 + res, define_prefix); 585 + } 586 + 575 587 static int do_probe(int argc, char **argv) 576 588 { 577 589 enum probe_component target = COMPONENT_UNSPEC; ··· 735 723 for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) 736 724 probe_helpers_for_progtype(i, supported_types[i], 737 725 define_prefix, ifindex); 726 + 727 + print_end_then_start_section("misc", 728 + "Scanning miscellaneous eBPF features...", 729 + "/*** eBPF misc features ***/", 730 + define_prefix); 731 + probe_large_insn_limit(define_prefix, ifindex); 738 732 739 733 exit_close_json: 740 734 if (json_output) {
+5 -5
tools/bpf/bpftool/gen.c
··· 12 12 #include <stdio.h> 13 13 #include <string.h> 14 14 #include <unistd.h> 15 - #include <bpf.h> 16 - #include <libbpf.h> 15 + #include <bpf/bpf.h> 16 + #include <bpf/libbpf.h> 17 17 #include <sys/types.h> 18 18 #include <sys/stat.h> 19 19 #include <sys/mman.h> 20 20 #include <unistd.h> 21 + #include <bpf/btf.h> 21 22 22 - #include "btf.h" 23 - #include "libbpf_internal.h" 23 + #include "bpf/libbpf_internal.h" 24 24 #include "json_writer.h" 25 25 #include "main.h" 26 26 ··· 333 333 #define %2$s \n\ 334 334 \n\ 335 335 #include <stdlib.h> \n\ 336 - #include <libbpf.h> \n\ 336 + #include <bpf/libbpf.h> \n\ 337 337 \n\ 338 338 struct %1$s { \n\ 339 339 struct bpf_object_skeleton *skeleton; \n\
+1 -1
tools/bpf/bpftool/jit_disasm.c
··· 24 24 #include <dis-asm.h> 25 25 #include <sys/stat.h> 26 26 #include <limits.h> 27 - #include <libbpf.h> 27 + #include <bpf/libbpf.h> 28 28 29 29 #include "json_writer.h" 30 30 #include "main.h"
+2 -2
tools/bpf/bpftool/main.c
··· 9 9 #include <stdlib.h> 10 10 #include <string.h> 11 11 12 - #include <bpf.h> 13 - #include <libbpf.h> 12 + #include <bpf/bpf.h> 13 + #include <bpf/libbpf.h> 14 14 15 15 #include "main.h" 16 16
+66 -40
tools/bpf/bpftool/map.c
··· 15 15 #include <sys/types.h> 16 16 #include <sys/stat.h> 17 17 18 - #include <bpf.h> 18 + #include <bpf/bpf.h> 19 + #include <bpf/btf.h> 19 20 20 - #include "btf.h" 21 21 #include "json_writer.h" 22 22 #include "main.h" 23 23 ··· 48 48 [BPF_MAP_TYPE_QUEUE] = "queue", 49 49 [BPF_MAP_TYPE_STACK] = "stack", 50 50 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", 51 + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", 51 52 }; 52 53 53 54 const size_t map_type_name_size = ARRAY_SIZE(map_type_name); ··· 252 251 struct bpf_map_info *map_info, void *key, 253 252 void *value) 254 253 { 254 + __u32 value_id; 255 255 int ret; 256 256 257 257 /* start of key-value pair */ ··· 266 264 goto err_end_obj; 267 265 } 268 266 267 + value_id = map_info->btf_vmlinux_value_type_id ? 268 + : map_info->btf_value_type_id; 269 + 269 270 if (!map_is_per_cpu(map_info->type)) { 270 271 jsonw_name(d->jw, "value"); 271 - ret = btf_dumper_type(d, map_info->btf_value_type_id, value); 272 + ret = btf_dumper_type(d, value_id, value); 272 273 } else { 273 274 unsigned int i, n, step; 274 275 ··· 283 278 jsonw_start_object(d->jw); 284 279 jsonw_int_field(d->jw, "cpu", i); 285 280 jsonw_name(d->jw, "value"); 286 - ret = btf_dumper_type(d, map_info->btf_value_type_id, 287 - value + i * step); 281 + ret = btf_dumper_type(d, value_id, value + i * step); 288 282 jsonw_end_object(d->jw); 289 283 if (ret) 290 284 break; ··· 919 915 { 920 916 struct bpf_map_info info = {}; 921 917 __u32 len = sizeof(info); 922 - struct btf *btf = NULL; 923 918 int err, i; 924 919 925 920 for (i = 0; i < nb_fds; i++) { 926 921 err = bpf_obj_get_info_by_fd(fds[i], &info, &len); 927 922 if (err) { 928 923 p_err("can't get map info: %s", strerror(errno)); 929 - goto err_close; 924 + return -1; 930 925 } 931 926 932 - err = btf__get_from_id(info.btf_id, &btf); 933 - if (err) { 934 - p_err("failed to get btf"); 935 - goto err_close; 936 - } 937 - 938 - if (!btf) 927 + if (!info.btf_id) 939 928 return 0; 940 929 } 941 930 942 931 return 1; 932 
+ } 943 933 944 - err_close: 945 - for (; i < nb_fds; i++) 946 - close(fds[i]); 947 - return -1; 934 + static struct btf *btf_vmlinux; 935 + 936 + static struct btf *get_map_kv_btf(const struct bpf_map_info *info) 937 + { 938 + struct btf *btf = NULL; 939 + 940 + if (info->btf_vmlinux_value_type_id) { 941 + if (!btf_vmlinux) { 942 + btf_vmlinux = libbpf_find_kernel_btf(); 943 + if (IS_ERR(btf_vmlinux)) 944 + p_err("failed to get kernel btf"); 945 + } 946 + return btf_vmlinux; 947 + } else if (info->btf_value_type_id) { 948 + int err; 949 + 950 + err = btf__get_from_id(info->btf_id, &btf); 951 + if (err || !btf) { 952 + p_err("failed to get btf"); 953 + btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); 954 + } 955 + } 956 + 957 + return btf; 958 + } 959 + 960 + static void free_map_kv_btf(struct btf *btf) 961 + { 962 + if (!IS_ERR(btf) && btf != btf_vmlinux) 963 + btf__free(btf); 964 + } 965 + 966 + static void free_btf_vmlinux(void) 967 + { 968 + if (!IS_ERR(btf_vmlinux)) 969 + btf__free(btf_vmlinux); 948 970 } 949 971 950 972 static int 951 973 map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, 952 - bool enable_btf, bool show_header) 974 + bool show_header) 953 975 { 954 976 void *key, *value, *prev_key; 955 977 unsigned int num_elems = 0; ··· 992 962 993 963 prev_key = NULL; 994 964 995 - if (enable_btf) { 996 - err = btf__get_from_id(info->btf_id, &btf); 997 - if (err || !btf) { 998 - /* enable_btf is true only if we've already checked 999 - * that all maps have BTF information. 
1000 - */ 1001 - p_err("failed to get btf"); 965 + if (wtr) { 966 + btf = get_map_kv_btf(info); 967 + if (IS_ERR(btf)) { 968 + err = PTR_ERR(btf); 1002 969 goto exit_free; 1003 970 } 1004 - } 1005 971 1006 - if (wtr) { 1007 972 if (show_header) { 1008 973 jsonw_start_object(wtr); /* map object */ 1009 974 show_map_header_json(info, wtr); ··· 1037 1012 free(key); 1038 1013 free(value); 1039 1014 close(fd); 1040 - btf__free(btf); 1015 + free_map_kv_btf(btf); 1041 1016 1042 1017 return err; 1043 1018 } ··· 1046 1021 { 1047 1022 json_writer_t *wtr = NULL, *btf_wtr = NULL; 1048 1023 struct bpf_map_info info = {}; 1049 - int nb_fds, i = 0, btf = 0; 1024 + int nb_fds, i = 0; 1050 1025 __u32 len = sizeof(info); 1051 1026 int *fds = NULL; 1052 1027 int err = -1; ··· 1066 1041 if (json_output) { 1067 1042 wtr = json_wtr; 1068 1043 } else { 1069 - btf = maps_have_btf(fds, nb_fds); 1070 - if (btf < 0) 1044 + int do_plain_btf; 1045 + 1046 + do_plain_btf = maps_have_btf(fds, nb_fds); 1047 + if (do_plain_btf < 0) 1071 1048 goto exit_close; 1072 - if (btf) { 1049 + 1050 + if (do_plain_btf) { 1073 1051 btf_wtr = get_btf_writer(); 1074 - if (btf_wtr) { 1075 - wtr = btf_wtr; 1076 - } else { 1052 + wtr = btf_wtr; 1053 + if (!btf_wtr) 1077 1054 p_info("failed to create json writer for btf. 
falling back to plain output"); 1078 - btf = 0; 1079 - } 1080 1055 } 1081 1056 } 1082 1057 ··· 1087 1062 p_err("can't get map info: %s", strerror(errno)); 1088 1063 break; 1089 1064 } 1090 - err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1); 1065 + err = map_dump(fds[i], &info, wtr, nb_fds > 1); 1091 1066 if (!wtr && i != nb_fds - 1) 1092 1067 printf("\n"); 1093 1068 ··· 1098 1073 if (wtr && nb_fds > 1) 1099 1074 jsonw_end_array(wtr); /* root array */ 1100 1075 1101 - if (btf) 1076 + if (btf_wtr) 1102 1077 jsonw_destroy(&btf_wtr); 1103 1078 exit_close: 1104 1079 for (; i < nb_fds; i++) 1105 1080 close(fds[i]); 1106 1081 exit_free: 1107 1082 free(fds); 1083 + free_btf_vmlinux(); 1108 1084 return err; 1109 1085 } 1110 1086
+2 -2
tools/bpf/bpftool/map_perf_ring.c
··· 6 6 */ 7 7 #include <errno.h> 8 8 #include <fcntl.h> 9 - #include <libbpf.h> 9 + #include <bpf/libbpf.h> 10 10 #include <poll.h> 11 11 #include <signal.h> 12 12 #include <stdbool.h> ··· 21 21 #include <sys/mman.h> 22 22 #include <sys/syscall.h> 23 23 24 - #include <bpf.h> 24 + #include <bpf/bpf.h> 25 25 #include <perf-sys.h> 26 26 27 27 #include "main.h"
+4 -4
tools/bpf/bpftool/net.c
··· 7 7 #include <stdlib.h> 8 8 #include <string.h> 9 9 #include <unistd.h> 10 - #include <libbpf.h> 10 + #include <bpf/bpf.h> 11 + #include <bpf/libbpf.h> 11 12 #include <net/if.h> 12 13 #include <linux/if.h> 13 14 #include <linux/rtnetlink.h> ··· 17 16 #include <sys/stat.h> 18 17 #include <sys/types.h> 19 18 20 - #include <bpf.h> 21 - #include <nlattr.h> 22 - #include "libbpf_internal.h" 19 + #include "bpf/nlattr.h" 20 + #include "bpf/libbpf_internal.h" 23 21 #include "main.h" 24 22 #include "netlink_dumper.h" 25 23
+1 -1
tools/bpf/bpftool/perf.c
··· 13 13 #include <unistd.h> 14 14 #include <ftw.h> 15 15 16 - #include <bpf.h> 16 + #include <bpf/bpf.h> 17 17 18 18 #include "main.h" 19 19
+3 -3
tools/bpf/bpftool/prog.c
··· 17 17 #include <linux/err.h> 18 18 #include <linux/sizes.h> 19 19 20 - #include <bpf.h> 21 - #include <btf.h> 22 - #include <libbpf.h> 20 + #include <bpf/bpf.h> 21 + #include <bpf/btf.h> 22 + #include <bpf/libbpf.h> 23 23 24 24 #include "cfg.h" 25 25 #include "main.h"
+1 -1
tools/bpf/bpftool/xlated_dumper.c
··· 7 7 #include <stdlib.h> 8 8 #include <string.h> 9 9 #include <sys/types.h> 10 - #include <libbpf.h> 10 + #include <bpf/libbpf.h> 11 11 12 12 #include "disasm.h" 13 13 #include "json_writer.h"
+1
tools/bpf/runqslower/.gitignore
··· 1 + /.output
+84
tools/bpf/runqslower/Makefile
··· 1 + # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + OUTPUT := .output 3 + CLANG := clang 4 + LLC := llc 5 + LLVM_STRIP := llvm-strip 6 + DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool 7 + BPFTOOL ?= $(DEFAULT_BPFTOOL) 8 + LIBBPF_SRC := $(abspath ../../lib/bpf) 9 + BPFOBJ := $(OUTPUT)/libbpf.a 10 + BPF_INCLUDE := $(OUTPUT) 11 + INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib) 12 + CFLAGS := -g -Wall 13 + 14 + # Try to detect best kernel BTF source 15 + KERNEL_REL := $(shell uname -r) 16 + VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) 17 + VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ 18 + $(wildcard $(VMLINUX_BTF_PATHS)))) 19 + 20 + abs_out := $(abspath $(OUTPUT)) 21 + ifeq ($(V),1) 22 + Q = 23 + msg = 24 + else 25 + Q = @ 26 + msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; 27 + MAKEFLAGS += --no-print-directory 28 + submake_extras := feature_display=0 29 + endif 30 + 31 + .DELETE_ON_ERROR: 32 + 33 + .PHONY: all clean runqslower 34 + all: runqslower 35 + 36 + runqslower: $(OUTPUT)/runqslower 37 + 38 + clean: 39 + $(call msg,CLEAN) 40 + $(Q)rm -rf $(OUTPUT) runqslower 41 + 42 + $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) 43 + $(call msg,BINARY,$@) 44 + $(Q)$(CC) $(CFLAGS) -lelf -lz $^ -o $@ 45 + 46 + $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ 47 + $(OUTPUT)/runqslower.bpf.o 48 + 49 + $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h 50 + 51 + $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) 52 + $(call msg,GEN-SKEL,$@) 53 + $(Q)$(BPFTOOL) gen skeleton $< > $@ 54 + 55 + $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) 56 + $(call msg,BPF,$@) 57 + $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ 58 + -c $(filter %.c,$^) -o $@ && \ 59 + $(LLVM_STRIP) -g $@ 60 + 61 + $(OUTPUT)/%.o: %.c | $(OUTPUT) 62 + $(call msg,CC,$@) 63 + $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ 64 + 65 + $(OUTPUT): 66 + $(call msg,MKDIR,$@) 67 + $(Q)mkdir 
-p $(OUTPUT) 68 + 69 + $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) 70 + $(call msg,GEN,$@) 71 + $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ 72 + echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ 73 + "specify its location." >&2; \ 74 + exit 1;\ 75 + fi 76 + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ 77 + 78 + $(BPFOBJ): | $(OUTPUT) 79 + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ 80 + OUTPUT=$(abspath $(dir $@))/ $(abspath $@) 81 + 82 + $(DEFAULT_BPFTOOL): 83 + $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ 84 + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
+100
tools/bpf/runqslower/runqslower.bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2019 Facebook 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include "runqslower.h" 6 + 7 + #define TASK_RUNNING 0 8 + 9 + #define BPF_F_INDEX_MASK 0xffffffffULL 10 + #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK 11 + 12 + const volatile __u64 min_us = 0; 13 + const volatile pid_t targ_pid = 0; 14 + 15 + struct { 16 + __uint(type, BPF_MAP_TYPE_HASH); 17 + __uint(max_entries, 10240); 18 + __type(key, u32); 19 + __type(value, u64); 20 + } start SEC(".maps"); 21 + 22 + struct { 23 + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 24 + __uint(key_size, sizeof(u32)); 25 + __uint(value_size, sizeof(u32)); 26 + } events SEC(".maps"); 27 + 28 + /* record enqueue timestamp */ 29 + __always_inline 30 + static int trace_enqueue(u32 tgid, u32 pid) 31 + { 32 + u64 ts; 33 + 34 + if (!pid || (targ_pid && targ_pid != pid)) 35 + return 0; 36 + 37 + ts = bpf_ktime_get_ns(); 38 + bpf_map_update_elem(&start, &pid, &ts, 0); 39 + return 0; 40 + } 41 + 42 + SEC("tp_btf/sched_wakeup") 43 + int handle__sched_wakeup(u64 *ctx) 44 + { 45 + /* TP_PROTO(struct task_struct *p) */ 46 + struct task_struct *p = (void *)ctx[0]; 47 + 48 + return trace_enqueue(p->tgid, p->pid); 49 + } 50 + 51 + SEC("tp_btf/sched_wakeup_new") 52 + int handle__sched_wakeup_new(u64 *ctx) 53 + { 54 + /* TP_PROTO(struct task_struct *p) */ 55 + struct task_struct *p = (void *)ctx[0]; 56 + 57 + return trace_enqueue(p->tgid, p->pid); 58 + } 59 + 60 + SEC("tp_btf/sched_switch") 61 + int handle__sched_switch(u64 *ctx) 62 + { 63 + /* TP_PROTO(bool preempt, struct task_struct *prev, 64 + * struct task_struct *next) 65 + */ 66 + struct task_struct *prev = (struct task_struct *)ctx[1]; 67 + struct task_struct *next = (struct task_struct *)ctx[2]; 68 + struct event event = {}; 69 + u64 *tsp, delta_us; 70 + long state; 71 + u32 pid; 72 + 73 + /* ivcsw: treat like an enqueue event and store timestamp */ 74 + if (prev->state == TASK_RUNNING) 75 + 
trace_enqueue(prev->tgid, prev->pid); 76 + 77 + pid = next->pid; 78 + 79 + /* fetch timestamp and calculate delta */ 80 + tsp = bpf_map_lookup_elem(&start, &pid); 81 + if (!tsp) 82 + return 0; /* missed enqueue */ 83 + 84 + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; 85 + if (min_us && delta_us <= min_us) 86 + return 0; 87 + 88 + event.pid = pid; 89 + event.delta_us = delta_us; 90 + bpf_get_current_comm(&event.task, sizeof(event.task)); 91 + 92 + /* output */ 93 + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, 94 + &event, sizeof(event)); 95 + 96 + bpf_map_delete_elem(&start, &pid); 97 + return 0; 98 + } 99 + 100 + char LICENSE[] SEC("license") = "GPL";
+187
tools/bpf/runqslower/runqslower.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + // Copyright (c) 2019 Facebook 3 + #include <argp.h> 4 + #include <stdio.h> 5 + #include <stdlib.h> 6 + #include <string.h> 7 + #include <sys/resource.h> 8 + #include <time.h> 9 + #include <bpf/libbpf.h> 10 + #include <bpf/bpf.h> 11 + #include "runqslower.h" 12 + #include "runqslower.skel.h" 13 + 14 + struct env { 15 + pid_t pid; 16 + __u64 min_us; 17 + bool verbose; 18 + } env = { 19 + .min_us = 10000, 20 + }; 21 + 22 + const char *argp_program_version = "runqslower 0.1"; 23 + const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; 24 + const char argp_program_doc[] = 25 + "runqslower Trace long process scheduling delays.\n" 26 + " For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n" 27 + "\n" 28 + "This script traces high scheduling delays between tasks being\n" 29 + "ready to run and them running on CPU after that.\n" 30 + "\n" 31 + "USAGE: runqslower [-p PID] [min_us]\n" 32 + "\n" 33 + "EXAMPLES:\n" 34 + " runqslower # trace run queue latency higher than 10000 us (default)\n" 35 + " runqslower 1000 # trace run queue latency higher than 1000 us\n" 36 + " runqslower -p 123 # trace pid 123 only\n"; 37 + 38 + static const struct argp_option opts[] = { 39 + { "pid", 'p', "PID", 0, "Process PID to trace"}, 40 + { "verbose", 'v', NULL, 0, "Verbose debug output" }, 41 + {}, 42 + }; 43 + 44 + static error_t parse_arg(int key, char *arg, struct argp_state *state) 45 + { 46 + static int pos_args; 47 + int pid; 48 + long long min_us; 49 + 50 + switch (key) { 51 + case 'v': 52 + env.verbose = true; 53 + break; 54 + case 'p': 55 + errno = 0; 56 + pid = strtol(arg, NULL, 10); 57 + if (errno || pid <= 0) { 58 + fprintf(stderr, "Invalid PID: %s\n", arg); 59 + argp_usage(state); 60 + } 61 + env.pid = pid; 62 + break; 63 + case ARGP_KEY_ARG: 64 + if (pos_args++) { 65 + fprintf(stderr, 66 + "Unrecognized positional argument: %s\n", arg); 67 + argp_usage(state); 68 + } 69 + errno = 0; 70 + min_us = strtoll(arg, 
NULL, 10); 71 + if (errno || min_us <= 0) { 72 + fprintf(stderr, "Invalid delay (in us): %s\n", arg); 73 + argp_usage(state); 74 + } 75 + env.min_us = min_us; 76 + break; 77 + default: 78 + return ARGP_ERR_UNKNOWN; 79 + } 80 + return 0; 81 + } 82 + 83 + int libbpf_print_fn(enum libbpf_print_level level, 84 + const char *format, va_list args) 85 + { 86 + if (level == LIBBPF_DEBUG && !env.verbose) 87 + return 0; 88 + return vfprintf(stderr, format, args); 89 + } 90 + 91 + static int bump_memlock_rlimit(void) 92 + { 93 + struct rlimit rlim_new = { 94 + .rlim_cur = RLIM_INFINITY, 95 + .rlim_max = RLIM_INFINITY, 96 + }; 97 + 98 + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); 99 + } 100 + 101 + void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) 102 + { 103 + const struct event *e = data; 104 + struct tm *tm; 105 + char ts[32]; 106 + time_t t; 107 + 108 + time(&t); 109 + tm = localtime(&t); 110 + strftime(ts, sizeof(ts), "%H:%M:%S", tm); 111 + printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us); 112 + } 113 + 114 + void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) 115 + { 116 + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); 117 + } 118 + 119 + int main(int argc, char **argv) 120 + { 121 + static const struct argp argp = { 122 + .options = opts, 123 + .parser = parse_arg, 124 + .doc = argp_program_doc, 125 + }; 126 + struct perf_buffer_opts pb_opts; 127 + struct perf_buffer *pb = NULL; 128 + struct runqslower_bpf *obj; 129 + int err; 130 + 131 + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); 132 + if (err) 133 + return err; 134 + 135 + libbpf_set_print(libbpf_print_fn); 136 + 137 + err = bump_memlock_rlimit(); 138 + if (err) { 139 + fprintf(stderr, "failed to increase rlimit: %d", err); 140 + return 1; 141 + } 142 + 143 + obj = runqslower_bpf__open(); 144 + if (!obj) { 145 + fprintf(stderr, "failed to open and/or load BPF object\n"); 146 + return 1; 147 + } 148 + 149 + /* initialize global data (filtering options) */ 
150 + obj->rodata->targ_pid = env.pid; 151 + obj->rodata->min_us = env.min_us; 152 + 153 + err = runqslower_bpf__load(obj); 154 + if (err) { 155 + fprintf(stderr, "failed to load BPF object: %d\n", err); 156 + goto cleanup; 157 + } 158 + 159 + err = runqslower_bpf__attach(obj); 160 + if (err) { 161 + fprintf(stderr, "failed to attach BPF programs\n"); 162 + goto cleanup; 163 + } 164 + 165 + printf("Tracing run queue latency higher than %llu us\n", env.min_us); 166 + printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); 167 + 168 + pb_opts.sample_cb = handle_event; 169 + pb_opts.lost_cb = handle_lost_events; 170 + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); 171 + err = libbpf_get_error(pb); 172 + if (err) { 173 + pb = NULL; 174 + fprintf(stderr, "failed to open perf buffer: %d\n", err); 175 + goto cleanup; 176 + } 177 + 178 + while ((err = perf_buffer__poll(pb, 100)) >= 0) 179 + ; 180 + printf("Error polling perf buffer: %d\n", err); 181 + 182 + cleanup: 183 + perf_buffer__free(pb); 184 + runqslower_bpf__destroy(obj); 185 + 186 + return err != 0; 187 + }
+13
tools/bpf/runqslower/runqslower.h
··· 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 + #ifndef __RUNQSLOWER_H 3 + #define __RUNQSLOWER_H 4 + 5 + #define TASK_COMM_LEN 16 6 + 7 + struct event { 8 + char task[TASK_COMM_LEN]; 9 + __u64 delta_us; 10 + pid_t pid; 11 + }; 12 + 13 + #endif /* __RUNQSLOWER_H */
+68 -4
tools/include/uapi/linux/bpf.h
··· 107 107 BPF_MAP_LOOKUP_AND_DELETE_ELEM, 108 108 BPF_MAP_FREEZE, 109 109 BPF_BTF_GET_NEXT_ID, 110 + BPF_MAP_LOOKUP_BATCH, 111 + BPF_MAP_LOOKUP_AND_DELETE_BATCH, 112 + BPF_MAP_UPDATE_BATCH, 113 + BPF_MAP_DELETE_BATCH, 110 114 }; 111 115 112 116 enum bpf_map_type { ··· 140 136 BPF_MAP_TYPE_STACK, 141 137 BPF_MAP_TYPE_SK_STORAGE, 142 138 BPF_MAP_TYPE_DEVMAP_HASH, 139 + BPF_MAP_TYPE_STRUCT_OPS, 143 140 }; 144 141 145 142 /* Note that tracing related programs such as ··· 179 174 BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, 180 175 BPF_PROG_TYPE_CGROUP_SOCKOPT, 181 176 BPF_PROG_TYPE_TRACING, 177 + BPF_PROG_TYPE_STRUCT_OPS, 178 + BPF_PROG_TYPE_EXT, 182 179 }; 183 180 184 181 enum bpf_attach_type { ··· 364 357 /* Enable memory-mapping BPF map */ 365 358 #define BPF_F_MMAPABLE (1U << 10) 366 359 367 - /* flags for BPF_PROG_QUERY */ 360 + /* Flags for BPF_PROG_QUERY. */ 361 + 362 + /* Query effective (directly attached + inherited from ancestor cgroups) 363 + * programs that will be executed for events within a cgroup. 364 + * attach_flags with this flag are returned only for directly attached programs. 
365 + */ 368 366 #define BPF_F_QUERY_EFFECTIVE (1U << 0) 369 367 370 368 enum bpf_stack_build_id_status { ··· 409 397 __u32 btf_fd; /* fd pointing to a BTF type data */ 410 398 __u32 btf_key_type_id; /* BTF type_id of the key */ 411 399 __u32 btf_value_type_id; /* BTF type_id of the value */ 400 + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- 401 + * struct stored as the 402 + * map value 403 + */ 412 404 }; 413 405 414 406 struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ ··· 424 408 }; 425 409 __u64 flags; 426 410 }; 411 + 412 + struct { /* struct used by BPF_MAP_*_BATCH commands */ 413 + __aligned_u64 in_batch; /* start batch, 414 + * NULL to start from beginning 415 + */ 416 + __aligned_u64 out_batch; /* output: next start batch */ 417 + __aligned_u64 keys; 418 + __aligned_u64 values; 419 + __u32 count; /* input/output: 420 + * input: # of key/value 421 + * elements 422 + * output: # of filled elements 423 + */ 424 + __u32 map_fd; 425 + __u64 elem_flags; 426 + __u64 flags; 427 + } batch; 427 428 428 429 struct { /* anonymous struct used by BPF_PROG_LOAD command */ 429 430 __u32 prog_type; /* one of enum bpf_prog_type */ ··· 2736 2703 * 2737 2704 * int bpf_send_signal(u32 sig) 2738 2705 * Description 2739 - * Send signal *sig* to the current task. 2706 + * Send signal *sig* to the process of the current task. 2707 + * The signal may be delivered to any of this process's threads. 2740 2708 * Return 2741 2709 * 0 on success or successfully queued. 2742 2710 * ··· 2865 2831 * Return 2866 2832 * On success, the strictly positive length of the string, including 2867 2833 * the trailing NUL character. On error, a negative value. 2834 + * 2835 + * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) 2836 + * Description 2837 + * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. 2838 + * *rcv_nxt* is the ack_seq to be sent out. 2839 + * Return 2840 + * 0 on success, or a negative error in case of failure. 
2841 + * 2842 + * int bpf_send_signal_thread(u32 sig) 2843 + * Description 2844 + * Send signal *sig* to the thread corresponding to the current task. 2845 + * Return 2846 + * 0 on success or successfully queued. 2847 + * 2848 + * **-EBUSY** if work queue under nmi is full. 2849 + * 2850 + * **-EINVAL** if *sig* is invalid. 2851 + * 2852 + * **-EPERM** if no permission to send the *sig*. 2853 + * 2854 + * **-EAGAIN** if bpf program can try again. 2855 + * 2856 + * u64 bpf_jiffies64(void) 2857 + * Description 2858 + * Obtain the 64bit jiffies 2859 + * Return 2860 + * The 64 bit jiffies 2868 2861 */ 2869 2862 #define __BPF_FUNC_MAPPER(FN) \ 2870 2863 FN(unspec), \ ··· 3009 2948 FN(probe_read_user), \ 3010 2949 FN(probe_read_kernel), \ 3011 2950 FN(probe_read_user_str), \ 3012 - FN(probe_read_kernel_str), 2951 + FN(probe_read_kernel_str), \ 2952 + FN(tcp_send_ack), \ 2953 + FN(send_signal_thread), \ 2954 + FN(jiffies64), 3013 2955 3014 2956 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 3015 2957 * function eBPF program intends to call ··· 3413 3349 __u32 map_flags; 3414 3350 char name[BPF_OBJ_NAME_LEN]; 3415 3351 __u32 ifindex; 3416 - __u32 :32; 3352 + __u32 btf_vmlinux_value_type_id; 3417 3353 __u64 netns_dev; 3418 3354 __u64 netns_ino; 3419 3355 __u32 btf_id;
+6
tools/include/uapi/linux/btf.h
··· 146 146 BTF_VAR_GLOBAL_EXTERN = 2, 147 147 }; 148 148 149 + enum btf_func_linkage { 150 + BTF_FUNC_STATIC = 0, 151 + BTF_FUNC_GLOBAL = 1, 152 + BTF_FUNC_EXTERN = 2, 153 + }; 154 + 149 155 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe 150 156 * additional information related to the variable such as its linkage. 151 157 */
+1
tools/include/uapi/linux/if_link.h
··· 169 169 IFLA_MAX_MTU, 170 170 IFLA_PROP_LIST, 171 171 IFLA_ALT_IFNAME, /* Alternative ifname */ 172 + IFLA_PERM_ADDRESS, 172 173 __IFLA_MAX 173 174 }; 174 175
+6 -5
tools/lib/bpf/Makefile
··· 183 183 $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) 184 184 185 185 $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h 186 - $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ 186 + $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ 187 187 --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) 188 188 189 189 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) ··· 273 273 $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null 274 274 275 275 clean: 276 - $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ 277 - *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ 278 - *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ 279 - $(SHARED_OBJDIR) $(STATIC_OBJDIR) 276 + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ 277 + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ 278 + $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ 279 + $(addprefix $(OUTPUT), \ 280 + *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) 280 281 $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf 281 282 282 283
+70 -2
tools/lib/bpf/bpf.c
··· 32 32 #include "libbpf.h" 33 33 #include "libbpf_internal.h" 34 34 35 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 36 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 37 + 35 38 /* 36 39 * When building perf, unistd.h is overridden. __NR_bpf is 37 40 * required to be defined explicitly. ··· 98 95 attr.btf_key_type_id = create_attr->btf_key_type_id; 99 96 attr.btf_value_type_id = create_attr->btf_value_type_id; 100 97 attr.map_ifindex = create_attr->map_ifindex; 101 - attr.inner_map_fd = create_attr->inner_map_fd; 98 + if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) 99 + attr.btf_vmlinux_value_type_id = 100 + create_attr->btf_vmlinux_value_type_id; 101 + else 102 + attr.inner_map_fd = create_attr->inner_map_fd; 102 103 103 104 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 104 105 } ··· 235 228 memset(&attr, 0, sizeof(attr)); 236 229 attr.prog_type = load_attr->prog_type; 237 230 attr.expected_attach_type = load_attr->expected_attach_type; 238 - if (attr.prog_type == BPF_PROG_TYPE_TRACING) { 231 + if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) { 232 + attr.attach_btf_id = load_attr->attach_btf_id; 233 + } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || 234 + attr.prog_type == BPF_PROG_TYPE_EXT) { 239 235 attr.attach_btf_id = load_attr->attach_btf_id; 240 236 attr.attach_prog_fd = load_attr->attach_prog_fd; 241 237 } else { ··· 451 441 attr.map_fd = fd; 452 442 453 443 return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); 444 + } 445 + 446 + static int bpf_map_batch_common(int cmd, int fd, void *in_batch, 447 + void *out_batch, void *keys, void *values, 448 + __u32 *count, 449 + const struct bpf_map_batch_opts *opts) 450 + { 451 + union bpf_attr attr; 452 + int ret; 453 + 454 + if (!OPTS_VALID(opts, bpf_map_batch_opts)) 455 + return -EINVAL; 456 + 457 + memset(&attr, 0, sizeof(attr)); 458 + attr.batch.map_fd = fd; 459 + attr.batch.in_batch = ptr_to_u64(in_batch); 460 + attr.batch.out_batch = ptr_to_u64(out_batch); 461 + 
attr.batch.keys = ptr_to_u64(keys); 462 + attr.batch.values = ptr_to_u64(values); 463 + attr.batch.count = *count; 464 + attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); 465 + attr.batch.flags = OPTS_GET(opts, flags, 0); 466 + 467 + ret = sys_bpf(cmd, &attr, sizeof(attr)); 468 + *count = attr.batch.count; 469 + 470 + return ret; 471 + } 472 + 473 + int bpf_map_delete_batch(int fd, void *keys, __u32 *count, 474 + const struct bpf_map_batch_opts *opts) 475 + { 476 + return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, 477 + NULL, keys, NULL, count, opts); 478 + } 479 + 480 + int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, 481 + void *values, __u32 *count, 482 + const struct bpf_map_batch_opts *opts) 483 + { 484 + return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch, 485 + out_batch, keys, values, count, opts); 486 + } 487 + 488 + int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, 489 + void *keys, void *values, __u32 *count, 490 + const struct bpf_map_batch_opts *opts) 491 + { 492 + return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, 493 + fd, in_batch, out_batch, keys, values, 494 + count, opts); 495 + } 496 + 497 + int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, 498 + const struct bpf_map_batch_opts *opts) 499 + { 500 + return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, 501 + keys, values, count, opts); 454 502 } 455 503 456 504 int bpf_obj_pin(int fd, const char *pathname)
+26 -1
tools/lib/bpf/bpf.h
··· 46 46 __u32 btf_key_type_id; 47 47 __u32 btf_value_type_id; 48 48 __u32 map_ifindex; 49 - __u32 inner_map_fd; 49 + union { 50 + __u32 inner_map_fd; 51 + __u32 btf_vmlinux_value_type_id; 52 + }; 50 53 }; 51 54 52 55 LIBBPF_API int ··· 127 124 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); 128 125 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); 129 126 LIBBPF_API int bpf_map_freeze(int fd); 127 + 128 + struct bpf_map_batch_opts { 129 + size_t sz; /* size of this struct for forward/backward compatibility */ 130 + __u64 elem_flags; 131 + __u64 flags; 132 + }; 133 + #define bpf_map_batch_opts__last_field flags 134 + 135 + LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, 136 + __u32 *count, 137 + const struct bpf_map_batch_opts *opts); 138 + LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, 139 + void *keys, void *values, __u32 *count, 140 + const struct bpf_map_batch_opts *opts); 141 + LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, 142 + void *out_batch, void *keys, 143 + void *values, __u32 *count, 144 + const struct bpf_map_batch_opts *opts); 145 + LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, 146 + __u32 *count, 147 + const struct bpf_map_batch_opts *opts); 148 + 130 149 LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 131 150 LIBBPF_API int bpf_obj_get(const char *pathname); 132 151
+3
tools/lib/bpf/bpf_prog_linfo.c
··· 8 8 #include "libbpf.h" 9 9 #include "libbpf_internal.h" 10 10 11 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 12 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 13 + 11 14 struct bpf_prog_linfo { 12 15 void *raw_linfo; 13 16 void *raw_jited_linfo;
+99 -6
tools/lib/bpf/btf.c
··· 8 8 #include <fcntl.h> 9 9 #include <unistd.h> 10 10 #include <errno.h> 11 + #include <sys/utsname.h> 12 + #include <sys/param.h> 13 + #include <sys/stat.h> 14 + #include <linux/kernel.h> 11 15 #include <linux/err.h> 12 16 #include <linux/btf.h> 13 17 #include <gelf.h> ··· 21 17 #include "libbpf_internal.h" 22 18 #include "hashmap.h" 23 19 24 - #define BTF_MAX_NR_TYPES 0x7fffffff 25 - #define BTF_MAX_STR_OFFSET 0x7fffffff 20 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 21 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 22 + 23 + #define BTF_MAX_NR_TYPES 0x7fffffffU 24 + #define BTF_MAX_STR_OFFSET 0x7fffffffU 26 25 27 26 static struct btf_type btf_void; 28 27 ··· 57 50 if (btf->types_size == BTF_MAX_NR_TYPES) 58 51 return -E2BIG; 59 52 60 - expand_by = max(btf->types_size >> 2, 16); 53 + expand_by = max(btf->types_size >> 2, 16U); 61 54 new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); 62 55 63 56 new_types = realloc(btf->types, sizeof(*new_types) * new_size); ··· 293 286 switch (kind) { 294 287 case BTF_KIND_INT: 295 288 case BTF_KIND_ENUM: 296 - return min(sizeof(void *), t->size); 289 + return min(sizeof(void *), (size_t)t->size); 297 290 case BTF_KIND_PTR: 298 291 return sizeof(void *); 299 292 case BTF_KIND_TYPEDEF: ··· 1405 1398 if (d->hypot_cnt == d->hypot_cap) { 1406 1399 __u32 *new_list; 1407 1400 1408 - d->hypot_cap += max(16, d->hypot_cap / 2); 1401 + d->hypot_cap += max((size_t)16, d->hypot_cap / 2); 1409 1402 new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); 1410 1403 if (!new_list) 1411 1404 return -ENOMEM; ··· 1701 1694 if (strs.cnt + 1 > strs.cap) { 1702 1695 struct btf_str_ptr *new_ptrs; 1703 1696 1704 - strs.cap += max(strs.cnt / 2, 16); 1697 + strs.cap += max(strs.cnt / 2, 16U); 1705 1698 new_ptrs = realloc(strs.ptrs, 1706 1699 sizeof(strs.ptrs[0]) * strs.cap); 1707 1700 if (!new_ptrs) { ··· 2934 2927 return r; 2935 2928 } 2936 2929 return 0; 2930 + } 2931 + 2932 + static struct btf 
*btf_load_raw(const char *path) 2933 + { 2934 + struct btf *btf; 2935 + size_t read_cnt; 2936 + struct stat st; 2937 + void *data; 2938 + FILE *f; 2939 + 2940 + if (stat(path, &st)) 2941 + return ERR_PTR(-errno); 2942 + 2943 + data = malloc(st.st_size); 2944 + if (!data) 2945 + return ERR_PTR(-ENOMEM); 2946 + 2947 + f = fopen(path, "rb"); 2948 + if (!f) { 2949 + btf = ERR_PTR(-errno); 2950 + goto cleanup; 2951 + } 2952 + 2953 + read_cnt = fread(data, 1, st.st_size, f); 2954 + fclose(f); 2955 + if (read_cnt < st.st_size) { 2956 + btf = ERR_PTR(-EBADF); 2957 + goto cleanup; 2958 + } 2959 + 2960 + btf = btf__new(data, read_cnt); 2961 + 2962 + cleanup: 2963 + free(data); 2964 + return btf; 2965 + } 2966 + 2967 + /* 2968 + * Probe few well-known locations for vmlinux kernel image and try to load BTF 2969 + * data out of it to use for target BTF. 2970 + */ 2971 + struct btf *libbpf_find_kernel_btf(void) 2972 + { 2973 + struct { 2974 + const char *path_fmt; 2975 + bool raw_btf; 2976 + } locations[] = { 2977 + /* try canonical vmlinux BTF through sysfs first */ 2978 + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, 2979 + /* fall back to trying to find vmlinux ELF on disk otherwise */ 2980 + { "/boot/vmlinux-%1$s" }, 2981 + { "/lib/modules/%1$s/vmlinux-%1$s" }, 2982 + { "/lib/modules/%1$s/build/vmlinux" }, 2983 + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, 2984 + { "/usr/lib/debug/boot/vmlinux-%1$s" }, 2985 + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, 2986 + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, 2987 + }; 2988 + char path[PATH_MAX + 1]; 2989 + struct utsname buf; 2990 + struct btf *btf; 2991 + int i; 2992 + 2993 + uname(&buf); 2994 + 2995 + for (i = 0; i < ARRAY_SIZE(locations); i++) { 2996 + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); 2997 + 2998 + if (access(path, R_OK)) 2999 + continue; 3000 + 3001 + if (locations[i].raw_btf) 3002 + btf = btf_load_raw(path); 3003 + else 3004 + btf = btf__parse_elf(path, NULL); 3005 + 3006 + 
pr_debug("loading kernel BTF '%s': %ld\n", 3007 + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); 3008 + if (IS_ERR(btf)) 3009 + continue; 3010 + 3011 + return btf; 3012 + } 3013 + 3014 + pr_warn("failed to find valid kernel BTF\n"); 3015 + return ERR_PTR(-ESRCH); 2937 3016 }
+2
tools/lib/bpf/btf.h
··· 102 102 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); 103 103 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); 104 104 105 + LIBBPF_API struct btf *libbpf_find_kernel_btf(void); 106 + 105 107 struct btf_dedup_opts { 106 108 unsigned int dedup_table_size; 107 109 bool dont_resolve_fwds;
+4
tools/lib/bpf/btf_dump.c
··· 18 18 #include "libbpf.h" 19 19 #include "libbpf_internal.h" 20 20 21 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 22 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 23 + 21 24 static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; 22 25 static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; 23 26 ··· 142 139 if (IS_ERR(d->type_names)) { 143 140 err = PTR_ERR(d->type_names); 144 141 d->type_names = NULL; 142 + goto err; 145 143 } 146 144 d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); 147 145 if (IS_ERR(d->ident_names)) {
+3
tools/lib/bpf/hashmap.c
··· 12 12 #include <linux/err.h> 13 13 #include "hashmap.h" 14 14 15 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 16 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 17 + 15 18 /* start with 4 buckets */ 16 19 #define HASHMAP_MIN_CAP_BITS 2 17 20
+791 -161
tools/lib/bpf/libbpf.c
··· 55 55 #include "libbpf_internal.h" 56 56 #include "hashmap.h" 57 57 58 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 59 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 60 + 58 61 #ifndef EM_BPF 59 62 #define EM_BPF 247 60 63 #endif ··· 72 69 #pragma GCC diagnostic ignored "-Wformat-nonliteral" 73 70 74 71 #define __printf(a, b) __attribute__((format(printf, a, b))) 72 + 73 + static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); 74 + static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, 75 + int idx); 76 + static const struct btf_type * 77 + skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); 75 78 76 79 static int __base_pr(enum libbpf_print_level level, const char *format, 77 80 va_list args) ··· 175 166 __u32 btf_datasec:1; 176 167 /* BPF_F_MMAPABLE is supported for arrays */ 177 168 __u32 array_mmap:1; 169 + /* BTF_FUNC_GLOBAL is supported */ 170 + __u32 btf_func_global:1; 178 171 }; 179 172 180 173 enum reloc_type { ··· 240 229 __u32 prog_flags; 241 230 }; 242 231 232 + struct bpf_struct_ops { 233 + const char *tname; 234 + const struct btf_type *type; 235 + struct bpf_program **progs; 236 + __u32 *kern_func_off; 237 + /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ 238 + void *data; 239 + /* e.g. struct bpf_struct_ops_tcp_congestion_ops in 240 + * btf_vmlinux's format. 241 + * struct bpf_struct_ops_tcp_congestion_ops { 242 + * [... some other kernel fields ...] 243 + * struct tcp_congestion_ops data; 244 + * } 245 + * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) 246 + * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" 247 + * from "data". 
248 + */ 249 + void *kern_vdata; 250 + __u32 type_id; 251 + }; 252 + 243 253 #define DATA_SEC ".data" 244 254 #define BSS_SEC ".bss" 245 255 #define RODATA_SEC ".rodata" 246 256 #define KCONFIG_SEC ".kconfig" 257 + #define STRUCT_OPS_SEC ".struct_ops" 247 258 248 259 enum libbpf_map_type { 249 260 LIBBPF_MAP_UNSPEC, ··· 292 259 struct bpf_map_def def; 293 260 __u32 btf_key_type_id; 294 261 __u32 btf_value_type_id; 262 + __u32 btf_vmlinux_value_type_id; 295 263 void *priv; 296 264 bpf_map_clear_priv_t clear_priv; 297 265 enum libbpf_map_type libbpf_type; 298 266 void *mmaped; 267 + struct bpf_struct_ops *st_ops; 299 268 char *pin_path; 300 269 bool pinned; 301 270 bool reused; ··· 361 326 Elf_Data *data; 362 327 Elf_Data *rodata; 363 328 Elf_Data *bss; 329 + Elf_Data *st_ops_data; 364 330 size_t strtabidx; 365 331 struct { 366 332 GElf_Shdr shdr; ··· 375 339 int data_shndx; 376 340 int rodata_shndx; 377 341 int bss_shndx; 342 + int st_ops_shndx; 378 343 } efile; 379 344 /* 380 345 * All loaded bpf_object is linked in a list, which is ··· 385 348 struct list_head list; 386 349 387 350 struct btf *btf; 351 + /* Parse and load BTF vmlinux if any of the programs in the object need 352 + * it at load time. 
353 + */ 354 + struct btf *btf_vmlinux; 388 355 struct btf_ext *btf_ext; 389 356 390 357 void *priv; ··· 607 566 return KERNEL_VERSION(major, minor, patch); 608 567 } 609 568 569 + static const struct btf_member * 570 + find_member_by_offset(const struct btf_type *t, __u32 bit_offset) 571 + { 572 + struct btf_member *m; 573 + int i; 574 + 575 + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 576 + if (btf_member_bit_offset(t, i) == bit_offset) 577 + return m; 578 + } 579 + 580 + return NULL; 581 + } 582 + 583 + static const struct btf_member * 584 + find_member_by_name(const struct btf *btf, const struct btf_type *t, 585 + const char *name) 586 + { 587 + struct btf_member *m; 588 + int i; 589 + 590 + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { 591 + if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) 592 + return m; 593 + } 594 + 595 + return NULL; 596 + } 597 + 598 + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" 599 + static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 600 + const char *name, __u32 kind); 601 + 602 + static int 603 + find_struct_ops_kern_types(const struct btf *btf, const char *tname, 604 + const struct btf_type **type, __u32 *type_id, 605 + const struct btf_type **vtype, __u32 *vtype_id, 606 + const struct btf_member **data_member) 607 + { 608 + const struct btf_type *kern_type, *kern_vtype; 609 + const struct btf_member *kern_data_member; 610 + __s32 kern_vtype_id, kern_type_id; 611 + __u32 i; 612 + 613 + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); 614 + if (kern_type_id < 0) { 615 + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", 616 + tname); 617 + return kern_type_id; 618 + } 619 + kern_type = btf__type_by_id(btf, kern_type_id); 620 + 621 + /* Find the corresponding "map_value" type that will be used 622 + * in map_update(BPF_MAP_TYPE_STRUCT_OPS). 
For example, 623 + * find "struct bpf_struct_ops_tcp_congestion_ops" from the 624 + * btf_vmlinux. 625 + */ 626 + kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, 627 + tname, BTF_KIND_STRUCT); 628 + if (kern_vtype_id < 0) { 629 + pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", 630 + STRUCT_OPS_VALUE_PREFIX, tname); 631 + return kern_vtype_id; 632 + } 633 + kern_vtype = btf__type_by_id(btf, kern_vtype_id); 634 + 635 + /* Find "struct tcp_congestion_ops" from 636 + * struct bpf_struct_ops_tcp_congestion_ops { 637 + * [ ... ] 638 + * struct tcp_congestion_ops data; 639 + * } 640 + */ 641 + kern_data_member = btf_members(kern_vtype); 642 + for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { 643 + if (kern_data_member->type == kern_type_id) 644 + break; 645 + } 646 + if (i == btf_vlen(kern_vtype)) { 647 + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", 648 + tname, STRUCT_OPS_VALUE_PREFIX, tname); 649 + return -EINVAL; 650 + } 651 + 652 + *type = kern_type; 653 + *type_id = kern_type_id; 654 + *vtype = kern_vtype; 655 + *vtype_id = kern_vtype_id; 656 + *data_member = kern_data_member; 657 + 658 + return 0; 659 + } 660 + 661 + static bool bpf_map__is_struct_ops(const struct bpf_map *map) 662 + { 663 + return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; 664 + } 665 + 666 + /* Init the map's fields that depend on kern_btf */ 667 + static int bpf_map__init_kern_struct_ops(struct bpf_map *map, 668 + const struct btf *btf, 669 + const struct btf *kern_btf) 670 + { 671 + const struct btf_member *member, *kern_member, *kern_data_member; 672 + const struct btf_type *type, *kern_type, *kern_vtype; 673 + __u32 i, kern_type_id, kern_vtype_id, kern_data_off; 674 + struct bpf_struct_ops *st_ops; 675 + void *data, *kern_data; 676 + const char *tname; 677 + int err; 678 + 679 + st_ops = map->st_ops; 680 + type = st_ops->type; 681 + tname = st_ops->tname; 682 + err = 
find_struct_ops_kern_types(kern_btf, tname, 683 + &kern_type, &kern_type_id, 684 + &kern_vtype, &kern_vtype_id, 685 + &kern_data_member); 686 + if (err) 687 + return err; 688 + 689 + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", 690 + map->name, st_ops->type_id, kern_type_id, kern_vtype_id); 691 + 692 + map->def.value_size = kern_vtype->size; 693 + map->btf_vmlinux_value_type_id = kern_vtype_id; 694 + 695 + st_ops->kern_vdata = calloc(1, kern_vtype->size); 696 + if (!st_ops->kern_vdata) 697 + return -ENOMEM; 698 + 699 + data = st_ops->data; 700 + kern_data_off = kern_data_member->offset / 8; 701 + kern_data = st_ops->kern_vdata + kern_data_off; 702 + 703 + member = btf_members(type); 704 + for (i = 0; i < btf_vlen(type); i++, member++) { 705 + const struct btf_type *mtype, *kern_mtype; 706 + __u32 mtype_id, kern_mtype_id; 707 + void *mdata, *kern_mdata; 708 + __s64 msize, kern_msize; 709 + __u32 moff, kern_moff; 710 + __u32 kern_member_idx; 711 + const char *mname; 712 + 713 + mname = btf__name_by_offset(btf, member->name_off); 714 + kern_member = find_member_by_name(kern_btf, kern_type, mname); 715 + if (!kern_member) { 716 + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", 717 + map->name, mname); 718 + return -ENOTSUP; 719 + } 720 + 721 + kern_member_idx = kern_member - btf_members(kern_type); 722 + if (btf_member_bitfield_size(type, i) || 723 + btf_member_bitfield_size(kern_type, kern_member_idx)) { 724 + pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", 725 + map->name, mname); 726 + return -ENOTSUP; 727 + } 728 + 729 + moff = member->offset / 8; 730 + kern_moff = kern_member->offset / 8; 731 + 732 + mdata = data + moff; 733 + kern_mdata = kern_data + kern_moff; 734 + 735 + mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); 736 + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, 737 + &kern_mtype_id); 738 + if (BTF_INFO_KIND(mtype->info) != 739 + 
BTF_INFO_KIND(kern_mtype->info)) { 740 + pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", 741 + map->name, mname, BTF_INFO_KIND(mtype->info), 742 + BTF_INFO_KIND(kern_mtype->info)); 743 + return -ENOTSUP; 744 + } 745 + 746 + if (btf_is_ptr(mtype)) { 747 + struct bpf_program *prog; 748 + 749 + mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); 750 + kern_mtype = skip_mods_and_typedefs(kern_btf, 751 + kern_mtype->type, 752 + &kern_mtype_id); 753 + if (!btf_is_func_proto(mtype) || 754 + !btf_is_func_proto(kern_mtype)) { 755 + pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", 756 + map->name, mname); 757 + return -ENOTSUP; 758 + } 759 + 760 + prog = st_ops->progs[i]; 761 + if (!prog) { 762 + pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", 763 + map->name, mname); 764 + continue; 765 + } 766 + 767 + prog->attach_btf_id = kern_type_id; 768 + prog->expected_attach_type = kern_member_idx; 769 + 770 + st_ops->kern_func_off[i] = kern_data_off + kern_moff; 771 + 772 + pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", 773 + map->name, mname, prog->name, moff, 774 + kern_moff); 775 + 776 + continue; 777 + } 778 + 779 + msize = btf__resolve_size(btf, mtype_id); 780 + kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); 781 + if (msize < 0 || kern_msize < 0 || msize != kern_msize) { 782 + pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", 783 + map->name, mname, (ssize_t)msize, 784 + (ssize_t)kern_msize); 785 + return -ENOTSUP; 786 + } 787 + 788 + pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", 789 + map->name, mname, (unsigned int)msize, 790 + moff, kern_moff); 791 + memcpy(kern_mdata, mdata, msize); 792 + } 793 + 794 + return 0; 795 + } 796 + 797 + static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) 798 + { 799 + struct bpf_map *map; 800 + size_t i; 801 
+ int err; 802 + 803 + for (i = 0; i < obj->nr_maps; i++) { 804 + map = &obj->maps[i]; 805 + 806 + if (!bpf_map__is_struct_ops(map)) 807 + continue; 808 + 809 + err = bpf_map__init_kern_struct_ops(map, obj->btf, 810 + obj->btf_vmlinux); 811 + if (err) 812 + return err; 813 + } 814 + 815 + return 0; 816 + } 817 + 818 + static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) 819 + { 820 + const struct btf_type *type, *datasec; 821 + const struct btf_var_secinfo *vsi; 822 + struct bpf_struct_ops *st_ops; 823 + const char *tname, *var_name; 824 + __s32 type_id, datasec_id; 825 + const struct btf *btf; 826 + struct bpf_map *map; 827 + __u32 i; 828 + 829 + if (obj->efile.st_ops_shndx == -1) 830 + return 0; 831 + 832 + btf = obj->btf; 833 + datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, 834 + BTF_KIND_DATASEC); 835 + if (datasec_id < 0) { 836 + pr_warn("struct_ops init: DATASEC %s not found\n", 837 + STRUCT_OPS_SEC); 838 + return -EINVAL; 839 + } 840 + 841 + datasec = btf__type_by_id(btf, datasec_id); 842 + vsi = btf_var_secinfos(datasec); 843 + for (i = 0; i < btf_vlen(datasec); i++, vsi++) { 844 + type = btf__type_by_id(obj->btf, vsi->type); 845 + var_name = btf__name_by_offset(obj->btf, type->name_off); 846 + 847 + type_id = btf__resolve_type(obj->btf, vsi->type); 848 + if (type_id < 0) { 849 + pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", 850 + vsi->type, STRUCT_OPS_SEC); 851 + return -EINVAL; 852 + } 853 + 854 + type = btf__type_by_id(obj->btf, type_id); 855 + tname = btf__name_by_offset(obj->btf, type->name_off); 856 + if (!tname[0]) { 857 + pr_warn("struct_ops init: anonymous type is not supported\n"); 858 + return -ENOTSUP; 859 + } 860 + if (!btf_is_struct(type)) { 861 + pr_warn("struct_ops init: %s is not a struct\n", tname); 862 + return -EINVAL; 863 + } 864 + 865 + map = bpf_object__add_map(obj); 866 + if (IS_ERR(map)) 867 + return PTR_ERR(map); 868 + 869 + map->sec_idx = obj->efile.st_ops_shndx; 870 + 
map->sec_offset = vsi->offset; 871 + map->name = strdup(var_name); 872 + if (!map->name) 873 + return -ENOMEM; 874 + 875 + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; 876 + map->def.key_size = sizeof(int); 877 + map->def.value_size = type->size; 878 + map->def.max_entries = 1; 879 + 880 + map->st_ops = calloc(1, sizeof(*map->st_ops)); 881 + if (!map->st_ops) 882 + return -ENOMEM; 883 + st_ops = map->st_ops; 884 + st_ops->data = malloc(type->size); 885 + st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); 886 + st_ops->kern_func_off = malloc(btf_vlen(type) * 887 + sizeof(*st_ops->kern_func_off)); 888 + if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) 889 + return -ENOMEM; 890 + 891 + if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { 892 + pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", 893 + var_name, STRUCT_OPS_SEC); 894 + return -EINVAL; 895 + } 896 + 897 + memcpy(st_ops->data, 898 + obj->efile.st_ops_data->d_buf + vsi->offset, 899 + type->size); 900 + st_ops->tname = tname; 901 + st_ops->type = type; 902 + st_ops->type_id = type_id; 903 + 904 + pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", 905 + tname, type_id, var_name, vsi->offset); 906 + } 907 + 908 + return 0; 909 + } 910 + 610 911 static struct bpf_object *bpf_object__new(const char *path, 611 912 const void *obj_buf, 612 913 size_t obj_buf_sz, ··· 990 607 obj->efile.data_shndx = -1; 991 608 obj->efile.rodata_shndx = -1; 992 609 obj->efile.bss_shndx = -1; 610 + obj->efile.st_ops_shndx = -1; 993 611 obj->kconfig_map_idx = -1; 994 612 995 613 obj->kern_version = get_kernel_version(); ··· 1014 630 obj->efile.data = NULL; 1015 631 obj->efile.rodata = NULL; 1016 632 obj->efile.bss = NULL; 633 + obj->efile.st_ops_data = NULL; 1017 634 1018 635 zfree(&obj->efile.reloc_sects); 1019 636 obj->efile.nr_reloc_sects = 0; ··· 1120 735 return 0; 1121 736 } 1122 737 1123 - static int compare_bpf_map(const void *_a, const void *_b) 1124 - 
{ 1125 - const struct bpf_map *a = _a; 1126 - const struct bpf_map *b = _b; 1127 - 1128 - if (a->sec_idx != b->sec_idx) 1129 - return a->sec_idx - b->sec_idx; 1130 - return a->sec_offset - b->sec_offset; 1131 - } 1132 - 1133 738 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) 1134 739 { 1135 740 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || ··· 1190 815 } else if (!strcmp(name, RODATA_SEC)) { 1191 816 if (obj->efile.rodata) 1192 817 *size = obj->efile.rodata->d_size; 818 + } else if (!strcmp(name, STRUCT_OPS_SEC)) { 819 + if (obj->efile.st_ops_data) 820 + *size = obj->efile.st_ops_data->d_size; 1193 821 } else { 1194 822 ret = bpf_object_search_section_size(obj, name, &d_size); 1195 823 if (!ret) ··· 1276 898 long page_sz = sysconf(_SC_PAGE_SIZE); 1277 899 size_t map_sz; 1278 900 1279 - map_sz = roundup(map->def.value_size, 8) * map->def.max_entries; 901 + map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; 1280 902 map_sz = roundup(map_sz, page_sz); 1281 903 return map_sz; 1282 904 } ··· 1818 1440 return t; 1819 1441 } 1820 1442 1443 + static const struct btf_type * 1444 + resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 1445 + { 1446 + const struct btf_type *t; 1447 + 1448 + t = skip_mods_and_typedefs(btf, id, NULL); 1449 + if (!btf_is_ptr(t)) 1450 + return NULL; 1451 + 1452 + t = skip_mods_and_typedefs(btf, t->type, res_id); 1453 + 1454 + return btf_is_func_proto(t) ? t : NULL; 1455 + } 1456 + 1821 1457 /* 1822 1458 * Fetch integer attribute of BTF map definition. 
Such attributes are 1823 1459 * represented using a pointer to an array, in which dimensionality of array ··· 2179 1787 err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 2180 1788 err = err ?: bpf_object__init_global_data_maps(obj); 2181 1789 err = err ?: bpf_object__init_kconfig_map(obj); 1790 + err = err ?: bpf_object__init_struct_ops_maps(obj); 2182 1791 if (err) 2183 1792 return err; 2184 1793 2185 - if (obj->nr_maps) { 2186 - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), 2187 - compare_bpf_map); 2188 - } 2189 1794 return 0; 2190 1795 } 2191 1796 ··· 2206 1817 2207 1818 static void bpf_object__sanitize_btf(struct bpf_object *obj) 2208 1819 { 1820 + bool has_func_global = obj->caps.btf_func_global; 2209 1821 bool has_datasec = obj->caps.btf_datasec; 2210 1822 bool has_func = obj->caps.btf_func; 2211 1823 struct btf *btf = obj->btf; 2212 1824 struct btf_type *t; 2213 1825 int i, j, vlen; 2214 1826 2215 - if (!obj->btf || (has_func && has_datasec)) 1827 + if (!obj->btf || (has_func && has_datasec && has_func_global)) 2216 1828 return; 2217 1829 2218 1830 for (i = 1; i <= btf__get_nr_types(btf); i++) { ··· 2261 1871 } else if (!has_func && btf_is_func(t)) { 2262 1872 /* replace FUNC with TYPEDEF */ 2263 1873 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 1874 + } else if (!has_func_global && btf_is_func(t)) { 1875 + /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 1876 + t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 2264 1877 } 2265 1878 } 2266 1879 } ··· 2282 1889 static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) 2283 1890 { 2284 1891 return obj->efile.btf_maps_shndx >= 0 || 2285 - obj->nr_extern > 0; 1892 + obj->efile.st_ops_shndx >= 0 || 1893 + obj->nr_extern > 0; 2286 1894 } 2287 1895 2288 1896 static int bpf_object__init_btf(struct bpf_object *obj, 2289 1897 Elf_Data *btf_data, 2290 1898 Elf_Data *btf_ext_data) 2291 1899 { 2292 - bool btf_required = bpf_object__is_btf_mandatory(obj); 2293 - int err = 0; 1900 
+ int err = -ENOENT; 2294 1901 2295 1902 if (btf_data) { 2296 1903 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 2297 1904 if (IS_ERR(obj->btf)) { 1905 + err = PTR_ERR(obj->btf); 1906 + obj->btf = NULL; 2298 1907 pr_warn("Error loading ELF section %s: %d.\n", 2299 1908 BTF_ELF_SEC, err); 2300 1909 goto out; 2301 1910 } 1911 + err = 0; 2302 1912 } 2303 1913 if (btf_ext_data) { 2304 1914 if (!obj->btf) { ··· 2319 1923 } 2320 1924 } 2321 1925 out: 2322 - if (err || IS_ERR(obj->btf)) { 2323 - if (btf_required) 2324 - err = err ? : PTR_ERR(obj->btf); 2325 - else 2326 - err = 0; 2327 - if (!IS_ERR_OR_NULL(obj->btf)) 2328 - btf__free(obj->btf); 2329 - obj->btf = NULL; 2330 - } 2331 - if (btf_required && !obj->btf) { 1926 + if (err && bpf_object__is_btf_mandatory(obj)) { 2332 1927 pr_warn("BTF is required, but is missing or corrupted.\n"); 2333 - return err == 0 ? -ENOENT : err; 1928 + return err; 2334 1929 } 2335 1930 return 0; 2336 1931 } ··· 2347 1960 pr_warn("BTF is required, but is missing or corrupted.\n"); 2348 1961 return -ENOENT; 2349 1962 } 1963 + return 0; 1964 + } 1965 + 1966 + static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) 1967 + { 1968 + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) 1969 + return true; 1970 + 1971 + /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 1972 + * also need vmlinux BTF 1973 + */ 1974 + if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 1975 + return true; 1976 + 1977 + return false; 1978 + } 1979 + 1980 + static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) 1981 + { 1982 + struct bpf_program *prog; 1983 + int err; 1984 + 1985 + bpf_object__for_each_program(prog, obj) { 1986 + if (libbpf_prog_needs_vmlinux_btf(prog)) { 1987 + obj->btf_vmlinux = libbpf_find_kernel_btf(); 1988 + if (IS_ERR(obj->btf_vmlinux)) { 1989 + err = PTR_ERR(obj->btf_vmlinux); 1990 + pr_warn("Error loading vmlinux BTF: %d\n", err); 1991 + obj->btf_vmlinux = NULL; 1992 + return 
err; 1993 + } 1994 + return 0; 1995 + } 1996 + } 1997 + 2350 1998 return 0; 2351 1999 } 2352 2000 ··· 2510 2088 } else if (strcmp(name, RODATA_SEC) == 0) { 2511 2089 obj->efile.rodata = data; 2512 2090 obj->efile.rodata_shndx = idx; 2091 + } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { 2092 + obj->efile.st_ops_data = data; 2093 + obj->efile.st_ops_shndx = idx; 2513 2094 } else { 2514 2095 pr_debug("skip section(%d) %s\n", idx, name); 2515 2096 } ··· 2522 2097 int sec = sh.sh_info; /* points to other section */ 2523 2098 2524 2099 /* Only do relo for section with exec instructions */ 2525 - if (!section_have_execinstr(obj, sec)) { 2100 + if (!section_have_execinstr(obj, sec) && 2101 + strcmp(name, ".rel" STRUCT_OPS_SEC)) { 2526 2102 pr_debug("skip relo %s(%d) for section(%d)\n", 2527 2103 name, idx, sec); 2528 2104 continue; ··· 3025 2599 __u32 key_type_id = 0, value_type_id = 0; 3026 2600 int ret; 3027 2601 3028 - /* if it's BTF-defined map, we don't need to search for type IDs */ 3029 - if (map->sec_idx == obj->efile.btf_maps_shndx) 2602 + /* if it's BTF-defined map, we don't need to search for type IDs. 2603 + * For struct_ops map, it does not need btf_key_type_id and 2604 + * btf_value_type_id. 
2605 + */ 2606 + if (map->sec_idx == obj->efile.btf_maps_shndx || 2607 + bpf_map__is_struct_ops(map)) 3030 2608 return 0; 3031 2609 3032 2610 if (!bpf_map__is_internal(map)) { ··· 3234 2804 return 0; 3235 2805 } 3236 2806 2807 + static int bpf_object__probe_btf_func_global(struct bpf_object *obj) 2808 + { 2809 + static const char strs[] = "\0int\0x\0a"; 2810 + /* static void x(int a) {} */ 2811 + __u32 types[] = { 2812 + /* int */ 2813 + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ 2814 + /* FUNC_PROTO */ /* [2] */ 2815 + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), 2816 + BTF_PARAM_ENC(7, 1), 2817 + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ 2818 + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), 2819 + }; 2820 + int btf_fd; 2821 + 2822 + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), 2823 + strs, sizeof(strs)); 2824 + if (btf_fd >= 0) { 2825 + obj->caps.btf_func_global = 1; 2826 + close(btf_fd); 2827 + return 1; 2828 + } 2829 + 2830 + return 0; 2831 + } 2832 + 3237 2833 static int bpf_object__probe_btf_datasec(struct bpf_object *obj) 3238 2834 { 3239 2835 static const char strs[] = "\0x\0.data"; ··· 3315 2859 bpf_object__probe_name, 3316 2860 bpf_object__probe_global_data, 3317 2861 bpf_object__probe_btf_func, 2862 + bpf_object__probe_btf_func_global, 3318 2863 bpf_object__probe_btf_datasec, 3319 2864 bpf_object__probe_array_mmap, 3320 2865 }; ··· 3482 3025 if (bpf_map_type__is_map_in_map(def->type) && 3483 3026 map->inner_map_fd >= 0) 3484 3027 create_attr.inner_map_fd = map->inner_map_fd; 3028 + if (bpf_map__is_struct_ops(map)) 3029 + create_attr.btf_vmlinux_value_type_id = 3030 + map->btf_vmlinux_value_type_id; 3485 3031 3486 3032 if (obj->btf && !bpf_map_find_btf_info(obj, map)) { 3487 3033 create_attr.btf_fd = btf__fd(obj->btf); ··· 4320 3860 return 0; 4321 3861 } 4322 3862 4323 - static struct btf *btf_load_raw(const char *path) 4324 - { 4325 - struct btf *btf; 4326 - size_t read_cnt; 4327 - struct 
stat st; 4328 - void *data; 4329 - FILE *f; 4330 - 4331 - if (stat(path, &st)) 4332 - return ERR_PTR(-errno); 4333 - 4334 - data = malloc(st.st_size); 4335 - if (!data) 4336 - return ERR_PTR(-ENOMEM); 4337 - 4338 - f = fopen(path, "rb"); 4339 - if (!f) { 4340 - btf = ERR_PTR(-errno); 4341 - goto cleanup; 4342 - } 4343 - 4344 - read_cnt = fread(data, 1, st.st_size, f); 4345 - fclose(f); 4346 - if (read_cnt < st.st_size) { 4347 - btf = ERR_PTR(-EBADF); 4348 - goto cleanup; 4349 - } 4350 - 4351 - btf = btf__new(data, read_cnt); 4352 - 4353 - cleanup: 4354 - free(data); 4355 - return btf; 4356 - } 4357 - 4358 - /* 4359 - * Probe few well-known locations for vmlinux kernel image and try to load BTF 4360 - * data out of it to use for target BTF. 4361 - */ 4362 - static struct btf *bpf_core_find_kernel_btf(void) 4363 - { 4364 - struct { 4365 - const char *path_fmt; 4366 - bool raw_btf; 4367 - } locations[] = { 4368 - /* try canonical vmlinux BTF through sysfs first */ 4369 - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, 4370 - /* fall back to trying to find vmlinux ELF on disk otherwise */ 4371 - { "/boot/vmlinux-%1$s" }, 4372 - { "/lib/modules/%1$s/vmlinux-%1$s" }, 4373 - { "/lib/modules/%1$s/build/vmlinux" }, 4374 - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, 4375 - { "/usr/lib/debug/boot/vmlinux-%1$s" }, 4376 - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, 4377 - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, 4378 - }; 4379 - char path[PATH_MAX + 1]; 4380 - struct utsname buf; 4381 - struct btf *btf; 4382 - int i; 4383 - 4384 - uname(&buf); 4385 - 4386 - for (i = 0; i < ARRAY_SIZE(locations); i++) { 4387 - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); 4388 - 4389 - if (access(path, R_OK)) 4390 - continue; 4391 - 4392 - if (locations[i].raw_btf) 4393 - btf = btf_load_raw(path); 4394 - else 4395 - btf = btf__parse_elf(path, NULL); 4396 - 4397 - pr_debug("loading kernel BTF '%s': %ld\n", 4398 - path, IS_ERR(btf) ? 
PTR_ERR(btf) : 0); 4399 - if (IS_ERR(btf)) 4400 - continue; 4401 - 4402 - return btf; 4403 - } 4404 - 4405 - pr_warn("failed to find valid kernel BTF\n"); 4406 - return ERR_PTR(-ESRCH); 4407 - } 4408 - 4409 3863 /* Output spec definition in the format: 4410 3864 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, 4411 3865 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b ··· 4554 4180 if (targ_btf_path) 4555 4181 targ_btf = btf__parse_elf(targ_btf_path, NULL); 4556 4182 else 4557 - targ_btf = bpf_core_find_kernel_btf(); 4183 + targ_btf = libbpf_find_kernel_btf(); 4558 4184 if (IS_ERR(targ_btf)) { 4559 4185 pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); 4560 4186 return PTR_ERR(targ_btf); ··· 4626 4252 size_t new_cnt; 4627 4253 int err; 4628 4254 4629 - if (prog->idx == obj->efile.text_shndx) { 4630 - pr_warn("relo in .text insn %d into off %d (insn #%d)\n", 4631 - relo->insn_idx, relo->sym_off, relo->sym_off / 8); 4632 - return -LIBBPF_ERRNO__RELOC; 4633 - } 4634 - 4635 - if (prog->main_prog_cnt == 0) { 4255 + if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { 4636 4256 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); 4637 4257 if (!text) { 4638 4258 pr_warn("no .text section found yet relo into text exist\n"); ··· 4656 4288 text->insns_cnt, text->section_name, 4657 4289 prog->section_name); 4658 4290 } 4291 + 4659 4292 insn = &prog->insns[relo->insn_idx]; 4660 4293 insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; 4661 4294 return 0; ··· 4736 4367 return err; 4737 4368 } 4738 4369 } 4370 + /* ensure .text is relocated first, as it's going to be copied as-is 4371 + * later for sub-program calls 4372 + */ 4739 4373 for (i = 0; i < obj->nr_programs; i++) { 4740 4374 prog = &obj->programs[i]; 4375 + if (prog->idx != obj->efile.text_shndx) 4376 + continue; 4377 + 4378 + err = bpf_program__relocate(prog, obj); 4379 + if (err) { 4380 + pr_warn("failed to relocate 
'%s'\n", prog->section_name); 4381 + return err; 4382 + } 4383 + break; 4384 + } 4385 + /* now relocate everything but .text, which by now is relocated 4386 + * properly, so we can copy raw sub-program instructions as is safely 4387 + */ 4388 + for (i = 0; i < obj->nr_programs; i++) { 4389 + prog = &obj->programs[i]; 4390 + if (prog->idx == obj->efile.text_shndx) 4391 + continue; 4741 4392 4742 4393 err = bpf_program__relocate(prog, obj); 4743 4394 if (err) { ··· 4767 4378 } 4768 4379 return 0; 4769 4380 } 4381 + 4382 + static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, 4383 + GElf_Shdr *shdr, 4384 + Elf_Data *data); 4770 4385 4771 4386 static int bpf_object__collect_reloc(struct bpf_object *obj) 4772 4387 { ··· 4790 4397 if (shdr->sh_type != SHT_REL) { 4791 4398 pr_warn("internal error at %d\n", __LINE__); 4792 4399 return -LIBBPF_ERRNO__INTERNAL; 4400 + } 4401 + 4402 + if (idx == obj->efile.st_ops_shndx) { 4403 + err = bpf_object__collect_struct_ops_map_reloc(obj, 4404 + shdr, 4405 + data); 4406 + if (err) 4407 + return err; 4408 + continue; 4793 4409 } 4794 4410 4795 4411 prog = bpf_object__find_prog_by_idx(obj, idx); ··· 4835 4433 load_attr.insns = insns; 4836 4434 load_attr.insns_cnt = insns_cnt; 4837 4435 load_attr.license = license; 4838 - if (prog->type == BPF_PROG_TYPE_TRACING) { 4436 + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { 4437 + load_attr.attach_btf_id = prog->attach_btf_id; 4438 + } else if (prog->type == BPF_PROG_TYPE_TRACING || 4439 + prog->type == BPF_PROG_TYPE_EXT) { 4839 4440 load_attr.attach_prog_fd = prog->attach_prog_fd; 4840 4441 load_attr.attach_btf_id = prog->attach_btf_id; 4841 4442 } else { ··· 4913 4508 return ret; 4914 4509 } 4915 4510 4916 - static int libbpf_find_attach_btf_id(const char *name, 4917 - enum bpf_attach_type attach_type, 4918 - __u32 attach_prog_fd); 4511 + static int libbpf_find_attach_btf_id(struct bpf_program *prog); 4919 4512 4920 4513 int bpf_program__load(struct bpf_program *prog, 
char *license, __u32 kern_ver) 4921 4514 { 4922 4515 int err = 0, fd, i, btf_id; 4923 4516 4924 - if (prog->type == BPF_PROG_TYPE_TRACING) { 4925 - btf_id = libbpf_find_attach_btf_id(prog->section_name, 4926 - prog->expected_attach_type, 4927 - prog->attach_prog_fd); 4517 + if (prog->type == BPF_PROG_TYPE_TRACING || 4518 + prog->type == BPF_PROG_TYPE_EXT) { 4519 + btf_id = libbpf_find_attach_btf_id(prog); 4928 4520 if (btf_id <= 0) 4929 4521 return btf_id; 4930 4522 prog->attach_btf_id = btf_id; ··· 5081 4679 enum bpf_prog_type prog_type; 5082 4680 enum bpf_attach_type attach_type; 5083 4681 4682 + if (prog->type != BPF_PROG_TYPE_UNSPEC) 4683 + continue; 4684 + 5084 4685 err = libbpf_prog_type_by_name(prog->section_name, &prog_type, 5085 4686 &attach_type); 5086 4687 if (err == -ESRCH) ··· 5094 4689 5095 4690 bpf_program__set_type(prog, prog_type); 5096 4691 bpf_program__set_expected_attach_type(prog, attach_type); 5097 - if (prog_type == BPF_PROG_TYPE_TRACING) 4692 + if (prog_type == BPF_PROG_TYPE_TRACING || 4693 + prog_type == BPF_PROG_TYPE_EXT) 5098 4694 prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); 5099 4695 } 5100 4696 ··· 5180 4774 if (!obj) 5181 4775 return -EINVAL; 5182 4776 5183 - for (i = 0; i < obj->nr_maps; i++) 4777 + for (i = 0; i < obj->nr_maps; i++) { 5184 4778 zclose(obj->maps[i].fd); 4779 + if (obj->maps[i].st_ops) 4780 + zfree(&obj->maps[i].st_ops->kern_vdata); 4781 + } 5185 4782 5186 4783 for (i = 0; i < obj->nr_programs; i++) 5187 4784 bpf_program__unload(&obj->programs[i]); ··· 5300 4891 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 5301 4892 err = err ? : bpf_object__sanitize_and_load_btf(obj); 5302 4893 err = err ? : bpf_object__sanitize_maps(obj); 4894 + err = err ? : bpf_object__load_vmlinux_btf(obj); 4895 + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 5303 4896 err = err ? : bpf_object__create_maps(obj); 5304 4897 err = err ? : bpf_object__relocate(obj, attr->target_btf_path); 5305 4898 err = err ? 
: bpf_object__load_progs(obj, attr->log_level); 4899 + 4900 + btf__free(obj->btf_vmlinux); 4901 + obj->btf_vmlinux = NULL; 4902 + 5306 4903 if (err) 5307 4904 goto out; 5308 4905 ··· 5893 5478 map->mmaped = NULL; 5894 5479 } 5895 5480 5481 + if (map->st_ops) { 5482 + zfree(&map->st_ops->data); 5483 + zfree(&map->st_ops->progs); 5484 + zfree(&map->st_ops->kern_func_off); 5485 + zfree(&map->st_ops); 5486 + } 5487 + 5896 5488 zfree(&map->name); 5897 5489 zfree(&map->pin_path); 5898 5490 } ··· 6168 5746 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); 6169 5747 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); 6170 5748 BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); 5749 + BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); 5750 + BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); 6171 5751 6172 5752 enum bpf_attach_type 6173 5753 bpf_program__get_expected_attach_type(struct bpf_program *prog) ··· 6269 5845 .expected_attach_type = BPF_TRACE_FEXIT, 6270 5846 .is_attach_btf = true, 6271 5847 .attach_fn = attach_trace), 5848 + SEC_DEF("freplace/", EXT, 5849 + .is_attach_btf = true, 5850 + .attach_fn = attach_trace), 6272 5851 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 6273 5852 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), 6274 5853 BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), ··· 6326 5899 BPF_CGROUP_GETSOCKOPT), 6327 5900 BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, 6328 5901 BPF_CGROUP_SETSOCKOPT), 5902 + BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), 6329 5903 }; 6330 5904 6331 5905 #undef BPF_PROG_SEC_IMPL ··· 6403 5975 return -ESRCH; 6404 5976 } 6405 5977 6406 - #define BTF_PREFIX "btf_trace_" 5978 + static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 5979 + size_t offset) 5980 + { 5981 + struct bpf_map *map; 5982 + size_t i; 5983 + 5984 + for (i = 0; i < obj->nr_maps; i++) { 5985 + map = &obj->maps[i]; 5986 + if (!bpf_map__is_struct_ops(map)) 5987 + continue; 5988 + if (map->sec_offset <= offset && 
5989 + offset - map->sec_offset < map->def.value_size) 5990 + return map; 5991 + } 5992 + 5993 + return NULL; 5994 + } 5995 + 5996 + /* Collect the reloc from ELF and populate the st_ops->progs[] */ 5997 + static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, 5998 + GElf_Shdr *shdr, 5999 + Elf_Data *data) 6000 + { 6001 + const struct btf_member *member; 6002 + struct bpf_struct_ops *st_ops; 6003 + struct bpf_program *prog; 6004 + unsigned int shdr_idx; 6005 + const struct btf *btf; 6006 + struct bpf_map *map; 6007 + Elf_Data *symbols; 6008 + unsigned int moff; 6009 + const char *name; 6010 + __u32 member_idx; 6011 + GElf_Sym sym; 6012 + GElf_Rel rel; 6013 + int i, nrels; 6014 + 6015 + symbols = obj->efile.symbols; 6016 + btf = obj->btf; 6017 + nrels = shdr->sh_size / shdr->sh_entsize; 6018 + for (i = 0; i < nrels; i++) { 6019 + if (!gelf_getrel(data, i, &rel)) { 6020 + pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 6021 + return -LIBBPF_ERRNO__FORMAT; 6022 + } 6023 + 6024 + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { 6025 + pr_warn("struct_ops reloc: symbol %zx not found\n", 6026 + (size_t)GELF_R_SYM(rel.r_info)); 6027 + return -LIBBPF_ERRNO__FORMAT; 6028 + } 6029 + 6030 + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, 6031 + sym.st_name) ? 
: "<?>"; 6032 + map = find_struct_ops_map_by_offset(obj, rel.r_offset); 6033 + if (!map) { 6034 + pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", 6035 + (size_t)rel.r_offset); 6036 + return -EINVAL; 6037 + } 6038 + 6039 + moff = rel.r_offset - map->sec_offset; 6040 + shdr_idx = sym.st_shndx; 6041 + st_ops = map->st_ops; 6042 + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 6043 + map->name, 6044 + (long long)(rel.r_info >> 32), 6045 + (long long)sym.st_value, 6046 + shdr_idx, (size_t)rel.r_offset, 6047 + map->sec_offset, sym.st_name, name); 6048 + 6049 + if (shdr_idx >= SHN_LORESERVE) { 6050 + pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", 6051 + map->name, (size_t)rel.r_offset, shdr_idx); 6052 + return -LIBBPF_ERRNO__RELOC; 6053 + } 6054 + 6055 + member = find_member_by_offset(st_ops->type, moff * 8); 6056 + if (!member) { 6057 + pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 6058 + map->name, moff); 6059 + return -EINVAL; 6060 + } 6061 + member_idx = member - btf_members(st_ops->type); 6062 + name = btf__name_by_offset(btf, member->name_off); 6063 + 6064 + if (!resolve_func_ptr(btf, member->type, NULL)) { 6065 + pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 6066 + map->name, name); 6067 + return -EINVAL; 6068 + } 6069 + 6070 + prog = bpf_object__find_prog_by_idx(obj, shdr_idx); 6071 + if (!prog) { 6072 + pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 6073 + map->name, shdr_idx, name); 6074 + return -EINVAL; 6075 + } 6076 + 6077 + if (prog->type == BPF_PROG_TYPE_UNSPEC) { 6078 + const struct bpf_sec_def *sec_def; 6079 + 6080 + sec_def = find_sec_def(prog->section_name); 6081 + if (sec_def && 6082 + sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { 6083 + /* for pr_warn */ 6084 + prog->type = sec_def->prog_type; 6085 + goto invalid_prog; 6086 + } 
6087 + 6088 + prog->type = BPF_PROG_TYPE_STRUCT_OPS; 6089 + prog->attach_btf_id = st_ops->type_id; 6090 + prog->expected_attach_type = member_idx; 6091 + } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || 6092 + prog->attach_btf_id != st_ops->type_id || 6093 + prog->expected_attach_type != member_idx) { 6094 + goto invalid_prog; 6095 + } 6096 + st_ops->progs[member_idx] = prog; 6097 + } 6098 + 6099 + return 0; 6100 + 6101 + invalid_prog: 6102 + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", 6103 + map->name, prog->name, prog->section_name, prog->type, 6104 + prog->attach_btf_id, prog->expected_attach_type, name); 6105 + return -EINVAL; 6106 + } 6107 + 6108 + #define BTF_TRACE_PREFIX "btf_trace_" 6109 + #define BTF_MAX_NAME_SIZE 128 6110 + 6111 + static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 6112 + const char *name, __u32 kind) 6113 + { 6114 + char btf_type_name[BTF_MAX_NAME_SIZE]; 6115 + int ret; 6116 + 6117 + ret = snprintf(btf_type_name, sizeof(btf_type_name), 6118 + "%s%s", prefix, name); 6119 + /* snprintf returns the number of characters written excluding the 6120 + * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 6121 + * indicates truncation. 
6122 + */ 6123 + if (ret < 0 || ret >= sizeof(btf_type_name)) 6124 + return -ENAMETOOLONG; 6125 + return btf__find_by_name_kind(btf, btf_type_name, kind); 6126 + } 6127 + 6128 + static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, 6129 + enum bpf_attach_type attach_type) 6130 + { 6131 + int err; 6132 + 6133 + if (attach_type == BPF_TRACE_RAW_TP) 6134 + err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, 6135 + BTF_KIND_TYPEDEF); 6136 + else 6137 + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 6138 + 6139 + return err; 6140 + } 6141 + 6407 6142 int libbpf_find_vmlinux_btf_id(const char *name, 6408 6143 enum bpf_attach_type attach_type) 6409 6144 { 6410 - struct btf *btf = bpf_core_find_kernel_btf(); 6411 - char raw_tp_btf[128] = BTF_PREFIX; 6412 - char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; 6413 - const char *btf_name; 6414 - int err = -EINVAL; 6415 - __u32 kind; 6145 + struct btf *btf; 6416 6146 6147 + btf = libbpf_find_kernel_btf(); 6417 6148 if (IS_ERR(btf)) { 6418 6149 pr_warn("vmlinux BTF is not found\n"); 6419 6150 return -EINVAL; 6420 6151 } 6421 6152 6422 - if (attach_type == BPF_TRACE_RAW_TP) { 6423 - /* prepend "btf_trace_" prefix per kernel convention */ 6424 - strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); 6425 - btf_name = raw_tp_btf; 6426 - kind = BTF_KIND_TYPEDEF; 6427 - } else { 6428 - btf_name = name; 6429 - kind = BTF_KIND_FUNC; 6430 - } 6431 - err = btf__find_by_name_kind(btf, btf_name, kind); 6432 - btf__free(btf); 6433 - return err; 6153 + return __find_vmlinux_btf_id(btf, name, attach_type); 6434 6154 } 6435 6155 6436 6156 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) ··· 6614 6038 return err; 6615 6039 } 6616 6040 6617 - static int libbpf_find_attach_btf_id(const char *name, 6618 - enum bpf_attach_type attach_type, 6619 - __u32 attach_prog_fd) 6041 + static int libbpf_find_attach_btf_id(struct bpf_program *prog) 6620 6042 { 6043 + enum bpf_attach_type 
attach_type = prog->expected_attach_type; 6044 + __u32 attach_prog_fd = prog->attach_prog_fd; 6045 + const char *name = prog->section_name; 6621 6046 int i, err; 6622 6047 6623 6048 if (!name) ··· 6633 6056 err = libbpf_find_prog_btf_id(name + section_defs[i].len, 6634 6057 attach_prog_fd); 6635 6058 else 6636 - err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len, 6637 - attach_type); 6059 + err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, 6060 + name + section_defs[i].len, 6061 + attach_type); 6638 6062 if (err <= 0) 6639 6063 pr_warn("%s is not found in vmlinux BTF\n", name); 6640 6064 return err; ··· 7381 6803 return ERR_PTR(-ESRCH); 7382 6804 7383 6805 return sec_def->attach_fn(sec_def, prog); 6806 + } 6807 + 6808 + static int bpf_link__detach_struct_ops(struct bpf_link *link) 6809 + { 6810 + struct bpf_link_fd *l = (void *)link; 6811 + __u32 zero = 0; 6812 + 6813 + if (bpf_map_delete_elem(l->fd, &zero)) 6814 + return -errno; 6815 + 6816 + return 0; 6817 + } 6818 + 6819 + struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) 6820 + { 6821 + struct bpf_struct_ops *st_ops; 6822 + struct bpf_link_fd *link; 6823 + __u32 i, zero = 0; 6824 + int err; 6825 + 6826 + if (!bpf_map__is_struct_ops(map) || map->fd == -1) 6827 + return ERR_PTR(-EINVAL); 6828 + 6829 + link = calloc(1, sizeof(*link)); 6830 + if (!link) 6831 + return ERR_PTR(-EINVAL); 6832 + 6833 + st_ops = map->st_ops; 6834 + for (i = 0; i < btf_vlen(st_ops->type); i++) { 6835 + struct bpf_program *prog = st_ops->progs[i]; 6836 + void *kern_data; 6837 + int prog_fd; 6838 + 6839 + if (!prog) 6840 + continue; 6841 + 6842 + prog_fd = bpf_program__fd(prog); 6843 + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 6844 + *(unsigned long *)kern_data = prog_fd; 6845 + } 6846 + 6847 + err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); 6848 + if (err) { 6849 + err = -errno; 6850 + free(link); 6851 + return ERR_PTR(err); 6852 + } 6853 + 6854 + link->link.detach = 
bpf_link__detach_struct_ops; 6855 + link->fd = map->fd; 6856 + 6857 + return (struct bpf_link *)link; 7384 6858 } 7385 6859 7386 6860 enum bpf_perf_event_ret
+7 -1
tools/lib/bpf/libbpf.h
··· 239 239 240 240 LIBBPF_API struct bpf_link * 241 241 bpf_program__attach_trace(struct bpf_program *prog); 242 + struct bpf_map; 243 + LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); 242 244 struct bpf_insn; 243 245 244 246 /* ··· 317 315 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); 318 316 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); 319 317 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); 318 + LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); 319 + LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); 320 320 321 321 LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); 322 322 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, ··· 339 335 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); 340 336 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); 341 337 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); 338 + LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); 339 + LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); 342 340 343 341 /* 344 342 * No need for __attribute__((packed)), all members of 'bpf_map_def' ··· 360 354 * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, 361 355 * so no need to worry about a name clash. 362 356 */ 363 - struct bpf_map; 364 357 LIBBPF_API struct bpf_map * 365 358 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); 366 359 ··· 526 521 LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); 527 522 LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, 528 523 enum bpf_prog_type prog_type, __u32 ifindex); 524 + LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); 529 525 530 526 /* 531 527 * Get bpf_prog_info in continuous memory
+11
tools/lib/bpf/libbpf.map
··· 213 213 global: 214 214 btf_dump__emit_type_decl; 215 215 bpf_link__disconnect; 216 + bpf_map__attach_struct_ops; 217 + bpf_map_delete_batch; 218 + bpf_map_lookup_and_delete_batch; 219 + bpf_map_lookup_batch; 220 + bpf_map_update_batch; 216 221 bpf_object__find_program_by_name; 217 222 bpf_object__attach_skeleton; 218 223 bpf_object__destroy_skeleton; 219 224 bpf_object__detach_skeleton; 220 225 bpf_object__load_skeleton; 221 226 bpf_object__open_skeleton; 227 + bpf_probe_large_insn_limit; 222 228 bpf_prog_attach_xattr; 223 229 bpf_program__attach; 224 230 bpf_program__name; 231 + bpf_program__is_extension; 232 + bpf_program__is_struct_ops; 233 + bpf_program__set_extension; 234 + bpf_program__set_struct_ops; 225 235 btf__align_of; 236 + libbpf_find_kernel_btf; 226 237 } LIBBPF_0.0.6;
+3
tools/lib/bpf/libbpf_errno.c
··· 13 13 14 14 #include "libbpf.h" 15 15 16 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 17 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 18 + 16 19 #define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) 17 20 #define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) 18 21 #define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+27
tools/lib/bpf/libbpf_probes.c
··· 17 17 #include "libbpf.h" 18 18 #include "libbpf_internal.h" 19 19 20 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 21 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 22 + 20 23 static bool grep(const char *buffer, const char *pattern) 21 24 { 22 25 return !!strstr(buffer, pattern); ··· 106 103 case BPF_PROG_TYPE_CGROUP_SYSCTL: 107 104 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 108 105 case BPF_PROG_TYPE_TRACING: 106 + case BPF_PROG_TYPE_STRUCT_OPS: 107 + case BPF_PROG_TYPE_EXT: 109 108 default: 110 109 break; 111 110 } ··· 256 251 case BPF_MAP_TYPE_XSKMAP: 257 252 case BPF_MAP_TYPE_SOCKHASH: 258 253 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: 254 + case BPF_MAP_TYPE_STRUCT_OPS: 259 255 default: 260 256 break; 261 257 } ··· 326 320 } 327 321 328 322 return res; 323 + } 324 + 325 + /* 326 + * Probe for availability of kernel commit (5.3): 327 + * 328 + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") 329 + */ 330 + bool bpf_probe_large_insn_limit(__u32 ifindex) 331 + { 332 + struct bpf_insn insns[BPF_MAXINSNS + 1]; 333 + int i; 334 + 335 + for (i = 0; i < BPF_MAXINSNS; i++) 336 + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); 337 + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); 338 + 339 + errno = 0; 340 + probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, 341 + ifindex); 342 + 343 + return errno != E2BIG && errno != EINVAL; 329 344 }
+3
tools/lib/bpf/netlink.c
··· 15 15 #include "libbpf_internal.h" 16 16 #include "nlattr.h" 17 17 18 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 19 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 20 + 18 21 #ifndef SOL_NETLINK 19 22 #define SOL_NETLINK 270 20 23 #endif
+3
tools/lib/bpf/nlattr.c
··· 13 13 #include <string.h> 14 14 #include <stdio.h> 15 15 16 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 17 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 18 + 16 19 static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { 17 20 [LIBBPF_NLA_U8] = sizeof(uint8_t), 18 21 [LIBBPF_NLA_U16] = sizeof(uint16_t),
+3
tools/lib/bpf/str_error.c
··· 4 4 #include <stdio.h> 5 5 #include "str_error.h" 6 6 7 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 8 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 9 + 7 10 /* 8 11 * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl 9 12 * libc, while checking strerror_r() return to avoid having to check this in
+3
tools/lib/bpf/xsk.c
··· 32 32 #include "libbpf_internal.h" 33 33 #include "xsk.h" 34 34 35 + /* make sure libbpf doesn't use kernel-only integer typedefs */ 36 + #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 37 + 35 38 #ifndef SOL_XDP 36 39 #define SOL_XDP 283 37 40 #endif
+1 -1
tools/perf/examples/bpf/5sec.c
··· 39 39 Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> 40 40 */ 41 41 42 - #include <bpf.h> 42 + #include <bpf/bpf.h> 43 43 44 44 int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec) 45 45 {
+1 -1
tools/perf/examples/bpf/empty.c
··· 1 - #include <bpf.h> 1 + #include <bpf/bpf.h> 2 2 3 3 license(GPL);
+1 -1
tools/perf/examples/bpf/sys_enter_openat.c
··· 14 14 * the return value. 15 15 */ 16 16 17 - #include <bpf.h> 17 + #include <bpf/bpf.h> 18 18 19 19 struct syscall_enter_openat_args { 20 20 unsigned long long unused;
+1 -1
tools/perf/include/bpf/pid_filter.h
··· 3 3 #ifndef _PERF_BPF_PID_FILTER_ 4 4 #define _PERF_BPF_PID_FILTER_ 5 5 6 - #include <bpf.h> 6 + #include <bpf/bpf.h> 7 7 8 8 #define pid_filter(name) pid_map(name, bool) 9 9
+1 -1
tools/perf/include/bpf/stdio.h
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 - #include <bpf.h> 3 + #include <bpf/bpf.h> 4 4 5 5 struct bpf_map SEC("maps") __bpf_stdout__ = { 6 6 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+1 -1
tools/perf/include/bpf/unistd.h
··· 1 1 // SPDX-License-Identifier: LGPL-2.1 2 2 3 - #include <bpf.h> 3 + #include <bpf/bpf.h> 4 4 5 5 static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid; 6 6
+1 -5
tools/testing/selftests/bpf/.gitignore
··· 22 22 test_skb_cgroup_id_user 23 23 test_socket_cookie 24 24 test_cgroup_storage 25 - test_select_reuseport 26 25 test_flow_dissector 27 26 flow_dissector_load 28 27 test_netcnt 29 - test_section_names 30 28 test_tcpnotify_user 31 29 test_libbpf 32 30 test_tcp_check_syncookie_user 33 31 test_sysctl 34 - libbpf.pc 35 - libbpf.so.* 36 32 test_hashmap 37 33 test_btf_dump 38 34 xdping ··· 37 41 /no_alu32 38 42 /bpf_gcc 39 43 /tools 40 - bpf_helper_defs.h 44 +
+60 -44
tools/testing/selftests/bpf/Makefile
··· 20 20 LLC ?= llc 21 21 LLVM_OBJCOPY ?= llvm-objcopy 22 22 BPF_GCC ?= $(shell command -v bpf-gcc;) 23 - CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \ 24 - -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \ 23 + CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ 24 + -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ 25 25 -Dbpf_prog_load=bpf_prog_test_load \ 26 26 -Dbpf_load_program=bpf_test_load_program 27 27 LDLIBS += -lcap -lelf -lz -lrt -lpthread ··· 73 73 # Compile but not part of 'make run_tests' 74 74 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ 75 75 flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ 76 - test_lirc_mode2_user xdping test_cpp 76 + test_lirc_mode2_user xdping test_cpp runqslower 77 77 78 78 TEST_CUSTOM_PROGS = urandom_read 79 79 ··· 83 83 # $3 - target (assumed to be file); only file name will be emitted; 84 84 # $4 - optional extra arg, emitted as-is, if provided. 85 85 ifeq ($(V),1) 86 + Q = 86 87 msg = 87 88 else 88 - msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4)) 89 + Q = @ 90 + msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; 91 + MAKEFLAGS += --no-print-directory 92 + submake_extras := feature_display=0 89 93 endif 90 94 91 95 # override lib.mk's default rules 92 96 OVERRIDE_TARGETS := 1 93 97 override define CLEAN 94 - $(call msg, CLEAN) 98 + $(call msg,CLEAN) 95 99 $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) 96 100 endef 97 101 98 102 include ../lib.mk 103 + 104 + SCRATCH_DIR := $(OUTPUT)/tools 105 + BUILD_DIR := $(SCRATCH_DIR)/build 106 + INCLUDE_DIR := $(SCRATCH_DIR)/include 107 + BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a 99 108 100 109 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets 101 110 # to build individual tests. 
··· 117 108 $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; 118 109 119 110 $(OUTPUT)/%:%.c 120 - $(call msg, BINARY,,$@) 111 + $(call msg,BINARY,,$@) 121 112 $(LINK.c) $^ $(LDLIBS) -o $@ 122 113 123 114 $(OUTPUT)/urandom_read: urandom_read.c 124 - $(call msg, BINARY,,$@) 125 - $(CC) -o $@ $< -Wl,--build-id 115 + $(call msg,BINARY,,$@) 116 + $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id 126 117 127 - $(OUTPUT)/test_stub.o: test_stub.c 128 - $(call msg, CC,,$@) 118 + $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) 119 + $(call msg,CC,,$@) 129 120 $(CC) -c $(CFLAGS) -o $@ $< 130 121 131 - BPFOBJ := $(OUTPUT)/libbpf.a 122 + VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ 123 + /sys/kernel/btf/vmlinux \ 124 + /boot/vmlinux-$(shell uname -r) 125 + VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) 126 + $(OUTPUT)/runqslower: $(BPFOBJ) 127 + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ 128 + OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ 129 + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) 132 130 133 131 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) 134 132 ··· 153 137 $(OUTPUT)/test_sock_fields: cgroup_helpers.c 154 138 $(OUTPUT)/test_sysctl: cgroup_helpers.c 155 139 156 - .PHONY: force 157 - 158 - # force a rebuild of BPFOBJ when its dependencies are updated 159 - force: 160 - 161 - DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool 140 + DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool 162 141 BPFTOOL ?= $(DEFAULT_BPFTOOL) 142 + $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool 143 + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ 144 + OUTPUT=$(BUILD_DIR)/bpftool/ \ 145 + prefix= DESTDIR=$(SCRATCH_DIR)/ install 163 146 164 - $(DEFAULT_BPFTOOL): force 165 - $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install 147 + $(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile) \ 148 + ../../../include/uapi/linux/bpf.h \ 149 + | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf 150 + $(Q)$(MAKE) 
$(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ 151 + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers 166 152 167 - $(BPFOBJ): force 168 - $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ 169 - 170 - BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h) 171 - $(OUTPUT)/bpf_helper_defs.h: 172 - $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h 153 + $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): 154 + $(call msg,MKDIR,,$@) 155 + mkdir -p $@ 173 156 174 157 # Get Clang's default includes on this system, as opposed to those seen by 175 158 # '-target bpf'. This fixes "missing" files on some architectures/distros, ··· 188 173 189 174 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) 190 175 BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ 191 - -I. -I./include/uapi -I$(APIDIR) \ 192 - -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include) 176 + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ 177 + -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) 193 178 194 179 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ 195 180 -Wno-compare-distinct-pointer-types ··· 205 190 # $3 - CFLAGS 206 191 # $4 - LDFLAGS 207 192 define CLANG_BPF_BUILD_RULE 208 - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) 193 + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) 209 194 ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 210 195 -c $1 -o - || echo "BPF obj compilation failed") | \ 211 196 $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 212 197 endef 213 198 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 214 199 define CLANG_NOALU32_BPF_BUILD_RULE 215 - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) 200 + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) 216 201 ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 217 202 -c $1 -o - || echo "BPF obj compilation failed") | \ 218 203 $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 219 204 endef 220 205 # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC 221 206 define 
CLANG_NATIVE_BPF_BUILD_RULE 222 - $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2) 207 + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) 223 208 ($(CLANG) $3 -O2 -emit-llvm \ 224 209 -c $1 -o - || echo "BPF obj compilation failed") | \ 225 210 $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 226 211 endef 227 212 # Build BPF object using GCC 228 213 define GCC_BPF_BUILD_RULE 229 - $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2) 214 + $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) 230 215 $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 231 216 endef 232 217 ··· 267 252 ifeq ($($(TRUNNER_OUTPUT)-dir),) 268 253 $(TRUNNER_OUTPUT)-dir := y 269 254 $(TRUNNER_OUTPUT): 255 + $$(call msg,MKDIR,,$$@) 270 256 mkdir -p $$@ 271 257 endif 272 258 ··· 278 262 $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ 279 263 $(TRUNNER_BPF_PROGS_DIR)/%.c \ 280 264 $(TRUNNER_BPF_PROGS_DIR)/*.h \ 281 - $$(BPF_HELPERS) | $(TRUNNER_OUTPUT) 265 + $$(BPFOBJ) | $(TRUNNER_OUTPUT) 282 266 $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ 283 267 $(TRUNNER_BPF_CFLAGS), \ 284 268 $(TRUNNER_BPF_LDFLAGS)) ··· 286 270 $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ 287 271 $(TRUNNER_OUTPUT)/%.o \ 288 272 | $(BPFTOOL) $(TRUNNER_OUTPUT) 289 - $$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@) 273 + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) 290 274 $$(BPFTOOL) gen skeleton $$< > $$@ 291 275 endif 292 276 ··· 294 278 ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) 295 279 $(TRUNNER_TESTS_DIR)-tests-hdr := y 296 280 $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c 297 - $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@) 281 + $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) 298 282 $$(shell ( cd $(TRUNNER_TESTS_DIR); \ 299 283 echo '/* Generated header, do not edit */'; \ 300 284 ls *.c 2> /dev/null | \ ··· 310 294 $(TRUNNER_BPF_OBJS) \ 311 295 $(TRUNNER_BPF_SKELS) \ 312 296 $$(BPFOBJ) | $(TRUNNER_OUTPUT) 313 - $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@) 297 + $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) 314 298 cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< 
$$(LDLIBS) -o $$(@F) 315 299 316 300 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ ··· 318 302 $(TRUNNER_EXTRA_HDRS) \ 319 303 $(TRUNNER_TESTS_HDR) \ 320 304 $$(BPFOBJ) | $(TRUNNER_OUTPUT) 321 - $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@) 305 + $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) 322 306 $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ 323 307 324 308 # only copy extra resources if in flavored build 325 309 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) 326 310 ifneq ($2,) 327 - $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) 311 + $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) 328 312 cp -a $$^ $(TRUNNER_OUTPUT)/ 329 313 endif 330 314 331 315 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ 332 316 $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ 333 317 | $(TRUNNER_BINARY)-extras 334 - $$(call msg, BINARY,,$$@) 318 + $$(call msg,BINARY,,$$@) 335 319 $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ 336 320 337 321 endef ··· 344 328 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ 345 329 $(wildcard progs/btf_dump_test_case_*.c) 346 330 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE 347 - TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS) 331 + TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) 348 332 TRUNNER_BPF_LDFLAGS := -mattr=+alu32 349 333 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) 350 334 ··· 383 367 echo '#endif' \ 384 368 ) > verifier/tests.h) 385 369 $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) 386 - $(call msg, BINARY,,$@) 370 + $(call msg,BINARY,,$@) 387 371 $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ 388 372 389 373 # Make sure we are able to include and link libbpf against c++. 
390 374 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) 391 - $(call msg, CXX,,$@) 375 + $(call msg,CXX,,$@) 392 376 $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ 393 377 394 - EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ 378 + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ 395 379 prog_tests/tests.h map_tests/tests.h verifier/tests.h \ 396 - feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \ 397 - tools *.skel.h 380 + feature \ 381 + $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
+235
tools/testing/selftests/bpf/bpf_tcp_helpers.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __BPF_TCP_HELPERS_H 3 + #define __BPF_TCP_HELPERS_H 4 + 5 + #include <stdbool.h> 6 + #include <linux/types.h> 7 + #include <bpf/bpf_helpers.h> 8 + #include <bpf/bpf_core_read.h> 9 + #include "bpf_trace_helpers.h" 10 + 11 + #define BPF_STRUCT_OPS(name, args...) \ 12 + SEC("struct_ops/"#name) \ 13 + BPF_PROG(name, args) 14 + 15 + #define tcp_jiffies32 ((__u32)bpf_jiffies64()) 16 + 17 + struct sock_common { 18 + unsigned char skc_state; 19 + } __attribute__((preserve_access_index)); 20 + 21 + enum sk_pacing { 22 + SK_PACING_NONE = 0, 23 + SK_PACING_NEEDED = 1, 24 + SK_PACING_FQ = 2, 25 + }; 26 + 27 + struct sock { 28 + struct sock_common __sk_common; 29 + unsigned long sk_pacing_rate; 30 + __u32 sk_pacing_status; /* see enum sk_pacing */ 31 + } __attribute__((preserve_access_index)); 32 + 33 + struct inet_sock { 34 + struct sock sk; 35 + } __attribute__((preserve_access_index)); 36 + 37 + struct inet_connection_sock { 38 + struct inet_sock icsk_inet; 39 + __u8 icsk_ca_state:6, 40 + icsk_ca_setsockopt:1, 41 + icsk_ca_dst_locked:1; 42 + struct { 43 + __u8 pending; 44 + } icsk_ack; 45 + __u64 icsk_ca_priv[104 / sizeof(__u64)]; 46 + } __attribute__((preserve_access_index)); 47 + 48 + struct tcp_sock { 49 + struct inet_connection_sock inet_conn; 50 + 51 + __u32 rcv_nxt; 52 + __u32 snd_nxt; 53 + __u32 snd_una; 54 + __u8 ecn_flags; 55 + __u32 delivered; 56 + __u32 delivered_ce; 57 + __u32 snd_cwnd; 58 + __u32 snd_cwnd_cnt; 59 + __u32 snd_cwnd_clamp; 60 + __u32 snd_ssthresh; 61 + __u8 syn_data:1, /* SYN includes data */ 62 + syn_fastopen:1, /* SYN includes Fast Open option */ 63 + syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ 64 + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ 65 + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ 66 + save_syn:1, /* Save headers of SYN packet */ 67 + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? 
*/ 68 + syn_smc:1; /* SYN includes SMC */ 69 + __u32 max_packets_out; 70 + __u32 lsndtime; 71 + __u32 prior_cwnd; 72 + __u64 tcp_mstamp; /* most recent packet received/sent */ 73 + } __attribute__((preserve_access_index)); 74 + 75 + static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) 76 + { 77 + return (struct inet_connection_sock *)sk; 78 + } 79 + 80 + static __always_inline void *inet_csk_ca(const struct sock *sk) 81 + { 82 + return (void *)inet_csk(sk)->icsk_ca_priv; 83 + } 84 + 85 + static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) 86 + { 87 + return (struct tcp_sock *)sk; 88 + } 89 + 90 + static __always_inline bool before(__u32 seq1, __u32 seq2) 91 + { 92 + return (__s32)(seq1-seq2) < 0; 93 + } 94 + #define after(seq2, seq1) before(seq1, seq2) 95 + 96 + #define TCP_ECN_OK 1 97 + #define TCP_ECN_QUEUE_CWR 2 98 + #define TCP_ECN_DEMAND_CWR 4 99 + #define TCP_ECN_SEEN 8 100 + 101 + enum inet_csk_ack_state_t { 102 + ICSK_ACK_SCHED = 1, 103 + ICSK_ACK_TIMER = 2, 104 + ICSK_ACK_PUSHED = 4, 105 + ICSK_ACK_PUSHED2 = 8, 106 + ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ 107 + }; 108 + 109 + enum tcp_ca_event { 110 + CA_EVENT_TX_START = 0, 111 + CA_EVENT_CWND_RESTART = 1, 112 + CA_EVENT_COMPLETE_CWR = 2, 113 + CA_EVENT_LOSS = 3, 114 + CA_EVENT_ECN_NO_CE = 4, 115 + CA_EVENT_ECN_IS_CE = 5, 116 + }; 117 + 118 + enum tcp_ca_state { 119 + TCP_CA_Open = 0, 120 + TCP_CA_Disorder = 1, 121 + TCP_CA_CWR = 2, 122 + TCP_CA_Recovery = 3, 123 + TCP_CA_Loss = 4 124 + }; 125 + 126 + struct ack_sample { 127 + __u32 pkts_acked; 128 + __s32 rtt_us; 129 + __u32 in_flight; 130 + } __attribute__((preserve_access_index)); 131 + 132 + struct rate_sample { 133 + __u64 prior_mstamp; /* starting timestamp for interval */ 134 + __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ 135 + __s32 delivered; /* number of packets delivered over interval */ 136 + long interval_us; /* time for tp->delivered to incr "delivered" */ 
137 + __u32 snd_interval_us; /* snd interval for delivered packets */ 138 + __u32 rcv_interval_us; /* rcv interval for delivered packets */ 139 + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ 140 + int losses; /* number of packets marked lost upon ACK */ 141 + __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ 142 + __u32 prior_in_flight; /* in flight before this ACK */ 143 + bool is_app_limited; /* is sample from packet with bubble in pipe? */ 144 + bool is_retrans; /* is sample from retransmission? */ 145 + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ 146 + } __attribute__((preserve_access_index)); 147 + 148 + #define TCP_CA_NAME_MAX 16 149 + #define TCP_CONG_NEEDS_ECN 0x2 150 + 151 + struct tcp_congestion_ops { 152 + char name[TCP_CA_NAME_MAX]; 153 + __u32 flags; 154 + 155 + /* initialize private data (optional) */ 156 + void (*init)(struct sock *sk); 157 + /* cleanup private data (optional) */ 158 + void (*release)(struct sock *sk); 159 + 160 + /* return slow start threshold (required) */ 161 + __u32 (*ssthresh)(struct sock *sk); 162 + /* do new cwnd calculation (required) */ 163 + void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); 164 + /* call before changing ca_state (optional) */ 165 + void (*set_state)(struct sock *sk, __u8 new_state); 166 + /* call when cwnd event occurs (optional) */ 167 + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); 168 + /* call when ack arrives (optional) */ 169 + void (*in_ack_event)(struct sock *sk, __u32 flags); 170 + /* new value of cwnd after loss (required) */ 171 + __u32 (*undo_cwnd)(struct sock *sk); 172 + /* hook for packet ack accounting (optional) */ 173 + void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); 174 + /* override sysctl_tcp_min_tso_segs */ 175 + __u32 (*min_tso_segs)(struct sock *sk); 176 + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ 177 + __u32 (*sndbuf_expand)(struct sock *sk); 178 + /* call when packets are 
delivered to update cwnd and pacing rate, 179 + * after all the ca_state processing. (optional) 180 + */ 181 + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); 182 + }; 183 + 184 + #define min(a, b) ((a) < (b) ? (a) : (b)) 185 + #define max(a, b) ((a) > (b) ? (a) : (b)) 186 + #define min_not_zero(x, y) ({ \ 187 + typeof(x) __x = (x); \ 188 + typeof(y) __y = (y); \ 189 + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) 190 + 191 + static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) 192 + { 193 + __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); 194 + 195 + acked -= cwnd - tp->snd_cwnd; 196 + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 197 + 198 + return acked; 199 + } 200 + 201 + static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) 202 + { 203 + return tp->snd_cwnd < tp->snd_ssthresh; 204 + } 205 + 206 + static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) 207 + { 208 + const struct tcp_sock *tp = tcp_sk(sk); 209 + 210 + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ 211 + if (tcp_in_slow_start(tp)) 212 + return tp->snd_cwnd < 2 * tp->max_packets_out; 213 + 214 + return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); 215 + } 216 + 217 + static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) 218 + { 219 + /* If credits accumulated at a higher w, apply them gently now. */ 220 + if (tp->snd_cwnd_cnt >= w) { 221 + tp->snd_cwnd_cnt = 0; 222 + tp->snd_cwnd++; 223 + } 224 + 225 + tp->snd_cwnd_cnt += acked; 226 + if (tp->snd_cwnd_cnt >= w) { 227 + __u32 delta = tp->snd_cwnd_cnt / w; 228 + 229 + tp->snd_cwnd_cnt -= delta * w; 230 + tp->snd_cwnd += delta; 231 + } 232 + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); 233 + } 234 + 235 + #endif
+109 -47
tools/testing/selftests/bpf/bpf_trace_helpers.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 2 #ifndef __BPF_TRACE_HELPERS_H 3 3 #define __BPF_TRACE_HELPERS_H 4 4 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 - #define __BPF_MAP_0(i, m, v, ...) v 8 - #define __BPF_MAP_1(i, m, v, t, a, ...) m(t, a, ctx[i]) 9 - #define __BPF_MAP_2(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_1(i+1, m, v, __VA_ARGS__) 10 - #define __BPF_MAP_3(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_2(i+1, m, v, __VA_ARGS__) 11 - #define __BPF_MAP_4(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_3(i+1, m, v, __VA_ARGS__) 12 - #define __BPF_MAP_5(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_4(i+1, m, v, __VA_ARGS__) 13 - #define __BPF_MAP_6(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_5(i+1, m, v, __VA_ARGS__) 14 - #define __BPF_MAP_7(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_6(i+1, m, v, __VA_ARGS__) 15 - #define __BPF_MAP_8(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_7(i+1, m, v, __VA_ARGS__) 16 - #define __BPF_MAP_9(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_8(i+1, m, v, __VA_ARGS__) 17 - #define __BPF_MAP_10(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_9(i+1, m, v, __VA_ARGS__) 18 - #define __BPF_MAP_11(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_10(i+1, m, v, __VA_ARGS__) 19 - #define __BPF_MAP_12(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_11(i+1, m, v, __VA_ARGS__) 20 - #define __BPF_MAP(n, ...) __BPF_MAP_##n(0, __VA_ARGS__) 7 + #define ___bpf_concat(a, b) a ## b 8 + #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) 9 + #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N 10 + #define ___bpf_narg(...) \ 11 + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 12 + #define ___bpf_empty(...) \ 13 + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) 21 14 22 - /* BPF sizeof(void *) is always 8, so no need to cast to long first 23 - * for ptr to avoid compiler warning. 
15 + #define ___bpf_ctx_cast0() ctx 16 + #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] 17 + #define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] 18 + #define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] 19 + #define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] 20 + #define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] 21 + #define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] 22 + #define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] 23 + #define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] 24 + #define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] 25 + #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] 26 + #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] 27 + #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] 28 + #define ___bpf_ctx_cast(args...) \ 29 + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) 30 + 31 + /* 32 + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and 33 + * similar kinds of BPF programs, that accept input arguments as a single 34 + * pointer to untyped u64 array, where each u64 can actually be a typed 35 + * pointer or integer of different size. Instead of requring user to write 36 + * manual casts and work with array elements by index, BPF_PROG macro 37 + * allows user to declare a list of named and typed input arguments in the 38 + * same syntax as for normal C function. All the casting is hidden and 39 + * performed transparently, while user code can just assume working with 40 + * function arguments of specified type and name. 41 + * 42 + * Original raw context argument is preserved as well as 'ctx' argument. 
43 + * This is useful when using BPF helpers that expect original context 44 + * as one of the parameters (e.g., for bpf_perf_event_output()). 24 45 */ 25 - #define __BPF_CAST(t, a, ctx) (t) ctx 26 - #define __BPF_V void 27 - #define __BPF_N 46 + #define BPF_PROG(name, args...) \ 47 + name(unsigned long long *ctx); \ 48 + static __always_inline typeof(name(0)) \ 49 + ____##name(unsigned long long *ctx, ##args); \ 50 + typeof(name(0)) name(unsigned long long *ctx) \ 51 + { \ 52 + _Pragma("GCC diagnostic push") \ 53 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 54 + return ____##name(___bpf_ctx_cast(args)); \ 55 + _Pragma("GCC diagnostic pop") \ 56 + } \ 57 + static __always_inline typeof(name(0)) \ 58 + ____##name(unsigned long long *ctx, ##args) 28 59 29 - #define __BPF_DECL_ARGS(t, a, ctx) t a 60 + struct pt_regs; 30 61 31 - #define BPF_TRACE_x(x, sec_name, fname, ret_type, ...) \ 32 - static __always_inline ret_type \ 33 - ____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ 34 - \ 35 - SEC(sec_name) \ 36 - ret_type fname(__u64 *ctx) \ 37 - { \ 38 - return ____##fname(__BPF_MAP(x, __BPF_CAST, __BPF_N, __VA_ARGS__));\ 39 - } \ 40 - \ 41 - static __always_inline \ 42 - ret_type ____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) 62 + #define ___bpf_kprobe_args0() ctx 63 + #define ___bpf_kprobe_args1(x) \ 64 + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) 65 + #define ___bpf_kprobe_args2(x, args...) \ 66 + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) 67 + #define ___bpf_kprobe_args3(x, args...) \ 68 + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) 69 + #define ___bpf_kprobe_args4(x, args...) \ 70 + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) 71 + #define ___bpf_kprobe_args5(x, args...) \ 72 + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) 73 + #define ___bpf_kprobe_args(args...) \ 74 + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) 43 75 44 - #define BPF_TRACE_0(sec, fname, ...) 
BPF_TRACE_x(0, sec, fname, int, __VA_ARGS__) 45 - #define BPF_TRACE_1(sec, fname, ...) BPF_TRACE_x(1, sec, fname, int, __VA_ARGS__) 46 - #define BPF_TRACE_2(sec, fname, ...) BPF_TRACE_x(2, sec, fname, int, __VA_ARGS__) 47 - #define BPF_TRACE_3(sec, fname, ...) BPF_TRACE_x(3, sec, fname, int, __VA_ARGS__) 48 - #define BPF_TRACE_4(sec, fname, ...) BPF_TRACE_x(4, sec, fname, int, __VA_ARGS__) 49 - #define BPF_TRACE_5(sec, fname, ...) BPF_TRACE_x(5, sec, fname, int, __VA_ARGS__) 50 - #define BPF_TRACE_6(sec, fname, ...) BPF_TRACE_x(6, sec, fname, int, __VA_ARGS__) 51 - #define BPF_TRACE_7(sec, fname, ...) BPF_TRACE_x(7, sec, fname, int, __VA_ARGS__) 52 - #define BPF_TRACE_8(sec, fname, ...) BPF_TRACE_x(8, sec, fname, int, __VA_ARGS__) 53 - #define BPF_TRACE_9(sec, fname, ...) BPF_TRACE_x(9, sec, fname, int, __VA_ARGS__) 54 - #define BPF_TRACE_10(sec, fname, ...) BPF_TRACE_x(10, sec, fname, int, __VA_ARGS__) 55 - #define BPF_TRACE_11(sec, fname, ...) BPF_TRACE_x(11, sec, fname, int, __VA_ARGS__) 56 - #define BPF_TRACE_12(sec, fname, ...) BPF_TRACE_x(12, sec, fname, int, __VA_ARGS__) 76 + /* 77 + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for 78 + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific 79 + * low-level way of getting kprobe input arguments from struct pt_regs, and 80 + * provides a familiar typed and named function arguments syntax and 81 + * semantics of accessing kprobe input paremeters. 82 + * 83 + * Original struct pt_regs* context is preserved as 'ctx' argument. This might 84 + * be necessary when using BPF helpers like bpf_perf_event_output(). 85 + */ 86 + #define BPF_KPROBE(name, args...) 
\ 87 + name(struct pt_regs *ctx); \ 88 + static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ 89 + typeof(name(0)) name(struct pt_regs *ctx) \ 90 + { \ 91 + _Pragma("GCC diagnostic push") \ 92 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 93 + return ____##name(___bpf_kprobe_args(args)); \ 94 + _Pragma("GCC diagnostic pop") \ 95 + } \ 96 + static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 57 97 98 + #define ___bpf_kretprobe_args0() ctx 99 + #define ___bpf_kretprobe_argsN(x, args...) \ 100 + ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) 101 + #define ___bpf_kretprobe_args(args...) \ 102 + ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) 103 + 104 + /* 105 + * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all 106 + * input kprobe arguments, one last extra argument has to be specified, which 107 + * captures kprobe return value. 108 + */ 109 + #define BPF_KRETPROBE(name, args...) \ 110 + name(struct pt_regs *ctx); \ 111 + static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ 112 + typeof(name(0)) name(struct pt_regs *ctx) \ 113 + { \ 114 + _Pragma("GCC diagnostic push") \ 115 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 116 + return ____##name(___bpf_kretprobe_args(args)); \ 117 + _Pragma("GCC diagnostic pop") \ 118 + } \ 119 + static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 58 120 #endif
+1 -1
tools/testing/selftests/bpf/bpf_util.h
··· 6 6 #include <stdlib.h> 7 7 #include <string.h> 8 8 #include <errno.h> 9 - #include <libbpf.h> /* libbpf_num_possible_cpus */ 9 + #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ 10 10 11 11 static inline unsigned int bpf_num_possible_cpus(void) 12 12 {
+129
tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <stdio.h> 4 + #include <errno.h> 5 + #include <string.h> 6 + 7 + #include <bpf/bpf.h> 8 + #include <bpf/libbpf.h> 9 + 10 + #include <test_maps.h> 11 + 12 + static void map_batch_update(int map_fd, __u32 max_entries, int *keys, 13 + int *values) 14 + { 15 + int i, err; 16 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 17 + .elem_flags = 0, 18 + .flags = 0, 19 + ); 20 + 21 + for (i = 0; i < max_entries; i++) { 22 + keys[i] = i; 23 + values[i] = i + 1; 24 + } 25 + 26 + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); 27 + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); 28 + } 29 + 30 + static void map_batch_verify(int *visited, __u32 max_entries, 31 + int *keys, int *values) 32 + { 33 + int i; 34 + 35 + memset(visited, 0, max_entries * sizeof(*visited)); 36 + for (i = 0; i < max_entries; i++) { 37 + CHECK(keys[i] + 1 != values[i], "key/value checking", 38 + "error: i %d key %d value %d\n", i, keys[i], values[i]); 39 + visited[i] = 1; 40 + } 41 + for (i = 0; i < max_entries; i++) { 42 + CHECK(visited[i] != 1, "visited checking", 43 + "error: keys array at index %d missing\n", i); 44 + } 45 + } 46 + 47 + void test_array_map_batch_ops(void) 48 + { 49 + struct bpf_create_map_attr xattr = { 50 + .name = "array_map", 51 + .map_type = BPF_MAP_TYPE_ARRAY, 52 + .key_size = sizeof(int), 53 + .value_size = sizeof(int), 54 + }; 55 + int map_fd, *keys, *values, *visited; 56 + __u32 count, total, total_success; 57 + const __u32 max_entries = 10; 58 + bool nospace_err; 59 + __u64 batch = 0; 60 + int err, step; 61 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 62 + .elem_flags = 0, 63 + .flags = 0, 64 + ); 65 + 66 + xattr.max_entries = max_entries; 67 + map_fd = bpf_create_map_xattr(&xattr); 68 + CHECK(map_fd == -1, 69 + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); 70 + 71 + keys = malloc(max_entries * sizeof(int)); 72 + values = malloc(max_entries * sizeof(int)); 73 
+ visited = malloc(max_entries * sizeof(int)); 74 + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", 75 + strerror(errno)); 76 + 77 + /* populate elements to the map */ 78 + map_batch_update(map_fd, max_entries, keys, values); 79 + 80 + /* test 1: lookup in a loop with various steps. */ 81 + total_success = 0; 82 + for (step = 1; step < max_entries; step++) { 83 + map_batch_update(map_fd, max_entries, keys, values); 84 + map_batch_verify(visited, max_entries, keys, values); 85 + memset(keys, 0, max_entries * sizeof(*keys)); 86 + memset(values, 0, max_entries * sizeof(*values)); 87 + batch = 0; 88 + total = 0; 89 + /* iteratively lookup/delete elements with 'step' 90 + * elements each. 91 + */ 92 + count = step; 93 + nospace_err = false; 94 + while (true) { 95 + err = bpf_map_lookup_batch(map_fd, 96 + total ? &batch : NULL, &batch, 97 + keys + total, 98 + values + total, 99 + &count, &opts); 100 + 101 + CHECK((err && errno != ENOENT), "lookup with steps", 102 + "error: %s\n", strerror(errno)); 103 + 104 + total += count; 105 + if (err) 106 + break; 107 + 108 + } 109 + 110 + if (nospace_err == true) 111 + continue; 112 + 113 + CHECK(total != max_entries, "lookup with steps", 114 + "total = %u, max_entries = %u\n", total, max_entries); 115 + 116 + map_batch_verify(visited, max_entries, keys, values); 117 + 118 + total_success++; 119 + } 120 + 121 + CHECK(total_success == 0, "check total_success", 122 + "unexpected failure\n"); 123 + 124 + printf("%s:PASS\n", __func__); 125 + 126 + free(keys); 127 + free(values); 128 + free(visited); 129 + }
+283
tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <stdio.h> 4 + #include <errno.h> 5 + #include <string.h> 6 + 7 + #include <bpf/bpf.h> 8 + #include <bpf/libbpf.h> 9 + 10 + #include <bpf_util.h> 11 + #include <test_maps.h> 12 + 13 + static void map_batch_update(int map_fd, __u32 max_entries, int *keys, 14 + void *values, bool is_pcpu) 15 + { 16 + typedef BPF_DECLARE_PERCPU(int, value); 17 + value *v = NULL; 18 + int i, j, err; 19 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 20 + .elem_flags = 0, 21 + .flags = 0, 22 + ); 23 + 24 + if (is_pcpu) 25 + v = (value *)values; 26 + 27 + for (i = 0; i < max_entries; i++) { 28 + keys[i] = i + 1; 29 + if (is_pcpu) 30 + for (j = 0; j < bpf_num_possible_cpus(); j++) 31 + bpf_percpu(v[i], j) = i + 2 + j; 32 + else 33 + ((int *)values)[i] = i + 2; 34 + } 35 + 36 + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); 37 + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); 38 + } 39 + 40 + static void map_batch_verify(int *visited, __u32 max_entries, 41 + int *keys, void *values, bool is_pcpu) 42 + { 43 + typedef BPF_DECLARE_PERCPU(int, value); 44 + value *v = NULL; 45 + int i, j; 46 + 47 + if (is_pcpu) 48 + v = (value *)values; 49 + 50 + memset(visited, 0, max_entries * sizeof(*visited)); 51 + for (i = 0; i < max_entries; i++) { 52 + 53 + if (is_pcpu) { 54 + for (j = 0; j < bpf_num_possible_cpus(); j++) { 55 + CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j), 56 + "key/value checking", 57 + "error: i %d j %d key %d value %d\n", 58 + i, j, keys[i], bpf_percpu(v[i], j)); 59 + } 60 + } else { 61 + CHECK(keys[i] + 1 != ((int *)values)[i], 62 + "key/value checking", 63 + "error: i %d key %d value %d\n", i, keys[i], 64 + ((int *)values)[i]); 65 + } 66 + 67 + visited[i] = 1; 68 + 69 + } 70 + for (i = 0; i < max_entries; i++) { 71 + CHECK(visited[i] != 1, "visited checking", 72 + "error: keys array at index %d missing\n", i); 73 + } 74 + } 75 + 76 + void 
__test_map_lookup_and_delete_batch(bool is_pcpu) 77 + { 78 + __u32 batch, count, total, total_success; 79 + typedef BPF_DECLARE_PERCPU(int, value); 80 + int map_fd, *keys, *visited, key; 81 + const __u32 max_entries = 10; 82 + value pcpu_values[max_entries]; 83 + int err, step, value_size; 84 + bool nospace_err; 85 + void *values; 86 + struct bpf_create_map_attr xattr = { 87 + .name = "hash_map", 88 + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : 89 + BPF_MAP_TYPE_HASH, 90 + .key_size = sizeof(int), 91 + .value_size = sizeof(int), 92 + }; 93 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 94 + .elem_flags = 0, 95 + .flags = 0, 96 + ); 97 + 98 + xattr.max_entries = max_entries; 99 + map_fd = bpf_create_map_xattr(&xattr); 100 + CHECK(map_fd == -1, 101 + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); 102 + 103 + value_size = is_pcpu ? sizeof(value) : sizeof(int); 104 + keys = malloc(max_entries * sizeof(int)); 105 + if (is_pcpu) 106 + values = pcpu_values; 107 + else 108 + values = malloc(max_entries * sizeof(int)); 109 + visited = malloc(max_entries * sizeof(int)); 110 + CHECK(!keys || !values || !visited, "malloc()", 111 + "error:%s\n", strerror(errno)); 112 + 113 + /* test 1: lookup/delete an empty hash table, -ENOENT */ 114 + count = max_entries; 115 + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, 116 + values, &count, &opts); 117 + CHECK((err && errno != ENOENT), "empty map", 118 + "error: %s\n", strerror(errno)); 119 + 120 + /* populate elements to the map */ 121 + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); 122 + 123 + /* test 2: lookup/delete with count = 0, success */ 124 + count = 0; 125 + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, 126 + values, &count, &opts); 127 + CHECK(err, "count = 0", "error: %s\n", strerror(errno)); 128 + 129 + /* test 3: lookup/delete with count = max_entries, success */ 130 + memset(keys, 0, max_entries * sizeof(*keys)); 131 + memset(values, 0, max_entries * 
value_size); 132 + count = max_entries; 133 + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, 134 + values, &count, &opts); 135 + CHECK((err && errno != ENOENT), "count = max_entries", 136 + "error: %s\n", strerror(errno)); 137 + CHECK(count != max_entries, "count = max_entries", 138 + "count = %u, max_entries = %u\n", count, max_entries); 139 + map_batch_verify(visited, max_entries, keys, values, is_pcpu); 140 + 141 + /* bpf_map_get_next_key() should return -ENOENT for an empty map. */ 142 + err = bpf_map_get_next_key(map_fd, NULL, &key); 143 + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno)); 144 + 145 + /* test 4: lookup/delete in a loop with various steps. */ 146 + total_success = 0; 147 + for (step = 1; step < max_entries; step++) { 148 + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); 149 + memset(keys, 0, max_entries * sizeof(*keys)); 150 + memset(values, 0, max_entries * value_size); 151 + total = 0; 152 + /* iteratively lookup/delete elements with 'step' 153 + * elements each 154 + */ 155 + count = step; 156 + nospace_err = false; 157 + while (true) { 158 + err = bpf_map_lookup_batch(map_fd, 159 + total ? &batch : NULL, 160 + &batch, keys + total, 161 + values + 162 + total * value_size, 163 + &count, &opts); 164 + /* It is possible that we are failing due to buffer size 165 + * not big enough. In such cases, let us just exit and 166 + * go with large steps. Not that a buffer size with 167 + * max_entries should always work. 
168 + */ 169 + if (err && errno == ENOSPC) { 170 + nospace_err = true; 171 + break; 172 + } 173 + 174 + CHECK((err && errno != ENOENT), "lookup with steps", 175 + "error: %s\n", strerror(errno)); 176 + 177 + total += count; 178 + if (err) 179 + break; 180 + 181 + } 182 + if (nospace_err == true) 183 + continue; 184 + 185 + CHECK(total != max_entries, "lookup with steps", 186 + "total = %u, max_entries = %u\n", total, max_entries); 187 + map_batch_verify(visited, max_entries, keys, values, is_pcpu); 188 + 189 + total = 0; 190 + count = step; 191 + while (total < max_entries) { 192 + if (max_entries - total < step) 193 + count = max_entries - total; 194 + err = bpf_map_delete_batch(map_fd, 195 + keys + total, 196 + &count, &opts); 197 + CHECK((err && errno != ENOENT), "delete batch", 198 + "error: %s\n", strerror(errno)); 199 + total += count; 200 + if (err) 201 + break; 202 + } 203 + CHECK(total != max_entries, "delete with steps", 204 + "total = %u, max_entries = %u\n", total, max_entries); 205 + 206 + /* check map is empty, errono == ENOENT */ 207 + err = bpf_map_get_next_key(map_fd, NULL, &key); 208 + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", 209 + "error: %s\n", strerror(errno)); 210 + 211 + /* iteratively lookup/delete elements with 'step' 212 + * elements each 213 + */ 214 + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); 215 + memset(keys, 0, max_entries * sizeof(*keys)); 216 + memset(values, 0, max_entries * value_size); 217 + total = 0; 218 + count = step; 219 + nospace_err = false; 220 + while (true) { 221 + err = bpf_map_lookup_and_delete_batch(map_fd, 222 + total ? &batch : NULL, 223 + &batch, keys + total, 224 + values + 225 + total * value_size, 226 + &count, &opts); 227 + /* It is possible that we are failing due to buffer size 228 + * not big enough. In such cases, let us just exit and 229 + * go with large steps. Not that a buffer size with 230 + * max_entries should always work. 
231 + */ 232 + if (err && errno == ENOSPC) { 233 + nospace_err = true; 234 + break; 235 + } 236 + 237 + CHECK((err && errno != ENOENT), "lookup with steps", 238 + "error: %s\n", strerror(errno)); 239 + 240 + total += count; 241 + if (err) 242 + break; 243 + } 244 + 245 + if (nospace_err == true) 246 + continue; 247 + 248 + CHECK(total != max_entries, "lookup/delete with steps", 249 + "total = %u, max_entries = %u\n", total, max_entries); 250 + 251 + map_batch_verify(visited, max_entries, keys, values, is_pcpu); 252 + err = bpf_map_get_next_key(map_fd, NULL, &key); 253 + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", 254 + strerror(errno)); 255 + 256 + total_success++; 257 + } 258 + 259 + CHECK(total_success == 0, "check total_success", 260 + "unexpected failure\n"); 261 + free(keys); 262 + free(visited); 263 + if (!is_pcpu) 264 + free(values); 265 + } 266 + 267 + void htab_map_batch_ops(void) 268 + { 269 + __test_map_lookup_and_delete_batch(false); 270 + printf("test_%s:PASS\n", __func__); 271 + } 272 + 273 + void htab_percpu_map_batch_ops(void) 274 + { 275 + __test_map_lookup_and_delete_batch(true); 276 + printf("test_%s:PASS\n", __func__); 277 + } 278 + 279 + void test_htab_map_batch_ops(void) 280 + { 281 + htab_map_batch_ops(); 282 + htab_percpu_map_batch_ops(); 283 + }
+212
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + #include <linux/err.h> 5 + #include <test_progs.h> 6 + #include "bpf_dctcp.skel.h" 7 + #include "bpf_cubic.skel.h" 8 + 9 + #define min(a, b) ((a) < (b) ? (a) : (b)) 10 + 11 + static const unsigned int total_bytes = 10 * 1024 * 1024; 12 + static const struct timeval timeo_sec = { .tv_sec = 10 }; 13 + static const size_t timeo_optlen = sizeof(timeo_sec); 14 + static int stop, duration; 15 + 16 + static int settimeo(int fd) 17 + { 18 + int err; 19 + 20 + err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, 21 + timeo_optlen); 22 + if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n", 23 + errno)) 24 + return -1; 25 + 26 + err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, 27 + timeo_optlen); 28 + if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n", 29 + errno)) 30 + return -1; 31 + 32 + return 0; 33 + } 34 + 35 + static int settcpca(int fd, const char *tcp_ca) 36 + { 37 + int err; 38 + 39 + err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); 40 + if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", 41 + errno)) 42 + return -1; 43 + 44 + return 0; 45 + } 46 + 47 + static void *server(void *arg) 48 + { 49 + int lfd = (int)(long)arg, err = 0, fd; 50 + ssize_t nr_sent = 0, bytes = 0; 51 + char batch[1500]; 52 + 53 + fd = accept(lfd, NULL, NULL); 54 + while (fd == -1) { 55 + if (errno == EINTR) 56 + continue; 57 + err = -errno; 58 + goto done; 59 + } 60 + 61 + if (settimeo(fd)) { 62 + err = -errno; 63 + goto done; 64 + } 65 + 66 + while (bytes < total_bytes && !READ_ONCE(stop)) { 67 + nr_sent = send(fd, &batch, 68 + min(total_bytes - bytes, sizeof(batch)), 0); 69 + if (nr_sent == -1 && errno == EINTR) 70 + continue; 71 + if (nr_sent == -1) { 72 + err = -errno; 73 + break; 74 + } 75 + bytes += nr_sent; 76 + } 77 + 78 + CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", 79 + bytes, 
total_bytes, nr_sent, errno); 80 + 81 + done: 82 + if (fd != -1) 83 + close(fd); 84 + if (err) { 85 + WRITE_ONCE(stop, 1); 86 + return ERR_PTR(err); 87 + } 88 + return NULL; 89 + } 90 + 91 + static void do_test(const char *tcp_ca) 92 + { 93 + struct sockaddr_in6 sa6 = {}; 94 + ssize_t nr_recv = 0, bytes = 0; 95 + int lfd = -1, fd = -1; 96 + pthread_t srv_thread; 97 + socklen_t addrlen = sizeof(sa6); 98 + void *thread_ret; 99 + char batch[1500]; 100 + int err; 101 + 102 + WRITE_ONCE(stop, 0); 103 + 104 + lfd = socket(AF_INET6, SOCK_STREAM, 0); 105 + if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) 106 + return; 107 + fd = socket(AF_INET6, SOCK_STREAM, 0); 108 + if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { 109 + close(lfd); 110 + return; 111 + } 112 + 113 + if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) || 114 + settimeo(lfd) || settimeo(fd)) 115 + goto done; 116 + 117 + /* bind, listen and start server thread to accept */ 118 + sa6.sin6_family = AF_INET6; 119 + sa6.sin6_addr = in6addr_loopback; 120 + err = bind(lfd, (struct sockaddr *)&sa6, addrlen); 121 + if (CHECK(err == -1, "bind", "errno:%d\n", errno)) 122 + goto done; 123 + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); 124 + if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) 125 + goto done; 126 + err = listen(lfd, 1); 127 + if (CHECK(err == -1, "listen", "errno:%d\n", errno)) 128 + goto done; 129 + err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); 130 + if (CHECK(err != 0, "pthread_create", "err:%d\n", err)) 131 + goto done; 132 + 133 + /* connect to server */ 134 + err = connect(fd, (struct sockaddr *)&sa6, addrlen); 135 + if (CHECK(err == -1, "connect", "errno:%d\n", errno)) 136 + goto wait_thread; 137 + 138 + /* recv total_bytes */ 139 + while (bytes < total_bytes && !READ_ONCE(stop)) { 140 + nr_recv = recv(fd, &batch, 141 + min(total_bytes - bytes, sizeof(batch)), 0); 142 + if (nr_recv == -1 && errno == EINTR) 143 + continue; 144 + if (nr_recv == -1) 
145 + break; 146 + bytes += nr_recv; 147 + } 148 + 149 + CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", 150 + bytes, total_bytes, nr_recv, errno); 151 + 152 + wait_thread: 153 + WRITE_ONCE(stop, 1); 154 + pthread_join(srv_thread, &thread_ret); 155 + CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", 156 + PTR_ERR(thread_ret)); 157 + done: 158 + close(lfd); 159 + close(fd); 160 + } 161 + 162 + static void test_cubic(void) 163 + { 164 + struct bpf_cubic *cubic_skel; 165 + struct bpf_link *link; 166 + 167 + cubic_skel = bpf_cubic__open_and_load(); 168 + if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) 169 + return; 170 + 171 + link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); 172 + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", 173 + PTR_ERR(link))) { 174 + bpf_cubic__destroy(cubic_skel); 175 + return; 176 + } 177 + 178 + do_test("bpf_cubic"); 179 + 180 + bpf_link__destroy(link); 181 + bpf_cubic__destroy(cubic_skel); 182 + } 183 + 184 + static void test_dctcp(void) 185 + { 186 + struct bpf_dctcp *dctcp_skel; 187 + struct bpf_link *link; 188 + 189 + dctcp_skel = bpf_dctcp__open_and_load(); 190 + if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) 191 + return; 192 + 193 + link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); 194 + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", 195 + PTR_ERR(link))) { 196 + bpf_dctcp__destroy(dctcp_skel); 197 + return; 198 + } 199 + 200 + do_test("bpf_dctcp"); 201 + 202 + bpf_link__destroy(link); 203 + bpf_dctcp__destroy(dctcp_skel); 204 + } 205 + 206 + void test_bpf_tcp_ca(void) 207 + { 208 + if (test__start_subtest("dctcp")) 209 + test_dctcp(); 210 + if (test__start_subtest("cubic")) 211 + test_cubic(); 212 + }
+2
tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
··· 48 48 { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, 49 49 { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, 50 50 51 + { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, 52 + 51 53 /* full unroll by llvm */ 52 54 { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, 53 55 { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+1 -1
tools/testing/selftests/bpf/prog_tests/cpu_mask.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <test_progs.h> 3 3 #include <bpf/btf.h> 4 - #include "libbpf_internal.h" 4 + #include "bpf/libbpf_internal.h" 5 5 6 6 static int duration = 0; 7 7
+20 -1
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
··· 26 26 27 27 link = calloc(sizeof(struct bpf_link *), prog_cnt); 28 28 prog = calloc(sizeof(struct bpf_program *), prog_cnt); 29 - result = malloc(prog_cnt * sizeof(u64)); 29 + result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64)); 30 30 if (CHECK(!link || !prog || !result, "alloc_memory", 31 31 "failed to alloc memory")) 32 32 goto close_prog; ··· 98 98 "fexit/test_pkt_access", 99 99 "fexit/test_pkt_access_subprog1", 100 100 "fexit/test_pkt_access_subprog2", 101 + "fexit/test_pkt_access_subprog3", 102 + }; 103 + test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", 104 + "./test_pkt_access.o", 105 + ARRAY_SIZE(prog_name), 106 + prog_name); 107 + } 108 + 109 + static void test_func_replace(void) 110 + { 111 + const char *prog_name[] = { 112 + "fexit/test_pkt_access", 113 + "fexit/test_pkt_access_subprog1", 114 + "fexit/test_pkt_access_subprog2", 115 + "fexit/test_pkt_access_subprog3", 116 + "freplace/get_skb_len", 117 + "freplace/get_skb_ifindex", 118 + "freplace/get_constant", 101 119 }; 102 120 test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", 103 121 "./test_pkt_access.o", ··· 127 109 { 128 110 test_target_no_callees(); 129 111 test_target_yes_callees(); 112 + test_func_replace(); 130 113 }
+1 -1
tools/testing/selftests/bpf/prog_tests/perf_buffer.c
··· 4 4 #include <sched.h> 5 5 #include <sys/socket.h> 6 6 #include <test_progs.h> 7 - #include "libbpf_internal.h" 7 + #include "bpf/libbpf_internal.h" 8 8 9 9 static void on_sample(void *ctx, int cpu, void *data, __u32 size) 10 10 {
+50 -80
tools/testing/selftests/bpf/prog_tests/send_signal.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <test_progs.h> 3 + #include "test_send_signal_kern.skel.h" 3 4 4 5 static volatile int sigusr1_received = 0; 5 6 ··· 10 9 } 11 10 12 11 static void test_send_signal_common(struct perf_event_attr *attr, 13 - int prog_type, 12 + bool signal_thread, 14 13 const char *test_name) 15 14 { 16 - int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd; 17 - const char *file = "./test_send_signal_kern.o"; 18 - struct bpf_object *obj = NULL; 15 + struct test_send_signal_kern *skel; 19 16 int pipe_c2p[2], pipe_p2c[2]; 20 - __u32 key = 0, duration = 0; 17 + int err = -1, pmu_fd = -1; 18 + __u32 duration = 0; 21 19 char buf[256]; 22 20 pid_t pid; 23 - __u64 val; 24 21 25 22 if (CHECK(pipe(pipe_c2p), test_name, 26 23 "pipe pipe_c2p error: %s\n", strerror(errno))) ··· 72 73 close(pipe_c2p[1]); /* close write */ 73 74 close(pipe_p2c[0]); /* close read */ 74 75 75 - err = bpf_prog_load(file, prog_type, &obj, &prog_fd); 76 - if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n", 77 - strerror(errno))) 78 - goto prog_load_failure; 76 + skel = test_send_signal_kern__open_and_load(); 77 + if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n")) 78 + goto skel_open_load_failure; 79 79 80 - pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, 81 - -1 /* group id */, 0 /* flags */); 82 - if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", 83 - strerror(errno))) { 84 - err = -1; 85 - goto close_prog; 80 + if (!attr) { 81 + err = test_send_signal_kern__attach(skel); 82 + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) { 83 + err = -1; 84 + goto destroy_skel; 85 + } 86 + } else { 87 + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, 88 + -1 /* group id */, 0 /* flags */); 89 + if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", 90 + strerror(errno))) { 91 + err = -1; 92 + goto destroy_skel; 93 + } 94 + 95 + skel->links.send_signal_perf = 96 + 
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd); 97 + if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event", 98 + "err %ld\n", PTR_ERR(skel->links.send_signal_perf))) 99 + goto disable_pmu; 86 100 } 87 - 88 - err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); 89 - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n", 90 - strerror(errno))) 91 - goto disable_pmu; 92 - 93 - err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); 94 - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n", 95 - strerror(errno))) 96 - goto disable_pmu; 97 - 98 - err = -1; 99 - info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map"); 100 - if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map")) 101 - goto disable_pmu; 102 - 103 - status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map"); 104 - if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map")) 105 - goto disable_pmu; 106 101 107 102 /* wait until child signal handler installed */ 108 103 read(pipe_c2p[0], buf, 1); 109 104 110 105 /* trigger the bpf send_signal */ 111 - key = 0; 112 - val = (((__u64)(SIGUSR1)) << 32) | pid; 113 - bpf_map_update_elem(info_map_fd, &key, &val, 0); 106 + skel->bss->pid = pid; 107 + skel->bss->sig = SIGUSR1; 108 + skel->bss->signal_thread = signal_thread; 114 109 115 110 /* notify child that bpf program can send_signal now */ 116 111 write(pipe_p2c[1], buf, 1); ··· 125 132 126 133 disable_pmu: 127 134 close(pmu_fd); 128 - close_prog: 129 - bpf_object__close(obj); 130 - prog_load_failure: 135 + destroy_skel: 136 + test_send_signal_kern__destroy(skel); 137 + skel_open_load_failure: 131 138 close(pipe_c2p[0]); 132 139 close(pipe_p2c[1]); 133 140 wait(NULL); 134 141 } 135 142 136 - static void test_send_signal_tracepoint(void) 143 + static void test_send_signal_tracepoint(bool signal_thread) 137 144 { 138 - const char *id_path = 
"/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id"; 139 - struct perf_event_attr attr = { 140 - .type = PERF_TYPE_TRACEPOINT, 141 - .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN, 142 - .sample_period = 1, 143 - .wakeup_events = 1, 144 - }; 145 - __u32 duration = 0; 146 - int bytes, efd; 147 - char buf[256]; 148 - 149 - efd = open(id_path, O_RDONLY, 0); 150 - if (CHECK(efd < 0, "tracepoint", 151 - "open syscalls/sys_enter_nanosleep/id failure: %s\n", 152 - strerror(errno))) 153 - return; 154 - 155 - bytes = read(efd, buf, sizeof(buf)); 156 - close(efd); 157 - if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint", 158 - "read syscalls/sys_enter_nanosleep/id failure: %s\n", 159 - strerror(errno))) 160 - return; 161 - 162 - attr.config = strtol(buf, NULL, 0); 163 - 164 - test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); 145 + test_send_signal_common(NULL, signal_thread, "tracepoint"); 165 146 } 166 147 167 - static void test_send_signal_perf(void) 148 + static void test_send_signal_perf(bool signal_thread) 168 149 { 169 150 struct perf_event_attr attr = { 170 151 .sample_period = 1, ··· 146 179 .config = PERF_COUNT_SW_CPU_CLOCK, 147 180 }; 148 181 149 - test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, 150 - "perf_sw_event"); 182 + test_send_signal_common(&attr, signal_thread, "perf_sw_event"); 151 183 } 152 184 153 - static void test_send_signal_nmi(void) 185 + static void test_send_signal_nmi(bool signal_thread) 154 186 { 155 187 struct perf_event_attr attr = { 156 - .sample_freq = 50, 157 - .freq = 1, 188 + .sample_period = 1, 158 189 .type = PERF_TYPE_HARDWARE, 159 190 .config = PERF_COUNT_HW_CPU_CYCLES, 160 191 }; ··· 175 210 close(pmu_fd); 176 211 } 177 212 178 - test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, 179 - "perf_hw_event"); 213 + test_send_signal_common(&attr, signal_thread, "perf_hw_event"); 180 214 } 181 215 182 216 void test_send_signal(void) 183 217 { 184 218 if 
(test__start_subtest("send_signal_tracepoint")) 185 - test_send_signal_tracepoint(); 219 + test_send_signal_tracepoint(false); 186 220 if (test__start_subtest("send_signal_perf")) 187 - test_send_signal_perf(); 221 + test_send_signal_perf(false); 188 222 if (test__start_subtest("send_signal_nmi")) 189 - test_send_signal_nmi(); 223 + test_send_signal_nmi(false); 224 + if (test__start_subtest("send_signal_tracepoint_thread")) 225 + test_send_signal_tracepoint(true); 226 + if (test__start_subtest("send_signal_perf_thread")) 227 + test_send_signal_perf(true); 228 + if (test__start_subtest("send_signal_nmi_thread")) 229 + test_send_signal_nmi(true); 190 230 }
+6 -2
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
··· 49 49 pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 50 50 0 /* cpu 0 */, -1 /* group id */, 51 51 0 /* flags */); 52 - if (CHECK(pmu_fd < 0, "perf_event_open", 53 - "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", 52 + if (pmu_fd < 0 && errno == ENOENT) { 53 + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); 54 + test__skip(); 55 + goto cleanup; 56 + } 57 + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", 54 58 pmu_fd, errno)) 55 59 goto cleanup; 56 60
+82
tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <test_progs.h> 4 + 5 + const char *err_str; 6 + bool found; 7 + 8 + static int libbpf_debug_print(enum libbpf_print_level level, 9 + const char *format, va_list args) 10 + { 11 + char *log_buf; 12 + 13 + if (level != LIBBPF_WARN || 14 + strcmp(format, "libbpf: \n%s\n")) { 15 + vprintf(format, args); 16 + return 0; 17 + } 18 + 19 + log_buf = va_arg(args, char *); 20 + if (!log_buf) 21 + goto out; 22 + if (strstr(log_buf, err_str) == 0) 23 + found = true; 24 + out: 25 + printf(format, log_buf); 26 + return 0; 27 + } 28 + 29 + extern int extra_prog_load_log_flags; 30 + 31 + static int check_load(const char *file) 32 + { 33 + struct bpf_prog_load_attr attr; 34 + struct bpf_object *obj = NULL; 35 + int err, prog_fd; 36 + 37 + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); 38 + attr.file = file; 39 + attr.prog_type = BPF_PROG_TYPE_UNSPEC; 40 + attr.log_level = extra_prog_load_log_flags; 41 + attr.prog_flags = BPF_F_TEST_RND_HI32; 42 + found = false; 43 + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); 44 + bpf_object__close(obj); 45 + return err; 46 + } 47 + 48 + struct test_def { 49 + const char *file; 50 + const char *err_str; 51 + }; 52 + 53 + void test_test_global_funcs(void) 54 + { 55 + struct test_def tests[] = { 56 + { "test_global_func1.o", "combined stack size of 4 calls is 544" }, 57 + { "test_global_func2.o" }, 58 + { "test_global_func3.o" , "the call stack of 8 frames" }, 59 + { "test_global_func4.o" }, 60 + { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, 61 + { "test_global_func6.o" , "modified ctx ptr R2" }, 62 + { "test_global_func7.o" , "foo() doesn't return scalar" }, 63 + }; 64 + libbpf_print_fn_t old_print_fn = NULL; 65 + int err, i, duration = 0; 66 + 67 + old_print_fn = libbpf_set_print(libbpf_debug_print); 68 + 69 + for (i = 0; i < ARRAY_SIZE(tests); i++) { 70 + const struct test_def *test = &tests[i]; 71 + 72 + if 
(!test__start_subtest(test->file)) 73 + continue; 74 + 75 + err_str = test->err_str; 76 + err = check_load(test->file); 77 + CHECK_FAIL(!!err ^ !!err_str); 78 + if (err_str) 79 + CHECK(found, "", "expected string '%s'", err_str); 80 + } 81 + libbpf_set_print(old_print_fn); 82 + }
+7 -1
tools/testing/selftests/bpf/prog_tests/test_overhead.c
··· 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #define _GNU_SOURCE 4 4 #include <sched.h> 5 + #include <sys/prctl.h> 5 6 #include <test_progs.h> 6 7 7 8 #define MAX_CNT 100000 ··· 18 17 static int test_task_rename(const char *prog) 19 18 { 20 19 int i, fd, duration = 0, err; 21 - char buf[] = "test\n"; 20 + char buf[] = "test_overhead"; 22 21 __u64 start_time; 23 22 24 23 fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); ··· 67 66 struct bpf_object *obj; 68 67 struct bpf_link *link; 69 68 int err, duration = 0; 69 + char comm[16] = {}; 70 + 71 + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) 72 + return; 70 73 71 74 obj = bpf_object__open_file("./test_overhead.o", NULL); 72 75 if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) ··· 143 138 test_run("fexit"); 144 139 bpf_link__destroy(link); 145 140 cleanup: 141 + prctl(PR_SET_NAME, comm, 0L, 0L, 0L); 146 142 bpf_object__close(obj); 147 143 }
+65
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <net/if.h> 4 + #include "test_xdp.skel.h" 5 + #include "test_xdp_bpf2bpf.skel.h" 6 + 7 + void test_xdp_bpf2bpf(void) 8 + { 9 + __u32 duration = 0, retval, size; 10 + char buf[128]; 11 + int err, pkt_fd, map_fd; 12 + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); 13 + struct iptnl_info value4 = {.family = AF_INET}; 14 + struct test_xdp *pkt_skel = NULL; 15 + struct test_xdp_bpf2bpf *ftrace_skel = NULL; 16 + struct vip key4 = {.protocol = 6, .family = AF_INET}; 17 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); 18 + 19 + /* Load XDP program to introspect */ 20 + pkt_skel = test_xdp__open_and_load(); 21 + if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) 22 + return; 23 + 24 + pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); 25 + 26 + map_fd = bpf_map__fd(pkt_skel->maps.vip2tnl); 27 + bpf_map_update_elem(map_fd, &key4, &value4, 0); 28 + 29 + /* Load trace program */ 30 + opts.attach_prog_fd = pkt_fd, 31 + ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts); 32 + if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) 33 + goto out; 34 + 35 + err = test_xdp_bpf2bpf__load(ftrace_skel); 36 + if (CHECK(err, "__load", "ftrace skeleton failed\n")) 37 + goto out; 38 + 39 + err = test_xdp_bpf2bpf__attach(ftrace_skel); 40 + if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) 41 + goto out; 42 + 43 + /* Run test program */ 44 + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), 45 + buf, &size, &retval, &duration); 46 + 47 + if (CHECK(err || retval != XDP_TX || size != 74 || 48 + iph->protocol != IPPROTO_IPIP, "ipv4", 49 + "err %d errno %d retval %d size %d\n", 50 + err, errno, retval, size)) 51 + goto out; 52 + 53 + /* Verify test results */ 54 + if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), 55 + "result", "fentry failed err %llu\n", 56 + ftrace_skel->bss->test_result_fentry)) 57 + goto out; 58 + 
59 + CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", 60 + "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); 61 + 62 + out: 63 + test_xdp__destroy(pkt_skel); 64 + test_xdp_bpf2bpf__destroy(ftrace_skel); 65 + }
+544
tools/testing/selftests/bpf/progs/bpf_cubic.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* WARNING: This implemenation is not necessarily the same 4 + * as the tcp_cubic.c. The purpose is mainly for testing 5 + * the kernel BPF logic. 6 + * 7 + * Highlights: 8 + * 1. CONFIG_HZ .kconfig map is used. 9 + * 2. In bictcp_update(), calculation is changed to use usec 10 + * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies. 11 + * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c. 12 + * 3. In bitctcp_update() [under tcp_friendliness], the original 13 + * "while (ca->ack_cnt > delta)" loop is changed to the equivalent 14 + * "ca->ack_cnt / delta" operation. 15 + */ 16 + 17 + #include <linux/bpf.h> 18 + #include "bpf_tcp_helpers.h" 19 + 20 + char _license[] SEC("license") = "GPL"; 21 + 22 + #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) 23 + 24 + #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation 25 + * max_cwnd = snd_cwnd * beta 26 + */ 27 + #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ 28 + 29 + /* Two methods of hybrid slow start */ 30 + #define HYSTART_ACK_TRAIN 0x1 31 + #define HYSTART_DELAY 0x2 32 + 33 + /* Number of delay samples for detecting the increase of delay */ 34 + #define HYSTART_MIN_SAMPLES 8 35 + #define HYSTART_DELAY_MIN (4000U) /* 4ms */ 36 + #define HYSTART_DELAY_MAX (16000U) /* 16 ms */ 37 + #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) 38 + 39 + static int fast_convergence = 1; 40 + static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ 41 + static int initial_ssthresh; 42 + static const int bic_scale = 41; 43 + static int tcp_friendliness = 1; 44 + 45 + static int hystart = 1; 46 + static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY; 47 + static int hystart_low_window = 16; 48 + static int hystart_ack_delta_us = 2000; 49 + 50 + static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ 51 + static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 52 + / 
(BICTCP_BETA_SCALE - beta); 53 + /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 54 + * so K = cubic_root( (wmax-cwnd)*rtt/c ) 55 + * the unit of K is bictcp_HZ=2^10, not HZ 56 + * 57 + * c = bic_scale >> 10 58 + * rtt = 100ms 59 + * 60 + * the following code has been designed and tested for 61 + * cwnd < 1 million packets 62 + * RTT < 100 seconds 63 + * HZ < 1,000,00 (corresponding to 10 nano-second) 64 + */ 65 + 66 + /* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */ 67 + static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) 68 + / (bic_scale * 10); 69 + 70 + /* BIC TCP Parameters */ 71 + struct bictcp { 72 + __u32 cnt; /* increase cwnd by 1 after ACKs */ 73 + __u32 last_max_cwnd; /* last maximum snd_cwnd */ 74 + __u32 last_cwnd; /* the last snd_cwnd */ 75 + __u32 last_time; /* time when updated last_cwnd */ 76 + __u32 bic_origin_point;/* origin point of bic function */ 77 + __u32 bic_K; /* time to origin point 78 + from the beginning of the current epoch */ 79 + __u32 delay_min; /* min delay (usec) */ 80 + __u32 epoch_start; /* beginning of an epoch */ 81 + __u32 ack_cnt; /* number of acks */ 82 + __u32 tcp_cwnd; /* estimated tcp cwnd */ 83 + __u16 unused; 84 + __u8 sample_cnt; /* number of samples to decide curr_rtt */ 85 + __u8 found; /* the exit point is found? 
*/ 86 + __u32 round_start; /* beginning of each round */ 87 + __u32 end_seq; /* end_seq of the round */ 88 + __u32 last_ack; /* last time when the ACK spacing is close */ 89 + __u32 curr_rtt; /* the minimum rtt of current round */ 90 + }; 91 + 92 + static inline void bictcp_reset(struct bictcp *ca) 93 + { 94 + ca->cnt = 0; 95 + ca->last_max_cwnd = 0; 96 + ca->last_cwnd = 0; 97 + ca->last_time = 0; 98 + ca->bic_origin_point = 0; 99 + ca->bic_K = 0; 100 + ca->delay_min = 0; 101 + ca->epoch_start = 0; 102 + ca->ack_cnt = 0; 103 + ca->tcp_cwnd = 0; 104 + ca->found = 0; 105 + } 106 + 107 + extern unsigned long CONFIG_HZ __kconfig; 108 + #define HZ CONFIG_HZ 109 + #define USEC_PER_MSEC 1000UL 110 + #define USEC_PER_SEC 1000000UL 111 + #define USEC_PER_JIFFY (USEC_PER_SEC / HZ) 112 + 113 + static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) 114 + { 115 + return dividend / divisor; 116 + } 117 + 118 + #define div64_ul div64_u64 119 + 120 + #define BITS_PER_U64 (sizeof(__u64) * 8) 121 + static __always_inline int fls64(__u64 x) 122 + { 123 + int num = BITS_PER_U64 - 1; 124 + 125 + if (x == 0) 126 + return 0; 127 + 128 + if (!(x & (~0ull << (BITS_PER_U64-32)))) { 129 + num -= 32; 130 + x <<= 32; 131 + } 132 + if (!(x & (~0ull << (BITS_PER_U64-16)))) { 133 + num -= 16; 134 + x <<= 16; 135 + } 136 + if (!(x & (~0ull << (BITS_PER_U64-8)))) { 137 + num -= 8; 138 + x <<= 8; 139 + } 140 + if (!(x & (~0ull << (BITS_PER_U64-4)))) { 141 + num -= 4; 142 + x <<= 4; 143 + } 144 + if (!(x & (~0ull << (BITS_PER_U64-2)))) { 145 + num -= 2; 146 + x <<= 2; 147 + } 148 + if (!(x & (~0ull << (BITS_PER_U64-1)))) 149 + num -= 1; 150 + 151 + return num + 1; 152 + } 153 + 154 + static __always_inline __u32 bictcp_clock_us(const struct sock *sk) 155 + { 156 + return tcp_sk(sk)->tcp_mstamp; 157 + } 158 + 159 + static __always_inline void bictcp_hystart_reset(struct sock *sk) 160 + { 161 + struct tcp_sock *tp = tcp_sk(sk); 162 + struct bictcp *ca = inet_csk_ca(sk); 163 + 164 + 
ca->round_start = ca->last_ack = bictcp_clock_us(sk); 165 + ca->end_seq = tp->snd_nxt; 166 + ca->curr_rtt = ~0U; 167 + ca->sample_cnt = 0; 168 + } 169 + 170 + /* "struct_ops/" prefix is not a requirement 171 + * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS 172 + * as long as it is used in one of the func ptr 173 + * under SEC(".struct_ops"). 174 + */ 175 + SEC("struct_ops/bictcp_init") 176 + void BPF_PROG(bictcp_init, struct sock *sk) 177 + { 178 + struct bictcp *ca = inet_csk_ca(sk); 179 + 180 + bictcp_reset(ca); 181 + 182 + if (hystart) 183 + bictcp_hystart_reset(sk); 184 + 185 + if (!hystart && initial_ssthresh) 186 + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 187 + } 188 + 189 + /* No prefix in SEC will also work. 190 + * The remaining tcp-cubic functions have an easier way. 191 + */ 192 + SEC("no-sec-prefix-bictcp_cwnd_event") 193 + void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event) 194 + { 195 + if (event == CA_EVENT_TX_START) { 196 + struct bictcp *ca = inet_csk_ca(sk); 197 + __u32 now = tcp_jiffies32; 198 + __s32 delta; 199 + 200 + delta = now - tcp_sk(sk)->lsndtime; 201 + 202 + /* We were application limited (idle) for a while. 203 + * Shift epoch_start to keep cwnd growth to cubic curve. 204 + */ 205 + if (ca->epoch_start && delta > 0) { 206 + ca->epoch_start += delta; 207 + if (after(ca->epoch_start, now)) 208 + ca->epoch_start = now; 209 + } 210 + return; 211 + } 212 + } 213 + 214 + /* 215 + * cbrt(x) MSB values for x MSB values in [0..63]. 
216 + * Precomputed then refined by hand - Willy Tarreau 217 + * 218 + * For x in [0..63], 219 + * v = cbrt(x << 18) - 1 220 + * cbrt(x) = (v[x] + 10) >> 6 221 + */ 222 + static const __u8 v[] = { 223 + /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, 224 + /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, 225 + /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, 226 + /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, 227 + /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, 228 + /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, 229 + /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, 230 + /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, 231 + }; 232 + 233 + /* calculate the cubic root of x using a table lookup followed by one 234 + * Newton-Raphson iteration. 235 + * Avg err ~= 0.195% 236 + */ 237 + static __always_inline __u32 cubic_root(__u64 a) 238 + { 239 + __u32 x, b, shift; 240 + 241 + if (a < 64) { 242 + /* a in [0..63] */ 243 + return ((__u32)v[(__u32)a] + 35) >> 6; 244 + } 245 + 246 + b = fls64(a); 247 + b = ((b * 84) >> 8) - 1; 248 + shift = (a >> (b * 3)); 249 + 250 + /* it is needed for verifier's bound check on v */ 251 + if (shift >= 64) 252 + return 0; 253 + 254 + x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6; 255 + 256 + /* 257 + * Newton-Raphson iteration 258 + * 2 259 + * x = ( 2 * x + a / x ) / 3 260 + * k+1 k k 261 + */ 262 + x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1))); 263 + x = ((x * 341) >> 10); 264 + return x; 265 + } 266 + 267 + /* 268 + * Compute congestion window to use. 269 + */ 270 + static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, 271 + __u32 acked) 272 + { 273 + __u32 delta, bic_target, max_cnt; 274 + __u64 offs, t; 275 + 276 + ca->ack_cnt += acked; /* count the number of ACKed packets */ 277 + 278 + if (ca->last_cwnd == cwnd && 279 + (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) 280 + return; 281 + 282 + /* The CUBIC function can update ca->cnt at most once per jiffy. 
283 + * On all cwnd reduction events, ca->epoch_start is set to 0, 284 + * which will force a recalculation of ca->cnt. 285 + */ 286 + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) 287 + goto tcp_friendliness; 288 + 289 + ca->last_cwnd = cwnd; 290 + ca->last_time = tcp_jiffies32; 291 + 292 + if (ca->epoch_start == 0) { 293 + ca->epoch_start = tcp_jiffies32; /* record beginning */ 294 + ca->ack_cnt = acked; /* start counting */ 295 + ca->tcp_cwnd = cwnd; /* syn with cubic */ 296 + 297 + if (ca->last_max_cwnd <= cwnd) { 298 + ca->bic_K = 0; 299 + ca->bic_origin_point = cwnd; 300 + } else { 301 + /* Compute new K based on 302 + * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) 303 + */ 304 + ca->bic_K = cubic_root(cube_factor 305 + * (ca->last_max_cwnd - cwnd)); 306 + ca->bic_origin_point = ca->last_max_cwnd; 307 + } 308 + } 309 + 310 + /* cubic function - calc*/ 311 + /* calculate c * time^3 / rtt, 312 + * while considering overflow in calculation of time^3 313 + * (so time^3 is done by using 64 bit) 314 + * and without the support of division of 64bit numbers 315 + * (so all divisions are done by using 32 bit) 316 + * also NOTE the unit of those veriables 317 + * time = (t - K) / 2^bictcp_HZ 318 + * c = bic_scale >> 10 319 + * rtt = (srtt >> 3) / HZ 320 + * !!! The following code does not have overflow problems, 321 + * if the cwnd < 1 million packets !!! 
322 + */ 323 + 324 + t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY; 325 + t += ca->delay_min; 326 + /* change the unit from usec to bictcp_HZ */ 327 + t <<= BICTCP_HZ; 328 + t /= USEC_PER_SEC; 329 + 330 + if (t < ca->bic_K) /* t - K */ 331 + offs = ca->bic_K - t; 332 + else 333 + offs = t - ca->bic_K; 334 + 335 + /* c/rtt * (t-K)^3 */ 336 + delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); 337 + if (t < ca->bic_K) /* below origin*/ 338 + bic_target = ca->bic_origin_point - delta; 339 + else /* above origin*/ 340 + bic_target = ca->bic_origin_point + delta; 341 + 342 + /* cubic function - calc bictcp_cnt*/ 343 + if (bic_target > cwnd) { 344 + ca->cnt = cwnd / (bic_target - cwnd); 345 + } else { 346 + ca->cnt = 100 * cwnd; /* very small increment*/ 347 + } 348 + 349 + /* 350 + * The initial growth of cubic function may be too conservative 351 + * when the available bandwidth is still unknown. 352 + */ 353 + if (ca->last_max_cwnd == 0 && ca->cnt > 20) 354 + ca->cnt = 20; /* increase cwnd 5% per RTT */ 355 + 356 + tcp_friendliness: 357 + /* TCP Friendly */ 358 + if (tcp_friendliness) { 359 + __u32 scale = beta_scale; 360 + __u32 n; 361 + 362 + /* update tcp cwnd */ 363 + delta = (cwnd * scale) >> 3; 364 + if (ca->ack_cnt > delta && delta) { 365 + n = ca->ack_cnt / delta; 366 + ca->ack_cnt -= n * delta; 367 + ca->tcp_cwnd += n; 368 + } 369 + 370 + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ 371 + delta = ca->tcp_cwnd - cwnd; 372 + max_cnt = cwnd / delta; 373 + if (ca->cnt > max_cnt) 374 + ca->cnt = max_cnt; 375 + } 376 + } 377 + 378 + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per 379 + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. 380 + */ 381 + ca->cnt = max(ca->cnt, 2U); 382 + } 383 + 384 + /* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. 
*/ 385 + void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 386 + { 387 + struct tcp_sock *tp = tcp_sk(sk); 388 + struct bictcp *ca = inet_csk_ca(sk); 389 + 390 + if (!tcp_is_cwnd_limited(sk)) 391 + return; 392 + 393 + if (tcp_in_slow_start(tp)) { 394 + if (hystart && after(ack, ca->end_seq)) 395 + bictcp_hystart_reset(sk); 396 + acked = tcp_slow_start(tp, acked); 397 + if (!acked) 398 + return; 399 + } 400 + bictcp_update(ca, tp->snd_cwnd, acked); 401 + tcp_cong_avoid_ai(tp, ca->cnt, acked); 402 + } 403 + 404 + __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) 405 + { 406 + const struct tcp_sock *tp = tcp_sk(sk); 407 + struct bictcp *ca = inet_csk_ca(sk); 408 + 409 + ca->epoch_start = 0; /* end of epoch */ 410 + 411 + /* Wmax and fast convergence */ 412 + if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) 413 + ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) 414 + / (2 * BICTCP_BETA_SCALE); 415 + else 416 + ca->last_max_cwnd = tp->snd_cwnd; 417 + 418 + return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); 419 + } 420 + 421 + void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state) 422 + { 423 + if (new_state == TCP_CA_Loss) { 424 + bictcp_reset(inet_csk_ca(sk)); 425 + bictcp_hystart_reset(sk); 426 + } 427 + } 428 + 429 + #define GSO_MAX_SIZE 65536 430 + 431 + /* Account for TSO/GRO delays. 432 + * Otherwise short RTT flows could get too small ssthresh, since during 433 + * slow start we begin with small TSO packets and ca->delay_min would 434 + * not account for long aggregation delay when TSO packets get bigger. 435 + * Ideally even with a very small RTT we would like to have at least one 436 + * TSO packet being sent and received by GRO, and another one in qdisc layer. 437 + * We apply another 100% factor because @rate is doubled at this point. 438 + * We cap the cushion to 1ms. 
439 + */ 440 + static __always_inline __u32 hystart_ack_delay(struct sock *sk) 441 + { 442 + unsigned long rate; 443 + 444 + rate = sk->sk_pacing_rate; 445 + if (!rate) 446 + return 0; 447 + return min((__u64)USEC_PER_MSEC, 448 + div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); 449 + } 450 + 451 + static __always_inline void hystart_update(struct sock *sk, __u32 delay) 452 + { 453 + struct tcp_sock *tp = tcp_sk(sk); 454 + struct bictcp *ca = inet_csk_ca(sk); 455 + __u32 threshold; 456 + 457 + if (hystart_detect & HYSTART_ACK_TRAIN) { 458 + __u32 now = bictcp_clock_us(sk); 459 + 460 + /* first detection parameter - ack-train detection */ 461 + if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) { 462 + ca->last_ack = now; 463 + 464 + threshold = ca->delay_min + hystart_ack_delay(sk); 465 + 466 + /* Hystart ack train triggers if we get ack past 467 + * ca->delay_min/2. 468 + * Pacing might have delayed packets up to RTT/2 469 + * during slow start. 470 + */ 471 + if (sk->sk_pacing_status == SK_PACING_NONE) 472 + threshold >>= 1; 473 + 474 + if ((__s32)(now - ca->round_start) > threshold) { 475 + ca->found = 1; 476 + tp->snd_ssthresh = tp->snd_cwnd; 477 + } 478 + } 479 + } 480 + 481 + if (hystart_detect & HYSTART_DELAY) { 482 + /* obtain the minimum delay of more than sampling packets */ 483 + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { 484 + if (ca->curr_rtt > delay) 485 + ca->curr_rtt = delay; 486 + 487 + ca->sample_cnt++; 488 + } else { 489 + if (ca->curr_rtt > ca->delay_min + 490 + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { 491 + ca->found = 1; 492 + tp->snd_ssthresh = tp->snd_cwnd; 493 + } 494 + } 495 + } 496 + } 497 + 498 + void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, 499 + const struct ack_sample *sample) 500 + { 501 + const struct tcp_sock *tp = tcp_sk(sk); 502 + struct bictcp *ca = inet_csk_ca(sk); 503 + __u32 delay; 504 + 505 + /* Some calls are for duplicates without timetamps */ 506 + if (sample->rtt_us < 0) 507 + return; 508 + 509 + /* 
Discard delay samples right after fast recovery */ 510 + if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ) 511 + return; 512 + 513 + delay = sample->rtt_us; 514 + if (delay == 0) 515 + delay = 1; 516 + 517 + /* first time call or link delay decreases */ 518 + if (ca->delay_min == 0 || ca->delay_min > delay) 519 + ca->delay_min = delay; 520 + 521 + /* hystart triggers when cwnd is larger than some threshold */ 522 + if (!ca->found && tcp_in_slow_start(tp) && hystart && 523 + tp->snd_cwnd >= hystart_low_window) 524 + hystart_update(sk, delay); 525 + } 526 + 527 + __u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk) 528 + { 529 + const struct tcp_sock *tp = tcp_sk(sk); 530 + 531 + return max(tp->snd_cwnd, tp->prior_cwnd); 532 + } 533 + 534 + SEC(".struct_ops") 535 + struct tcp_congestion_ops cubic = { 536 + .init = (void *)bictcp_init, 537 + .ssthresh = (void *)bictcp_recalc_ssthresh, 538 + .cong_avoid = (void *)bictcp_cong_avoid, 539 + .set_state = (void *)bictcp_state, 540 + .undo_cwnd = (void *)tcp_reno_undo_cwnd, 541 + .cwnd_event = (void *)bictcp_cwnd_event, 542 + .pkts_acked = (void *)bictcp_acked, 543 + .name = "bpf_cubic", 544 + };
+216
tools/testing/selftests/bpf/progs/bpf_dctcp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + 4 + /* WARNING: This implemenation is not necessarily the same 5 + * as the tcp_dctcp.c. The purpose is mainly for testing 6 + * the kernel BPF logic. 7 + */ 8 + 9 + #include <linux/bpf.h> 10 + #include <linux/types.h> 11 + #include <bpf/bpf_helpers.h> 12 + #include "bpf_trace_helpers.h" 13 + #include "bpf_tcp_helpers.h" 14 + 15 + char _license[] SEC("license") = "GPL"; 16 + 17 + #define DCTCP_MAX_ALPHA 1024U 18 + 19 + struct dctcp { 20 + __u32 old_delivered; 21 + __u32 old_delivered_ce; 22 + __u32 prior_rcv_nxt; 23 + __u32 dctcp_alpha; 24 + __u32 next_seq; 25 + __u32 ce_state; 26 + __u32 loss_cwnd; 27 + }; 28 + 29 + static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ 30 + static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; 31 + 32 + static __always_inline void dctcp_reset(const struct tcp_sock *tp, 33 + struct dctcp *ca) 34 + { 35 + ca->next_seq = tp->snd_nxt; 36 + 37 + ca->old_delivered = tp->delivered; 38 + ca->old_delivered_ce = tp->delivered_ce; 39 + } 40 + 41 + SEC("struct_ops/dctcp_init") 42 + void BPF_PROG(dctcp_init, struct sock *sk) 43 + { 44 + const struct tcp_sock *tp = tcp_sk(sk); 45 + struct dctcp *ca = inet_csk_ca(sk); 46 + 47 + ca->prior_rcv_nxt = tp->rcv_nxt; 48 + ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); 49 + ca->loss_cwnd = 0; 50 + ca->ce_state = 0; 51 + 52 + dctcp_reset(tp, ca); 53 + } 54 + 55 + SEC("struct_ops/dctcp_ssthresh") 56 + __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) 57 + { 58 + struct dctcp *ca = inet_csk_ca(sk); 59 + struct tcp_sock *tp = tcp_sk(sk); 60 + 61 + ca->loss_cwnd = tp->snd_cwnd; 62 + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); 63 + } 64 + 65 + SEC("struct_ops/dctcp_update_alpha") 66 + void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) 67 + { 68 + const struct tcp_sock *tp = tcp_sk(sk); 69 + struct dctcp *ca = inet_csk_ca(sk); 70 + 71 + /* Expired RTT */ 72 + if 
(!before(tp->snd_una, ca->next_seq)) { 73 + __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; 74 + __u32 alpha = ca->dctcp_alpha; 75 + 76 + /* alpha = (1 - g) * alpha + g * F */ 77 + 78 + alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); 79 + if (delivered_ce) { 80 + __u32 delivered = tp->delivered - ca->old_delivered; 81 + 82 + /* If dctcp_shift_g == 1, a 32bit value would overflow 83 + * after 8 M packets. 84 + */ 85 + delivered_ce <<= (10 - dctcp_shift_g); 86 + delivered_ce /= max(1U, delivered); 87 + 88 + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); 89 + } 90 + ca->dctcp_alpha = alpha; 91 + dctcp_reset(tp, ca); 92 + } 93 + } 94 + 95 + static __always_inline void dctcp_react_to_loss(struct sock *sk) 96 + { 97 + struct dctcp *ca = inet_csk_ca(sk); 98 + struct tcp_sock *tp = tcp_sk(sk); 99 + 100 + ca->loss_cwnd = tp->snd_cwnd; 101 + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); 102 + } 103 + 104 + SEC("struct_ops/dctcp_state") 105 + void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) 106 + { 107 + if (new_state == TCP_CA_Recovery && 108 + new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) 109 + dctcp_react_to_loss(sk); 110 + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only 111 + * one loss-adjustment per RTT. 112 + */ 113 + } 114 + 115 + static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) 116 + { 117 + struct tcp_sock *tp = tcp_sk(sk); 118 + 119 + if (ce_state == 1) 120 + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; 121 + else 122 + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; 123 + } 124 + 125 + /* Minimal DCTP CE state machine: 126 + * 127 + * S: 0 <- last pkt was non-CE 128 + * 1 <- last pkt was CE 129 + */ 130 + static __always_inline 131 + void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, 132 + __u32 *prior_rcv_nxt, __u32 *ce_state) 133 + { 134 + __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 
1 : 0; 135 + 136 + if (*ce_state != new_ce_state) { 137 + /* CE state has changed, force an immediate ACK to 138 + * reflect the new CE state. If an ACK was delayed, 139 + * send that first to reflect the prior CE state. 140 + */ 141 + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { 142 + dctcp_ece_ack_cwr(sk, *ce_state); 143 + bpf_tcp_send_ack(sk, *prior_rcv_nxt); 144 + } 145 + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; 146 + } 147 + *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt; 148 + *ce_state = new_ce_state; 149 + dctcp_ece_ack_cwr(sk, new_ce_state); 150 + } 151 + 152 + SEC("struct_ops/dctcp_cwnd_event") 153 + void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) 154 + { 155 + struct dctcp *ca = inet_csk_ca(sk); 156 + 157 + switch (ev) { 158 + case CA_EVENT_ECN_IS_CE: 159 + case CA_EVENT_ECN_NO_CE: 160 + dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); 161 + break; 162 + case CA_EVENT_LOSS: 163 + dctcp_react_to_loss(sk); 164 + break; 165 + default: 166 + /* Don't care for the rest. */ 167 + break; 168 + } 169 + } 170 + 171 + SEC("struct_ops/dctcp_cwnd_undo") 172 + __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) 173 + { 174 + const struct dctcp *ca = inet_csk_ca(sk); 175 + 176 + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); 177 + } 178 + 179 + SEC("struct_ops/tcp_reno_cong_avoid") 180 + void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 181 + { 182 + struct tcp_sock *tp = tcp_sk(sk); 183 + 184 + if (!tcp_is_cwnd_limited(sk)) 185 + return; 186 + 187 + /* In "safe" area, increase. */ 188 + if (tcp_in_slow_start(tp)) { 189 + acked = tcp_slow_start(tp, acked); 190 + if (!acked) 191 + return; 192 + } 193 + /* In dangerous area, increase slowly. 
*/ 194 + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); 195 + } 196 + 197 + SEC(".struct_ops") 198 + struct tcp_congestion_ops dctcp_nouse = { 199 + .init = (void *)dctcp_init, 200 + .set_state = (void *)dctcp_state, 201 + .flags = TCP_CONG_NEEDS_ECN, 202 + .name = "bpf_dctcp_nouse", 203 + }; 204 + 205 + SEC(".struct_ops") 206 + struct tcp_congestion_ops dctcp = { 207 + .init = (void *)dctcp_init, 208 + .in_ack_event = (void *)dctcp_update_alpha, 209 + .cwnd_event = (void *)dctcp_cwnd_event, 210 + .ssthresh = (void *)dctcp_ssthresh, 211 + .cong_avoid = (void *)tcp_reno_cong_avoid, 212 + .undo_cwnd = (void *)dctcp_cwnd_undo, 213 + .set_state = (void *)dctcp_state, 214 + .flags = TCP_CONG_NEEDS_ECN, 215 + .name = "bpf_dctcp", 216 + };
+2 -2
tools/testing/selftests/bpf/progs/bpf_flow.c
··· 16 16 #include <sys/socket.h> 17 17 #include <linux/if_tunnel.h> 18 18 #include <linux/mpls.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 22 22 int _version SEC("version") = 1; 23 23 #define PROG(F) SEC(#F) int bpf_func_##F
+2 -2
tools/testing/selftests/bpf/progs/connect4_prog.c
··· 9 9 #include <linux/in6.h> 10 10 #include <sys/socket.h> 11 11 12 - #include "bpf_helpers.h" 13 - #include "bpf_endian.h" 12 + #include <bpf/bpf_helpers.h> 13 + #include <bpf/bpf_endian.h> 14 14 15 15 #define SRC_REWRITE_IP4 0x7f000004U 16 16 #define DST_REWRITE_IP4 0x7f000001U
+2 -2
tools/testing/selftests/bpf/progs/connect6_prog.c
··· 9 9 #include <linux/in6.h> 10 10 #include <sys/socket.h> 11 11 12 - #include "bpf_helpers.h" 13 - #include "bpf_endian.h" 12 + #include <bpf/bpf_helpers.h> 13 + #include <bpf/bpf_endian.h> 14 14 15 15 #define SRC_REWRITE_IP6_0 0 16 16 #define SRC_REWRITE_IP6_1 0
+1 -1
tools/testing/selftests/bpf/progs/dev_cgroup.c
··· 7 7 8 8 #include <linux/bpf.h> 9 9 #include <linux/version.h> 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 12 12 SEC("cgroup/dev") 13 13 int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+13 -10
tools/testing/selftests/bpf/progs/fentry_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #include "bpf_trace_helpers.h" 6 6 7 7 char _license[] SEC("license") = "GPL"; 8 8 9 9 __u64 test1_result = 0; 10 - BPF_TRACE_1("fentry/bpf_fentry_test1", test1, int, a) 10 + SEC("fentry/bpf_fentry_test1") 11 + int BPF_PROG(test1, int a) 11 12 { 12 13 test1_result = a == 1; 13 14 return 0; 14 15 } 15 16 16 17 __u64 test2_result = 0; 17 - BPF_TRACE_2("fentry/bpf_fentry_test2", test2, int, a, __u64, b) 18 + SEC("fentry/bpf_fentry_test2") 19 + int BPF_PROG(test2, int a, __u64 b) 18 20 { 19 21 test2_result = a == 2 && b == 3; 20 22 return 0; 21 23 } 22 24 23 25 __u64 test3_result = 0; 24 - BPF_TRACE_3("fentry/bpf_fentry_test3", test3, char, a, int, b, __u64, c) 26 + SEC("fentry/bpf_fentry_test3") 27 + int BPF_PROG(test3, char a, int b, __u64 c) 25 28 { 26 29 test3_result = a == 4 && b == 5 && c == 6; 27 30 return 0; 28 31 } 29 32 30 33 __u64 test4_result = 0; 31 - BPF_TRACE_4("fentry/bpf_fentry_test4", test4, 32 - void *, a, char, b, int, c, __u64, d) 34 + SEC("fentry/bpf_fentry_test4") 35 + int BPF_PROG(test4, void *a, char b, int c, __u64 d) 33 36 { 34 37 test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10; 35 38 return 0; 36 39 } 37 40 38 41 __u64 test5_result = 0; 39 - BPF_TRACE_5("fentry/bpf_fentry_test5", test5, 40 - __u64, a, void *, b, short, c, int, d, __u64, e) 42 + SEC("fentry/bpf_fentry_test5") 43 + int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e) 41 44 { 42 45 test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && 43 46 e == 15; ··· 48 45 } 49 46 50 47 __u64 test6_result = 0; 51 - BPF_TRACE_6("fentry/bpf_fentry_test6", test6, 52 - __u64, a, void *, b, short, c, int, d, void *, e, __u64, f) 48 + SEC("fentry/bpf_fentry_test6") 49 + int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f) 53 50 { 54 51 test6_result = a == 16 && b == 
(void *)17 && c == 18 && d == 19 && 55 52 e == (void *)20 && f == 21;
+77 -5
tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 + #include <linux/stddef.h> 4 + #include <linux/ipv6.h> 3 5 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_endian.h> 5 8 #include "bpf_trace_helpers.h" 6 9 7 10 struct sk_buff { ··· 12 9 }; 13 10 14 11 __u64 test_result = 0; 15 - BPF_TRACE_2("fexit/test_pkt_access", test_main, 16 - struct sk_buff *, skb, int, ret) 12 + SEC("fexit/test_pkt_access") 13 + int BPF_PROG(test_main, struct sk_buff *skb, int ret) 17 14 { 18 15 int len; 19 16 ··· 27 24 } 28 25 29 26 __u64 test_result_subprog1 = 0; 30 - BPF_TRACE_2("fexit/test_pkt_access_subprog1", test_subprog1, 31 - struct sk_buff *, skb, int, ret) 27 + SEC("fexit/test_pkt_access_subprog1") 28 + int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret) 32 29 { 33 30 int len; 34 31 ··· 81 78 return 0; 82 79 test_result_subprog2 = 1; 83 80 return 0; 81 + } 82 + 83 + __u64 test_result_subprog3 = 0; 84 + SEC("fexit/test_pkt_access_subprog3") 85 + int BPF_PROG(test_subprog3, int val, struct sk_buff *skb, int ret) 86 + { 87 + int len; 88 + 89 + __builtin_preserve_access_index(({ 90 + len = skb->len; 91 + })); 92 + if (len != 74 || ret != 74 * val || val != 3) 93 + return 0; 94 + test_result_subprog3 = 1; 95 + return 0; 96 + } 97 + 98 + __u64 test_get_skb_len = 0; 99 + SEC("freplace/get_skb_len") 100 + int new_get_skb_len(struct __sk_buff *skb) 101 + { 102 + int len = skb->len; 103 + 104 + if (len != 74) 105 + return 0; 106 + test_get_skb_len = 1; 107 + return 74; /* original get_skb_len() returns skb->len */ 108 + } 109 + 110 + __u64 test_get_skb_ifindex = 0; 111 + SEC("freplace/get_skb_ifindex") 112 + int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var) 113 + { 114 + void *data_end = (void *)(long)skb->data_end; 115 + void *data = (void *)(long)skb->data; 116 + struct ipv6hdr ip6, *ip6p; 117 + int ifindex = skb->ifindex; 118 + __u32 eth_proto; 119 + __u32 nh_off; 120 + 121 + 
/* check that BPF extension can read packet via direct packet access */ 122 + if (data + 14 + sizeof(ip6) > data_end) 123 + return 0; 124 + ip6p = data + 14; 125 + 126 + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) 127 + return 0; 128 + 129 + /* check that legacy packet access helper works too */ 130 + if (bpf_skb_load_bytes(skb, 14, &ip6, sizeof(ip6)) < 0) 131 + return 0; 132 + ip6p = &ip6; 133 + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) 134 + return 0; 135 + 136 + if (ifindex != 1 || val != 3 || var != 1) 137 + return 0; 138 + test_get_skb_ifindex = 1; 139 + return 3; /* original get_skb_ifindex() returns val * ifindex * var */ 140 + } 141 + 142 + volatile __u64 test_get_constant = 0; 143 + SEC("freplace/get_constant") 144 + int new_get_constant(long val) 145 + { 146 + if (val != 123) 147 + return 0; 148 + test_get_constant = 1; 149 + return test_get_constant; /* original get_constant() returns val - 122 */ 84 150 } 85 151 char _license[] SEC("license") = "GPL";
+4 -3
tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #include "bpf_trace_helpers.h" 6 6 7 7 struct sk_buff { ··· 9 9 }; 10 10 11 11 __u64 test_result = 0; 12 - BPF_TRACE_2("fexit/test_pkt_md_access", test_main2, 13 - struct sk_buff *, skb, int, ret) 12 + 13 + SEC("fexit/test_pkt_md_access") 14 + int BPF_PROG(test_main2, struct sk_buff *skb, int ret) 14 15 { 15 16 int len; 16 17
+13 -12
tools/testing/selftests/bpf/progs/fexit_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #include "bpf_trace_helpers.h" 6 6 7 7 char _license[] SEC("license") = "GPL"; 8 8 9 9 __u64 test1_result = 0; 10 - BPF_TRACE_2("fexit/bpf_fentry_test1", test1, int, a, int, ret) 10 + SEC("fexit/bpf_fentry_test1") 11 + int BPF_PROG(test1, int a, int ret) 11 12 { 12 13 test1_result = a == 1 && ret == 2; 13 14 return 0; 14 15 } 15 16 16 17 __u64 test2_result = 0; 17 - BPF_TRACE_3("fexit/bpf_fentry_test2", test2, int, a, __u64, b, int, ret) 18 + SEC("fexit/bpf_fentry_test2") 19 + int BPF_PROG(test2, int a, __u64 b, int ret) 18 20 { 19 21 test2_result = a == 2 && b == 3 && ret == 5; 20 22 return 0; 21 23 } 22 24 23 25 __u64 test3_result = 0; 24 - BPF_TRACE_4("fexit/bpf_fentry_test3", test3, char, a, int, b, __u64, c, int, ret) 26 + SEC("fexit/bpf_fentry_test3") 27 + int BPF_PROG(test3, char a, int b, __u64 c, int ret) 25 28 { 26 29 test3_result = a == 4 && b == 5 && c == 6 && ret == 15; 27 30 return 0; 28 31 } 29 32 30 33 __u64 test4_result = 0; 31 - BPF_TRACE_5("fexit/bpf_fentry_test4", test4, 32 - void *, a, char, b, int, c, __u64, d, int, ret) 34 + SEC("fexit/bpf_fentry_test4") 35 + int BPF_PROG(test4, void *a, char b, int c, __u64 d, int ret) 33 36 { 34 - 35 37 test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && 36 38 ret == 34; 37 39 return 0; 38 40 } 39 41 40 42 __u64 test5_result = 0; 41 - BPF_TRACE_6("fexit/bpf_fentry_test5", test5, 42 - __u64, a, void *, b, short, c, int, d, __u64, e, int, ret) 43 + SEC("fexit/bpf_fentry_test5") 44 + int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e, int ret) 43 45 { 44 46 test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && 45 47 e == 15 && ret == 65; ··· 49 47 } 50 48 51 49 __u64 test6_result = 0; 52 - BPF_TRACE_7("fexit/bpf_fentry_test6", test6, 53 - __u64, a, void *, b, short, c, int, d, void *, e, __u64, f, 54 - 
int, ret) 50 + SEC("fexit/bpf_fentry_test6") 51 + int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret) 55 52 { 56 53 test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && 57 54 e == (void *)20 && f == 21 && ret == 111;
+1 -1
tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
··· 2 2 // Copyright (c) 2018 Facebook 3 3 4 4 #include <linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 struct { 8 8 __uint(type, BPF_MAP_TYPE_ARRAY);
+10 -10
tools/testing/selftests/bpf/progs/kfree_skb.c
··· 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 4 #include <stdbool.h> 5 - #include "bpf_helpers.h" 6 - #include "bpf_endian.h" 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_endian.h> 7 7 #include "bpf_trace_helpers.h" 8 8 9 9 char _license[] SEC("license") = "GPL"; ··· 57 57 /* TRACE_EVENT(kfree_skb, 58 58 * TP_PROTO(struct sk_buff *skb, void *location), 59 59 */ 60 - BPF_TRACE_2("tp_btf/kfree_skb", trace_kfree_skb, 61 - struct sk_buff *, skb, void *, location) 60 + SEC("tp_btf/kfree_skb") 61 + int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location) 62 62 { 63 63 struct net_device *dev; 64 64 struct callback_head *ptr; ··· 114 114 bool fexit_test_ok; 115 115 } result; 116 116 117 - BPF_TRACE_3("fentry/eth_type_trans", fentry_eth_type_trans, 118 - struct sk_buff *, skb, struct net_device *, dev, 119 - unsigned short, protocol) 117 + SEC("fentry/eth_type_trans") 118 + int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev, 119 + unsigned short protocol) 120 120 { 121 121 int len, ifindex; 122 122 ··· 132 132 return 0; 133 133 } 134 134 135 - BPF_TRACE_3("fexit/eth_type_trans", fexit_eth_type_trans, 136 - struct sk_buff *, skb, struct net_device *, dev, 137 - unsigned short, protocol) 135 + SEC("fexit/eth_type_trans") 136 + int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, struct net_device *dev, 137 + unsigned short protocol) 138 138 { 139 139 int len, ifindex; 140 140
+2 -2
tools/testing/selftests/bpf/progs/loop1.c
··· 6 6 #include <stddef.h> 7 7 #include <stdbool.h> 8 8 #include <linux/bpf.h> 9 - #include "bpf_helpers.h" 10 - #include "bpf_tracing.h" 9 + #include <bpf/bpf_helpers.h> 10 + #include <bpf/bpf_tracing.h> 11 11 12 12 char _license[] SEC("license") = "GPL"; 13 13
+2 -2
tools/testing/selftests/bpf/progs/loop2.c
··· 6 6 #include <stddef.h> 7 7 #include <stdbool.h> 8 8 #include <linux/bpf.h> 9 - #include "bpf_helpers.h" 10 - #include "bpf_tracing.h" 9 + #include <bpf/bpf_helpers.h> 10 + #include <bpf/bpf_tracing.h> 11 11 12 12 char _license[] SEC("license") = "GPL"; 13 13
+2 -2
tools/testing/selftests/bpf/progs/loop3.c
··· 6 6 #include <stddef.h> 7 7 #include <stdbool.h> 8 8 #include <linux/bpf.h> 9 - #include "bpf_helpers.h" 10 - #include "bpf_tracing.h" 9 + #include <bpf/bpf_helpers.h> 10 + #include <bpf/bpf_tracing.h> 11 11 12 12 char _license[] SEC("license") = "GPL"; 13 13
+1 -1
tools/testing/selftests/bpf/progs/loop4.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 char _license[] SEC("license") = "GPL"; 7 7
+1 -1
tools/testing/selftests/bpf/progs/loop5.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #define barrier() __asm__ __volatile__("": : :"memory") 6 6 7 7 char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/netcnt_prog.c
··· 2 2 #include <linux/bpf.h> 3 3 #include <linux/version.h> 4 4 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 #include "netcnt_common.h" 7 7 8 8 #define MAX_BPS (3 * 1024 * 1024)
+8 -3
tools/testing/selftests/bpf/progs/pyperf.h
··· 6 6 #include <stddef.h> 7 7 #include <stdbool.h> 8 8 #include <linux/bpf.h> 9 - #include "bpf_helpers.h" 9 + #include <bpf/bpf_helpers.h> 10 10 11 11 #define FUNCTION_NAME_LEN 64 12 12 #define FILE_NAME_LEN 128 ··· 154 154 __uint(value_size, sizeof(long long) * 127); 155 155 } stackmap SEC(".maps"); 156 156 157 - static __always_inline int __on_event(struct pt_regs *ctx) 157 + #ifdef GLOBAL_FUNC 158 + __attribute__((noinline)) 159 + #else 160 + static __always_inline 161 + #endif 162 + int __on_event(struct bpf_raw_tracepoint_args *ctx) 158 163 { 159 164 uint64_t pid_tgid = bpf_get_current_pid_tgid(); 160 165 pid_t pid = (pid_t)(pid_tgid >> 32); ··· 259 254 } 260 255 261 256 SEC("raw_tracepoint/kfree_skb") 262 - int on_event(struct pt_regs* ctx) 257 + int on_event(struct bpf_raw_tracepoint_args* ctx) 263 258 { 264 259 int i, ret = 0; 265 260 ret |= __on_event(ctx);
+5
tools/testing/selftests/bpf/progs/pyperf_global.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020 Facebook */ 3 + #define STACK_MAX_LEN 50 4 + #define GLOBAL_FUNC 5 + #include "pyperf.h"
+1 -1
tools/testing/selftests/bpf/progs/sample_map_ret0.c
··· 1 1 /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ 2 2 #include <linux/bpf.h> 3 - #include "bpf_helpers.h" 3 + #include <bpf/bpf_helpers.h> 4 4 5 5 struct bpf_map_def SEC("maps") htab = { 6 6 .type = BPF_MAP_TYPE_HASH,
+2 -2
tools/testing/selftests/bpf/progs/sendmsg4_prog.c
··· 5 5 #include <linux/bpf.h> 6 6 #include <sys/socket.h> 7 7 8 - #include "bpf_helpers.h" 9 - #include "bpf_endian.h" 8 + #include <bpf/bpf_helpers.h> 9 + #include <bpf/bpf_endian.h> 10 10 11 11 #define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ 12 12 #define SRC2_IP4 0x00000000U
+2 -2
tools/testing/selftests/bpf/progs/sendmsg6_prog.c
··· 5 5 #include <linux/bpf.h> 6 6 #include <sys/socket.h> 7 7 8 - #include "bpf_helpers.h" 9 - #include "bpf_endian.h" 8 + #include <bpf/bpf_helpers.h> 9 + #include <bpf/bpf_endian.h> 10 10 11 11 #define SRC_REWRITE_IP6_0 0 12 12 #define SRC_REWRITE_IP6_1 0
+2 -2
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
··· 1 1 #include <linux/bpf.h> 2 - #include "bpf_helpers.h" 3 - #include "bpf_endian.h" 2 + #include <bpf/bpf_helpers.h> 3 + #include <bpf/bpf_endian.h> 4 4 5 5 int _version SEC("version") = 1; 6 6
+2 -2
tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
··· 1 1 #include <linux/bpf.h> 2 2 3 - #include "bpf_helpers.h" 4 - #include "bpf_endian.h" 3 + #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_endian.h> 5 5 6 6 int _version SEC("version") = 1; 7 7
+2 -2
tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
··· 1 1 #include <linux/bpf.h> 2 - #include "bpf_helpers.h" 3 - #include "bpf_endian.h" 2 + #include <bpf/bpf_helpers.h> 3 + #include <bpf/bpf_endian.h> 4 4 5 5 int _version SEC("version") = 1; 6 6
+1 -1
tools/testing/selftests/bpf/progs/sockopt_inherit.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 - #include "bpf_helpers.h" 3 + #include <bpf/bpf_helpers.h> 4 4 5 5 char _license[] SEC("license") = "GPL"; 6 6 __u32 _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/sockopt_multi.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <netinet/in.h> 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 char _license[] SEC("license") = "GPL"; 7 7 __u32 _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/sockopt_sk.c
··· 3 3 #include <netinet/in.h> 4 4 #include <netinet/tcp.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 char _license[] SEC("license") = "GPL"; 9 9 __u32 _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/strobemeta.h
··· 8 8 #include <linux/ptrace.h> 9 9 #include <linux/sched.h> 10 10 #include <linux/types.h> 11 - #include "bpf_helpers.h" 11 + #include <bpf/bpf_helpers.h> 12 12 13 13 typedef uint32_t pid_t; 14 14 struct task_struct {};
+1 -1
tools/testing/selftests/bpf/progs/tailcall1.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 3 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+1 -1
tools/testing/selftests/bpf/progs/tailcall2.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 3 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+1 -1
tools/testing/selftests/bpf/progs/tailcall3.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 3 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+1 -1
tools/testing/selftests/bpf/progs/tailcall4.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 3 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+1 -1
tools/testing/selftests/bpf/progs/tailcall5.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 3 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+1 -1
tools/testing/selftests/bpf/progs/tcp_rtt.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <linux/bpf.h> 3 - #include "bpf_helpers.h" 3 + #include <bpf/bpf_helpers.h> 4 4 5 5 char _license[] SEC("license") = "GPL"; 6 6 __u32 _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/test_adjust_tail.c
··· 7 7 */ 8 8 #include <linux/bpf.h> 9 9 #include <linux/if_ether.h> 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 12 12 int _version SEC("version") = 1; 13 13
+1 -1
tools/testing/selftests/bpf/progs/test_attach_probe.c
··· 3 3 4 4 #include <linux/ptrace.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 int kprobe_res = 0; 9 9 int kretprobe_res = 0;
+1 -1
tools/testing/selftests/bpf/progs/test_btf_haskv.c
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* Copyright (c) 2018 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #include "bpf_legacy.h" 6 6 7 7 int _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/test_btf_newkv.c
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* Copyright (c) 2018 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #include "bpf_legacy.h" 6 6 7 7 int _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/test_btf_nokv.c
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 /* Copyright (c) 2018 Facebook */ 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 int _version SEC("version") = 1; 7 7
+1 -1
tools/testing/selftests/bpf/progs/test_core_extern.c
··· 5 5 #include <stdbool.h> 6 6 #include <linux/ptrace.h> 7 7 #include <linux/bpf.h> 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 10 10 /* non-existing BPF helper, to test dead code elimination */ 11 11 static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_ints.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_misc.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+2 -2
tools/testing/selftests/bpf/progs/test_core_reloc_size.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_core_read.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_core_read.h> 8 8 9 9 char _license[] SEC("license") = "GPL"; 10 10
+1 -1
tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 /* Permit pretty deep stack traces */ 7 7 #define MAX_STACK_RAWTP 100
+1 -1
tools/testing/selftests/bpf/progs/test_global_data.c
··· 5 5 #include <linux/pkt_cls.h> 6 6 #include <string.h> 7 7 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 10 10 struct { 11 11 __uint(type, BPF_MAP_TYPE_ARRAY);
+45
tools/testing/selftests/bpf/progs/test_global_func1.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <stddef.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + #ifndef MAX_STACK 8 + #define MAX_STACK (512 - 3 * 32 + 8) 9 + #endif 10 + 11 + static __attribute__ ((noinline)) 12 + int f0(int var, struct __sk_buff *skb) 13 + { 14 + return skb->len; 15 + } 16 + 17 + __attribute__ ((noinline)) 18 + int f1(struct __sk_buff *skb) 19 + { 20 + volatile char buf[MAX_STACK] = {}; 21 + 22 + return f0(0, skb) + skb->len; 23 + } 24 + 25 + int f3(int, struct __sk_buff *skb, int); 26 + 27 + __attribute__ ((noinline)) 28 + int f2(int val, struct __sk_buff *skb) 29 + { 30 + return f1(skb) + f3(val, skb, 1); 31 + } 32 + 33 + __attribute__ ((noinline)) 34 + int f3(int val, struct __sk_buff *skb, int var) 35 + { 36 + volatile char buf[MAX_STACK] = {}; 37 + 38 + return skb->ifindex * val * var; 39 + } 40 + 41 + SEC("classifier/test") 42 + int test_cls(struct __sk_buff *skb) 43 + { 44 + return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); 45 + }
+4
tools/testing/selftests/bpf/progs/test_global_func2.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #define MAX_STACK (512 - 3 * 32) 4 + #include "test_global_func1.c"
+65
tools/testing/selftests/bpf/progs/test_global_func3.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <stddef.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + __attribute__ ((noinline)) 8 + int f1(struct __sk_buff *skb) 9 + { 10 + return skb->len; 11 + } 12 + 13 + __attribute__ ((noinline)) 14 + int f2(int val, struct __sk_buff *skb) 15 + { 16 + return f1(skb) + val; 17 + } 18 + 19 + __attribute__ ((noinline)) 20 + int f3(int val, struct __sk_buff *skb, int var) 21 + { 22 + return f2(var, skb) + val; 23 + } 24 + 25 + __attribute__ ((noinline)) 26 + int f4(struct __sk_buff *skb) 27 + { 28 + return f3(1, skb, 2); 29 + } 30 + 31 + __attribute__ ((noinline)) 32 + int f5(struct __sk_buff *skb) 33 + { 34 + return f4(skb); 35 + } 36 + 37 + __attribute__ ((noinline)) 38 + int f6(struct __sk_buff *skb) 39 + { 40 + return f5(skb); 41 + } 42 + 43 + __attribute__ ((noinline)) 44 + int f7(struct __sk_buff *skb) 45 + { 46 + return f6(skb); 47 + } 48 + 49 + #ifndef NO_FN8 50 + __attribute__ ((noinline)) 51 + int f8(struct __sk_buff *skb) 52 + { 53 + return f7(skb); 54 + } 55 + #endif 56 + 57 + SEC("classifier/test") 58 + int test_cls(struct __sk_buff *skb) 59 + { 60 + #ifndef NO_FN8 61 + return f8(skb); 62 + #else 63 + return f7(skb); 64 + #endif 65 + }
+4
tools/testing/selftests/bpf/progs/test_global_func4.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #define NO_FN8 4 + #include "test_global_func3.c"
+31
tools/testing/selftests/bpf/progs/test_global_func5.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <stddef.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + __attribute__ ((noinline)) 8 + int f1(struct __sk_buff *skb) 9 + { 10 + return skb->len; 11 + } 12 + 13 + int f3(int, struct __sk_buff *skb); 14 + 15 + __attribute__ ((noinline)) 16 + int f2(int val, struct __sk_buff *skb) 17 + { 18 + return f1(skb) + f3(val, (void *)&val); /* type mismatch */ 19 + } 20 + 21 + __attribute__ ((noinline)) 22 + int f3(int val, struct __sk_buff *skb) 23 + { 24 + return skb->ifindex * val; 25 + } 26 + 27 + SEC("classifier/test") 28 + int test_cls(struct __sk_buff *skb) 29 + { 30 + return f1(skb) + f2(2, skb) + f3(3, skb); 31 + }
+31
tools/testing/selftests/bpf/progs/test_global_func6.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <stddef.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + __attribute__ ((noinline)) 8 + int f1(struct __sk_buff *skb) 9 + { 10 + return skb->len; 11 + } 12 + 13 + int f3(int, struct __sk_buff *skb); 14 + 15 + __attribute__ ((noinline)) 16 + int f2(int val, struct __sk_buff *skb) 17 + { 18 + return f1(skb) + f3(val, skb + 1); /* type mismatch */ 19 + } 20 + 21 + __attribute__ ((noinline)) 22 + int f3(int val, struct __sk_buff *skb) 23 + { 24 + return skb->ifindex * val; 25 + } 26 + 27 + SEC("classifier/test") 28 + int test_cls(struct __sk_buff *skb) 29 + { 30 + return f1(skb) + f2(2, skb) + f3(3, skb); 31 + }
+18
tools/testing/selftests/bpf/progs/test_global_func7.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2020 Facebook */ 3 + #include <stddef.h> 4 + #include <linux/bpf.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + __attribute__ ((noinline)) 8 + void foo(struct __sk_buff *skb) 9 + { 10 + skb->tc_index = 0; 11 + } 12 + 13 + SEC("classifier/test") 14 + int test_cls(struct __sk_buff *skb) 15 + { 16 + foo(skb); 17 + return 0; 18 + }
+2 -2
tools/testing/selftests/bpf/progs/test_l4lb.c
··· 17 17 #include <linux/icmpv6.h> 18 18 #include <linux/tcp.h> 19 19 #include <linux/udp.h> 20 - #include "bpf_helpers.h" 20 + #include <bpf/bpf_helpers.h> 21 21 #include "test_iptunnel_common.h" 22 - #include "bpf_endian.h" 22 + #include <bpf/bpf_endian.h> 23 23 24 24 int _version SEC("version") = 1; 25 25
+2 -2
tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
··· 13 13 #include <linux/icmpv6.h> 14 14 #include <linux/tcp.h> 15 15 #include <linux/udp.h> 16 - #include "bpf_helpers.h" 16 + #include <bpf/bpf_helpers.h> 17 17 #include "test_iptunnel_common.h" 18 - #include "bpf_endian.h" 18 + #include <bpf/bpf_endian.h> 19 19 20 20 int _version SEC("version") = 1; 21 21
+1 -1
tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c
··· 5 5 6 6 #include <linux/bpf.h> 7 7 #include <linux/lirc.h> 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 10 10 SEC("lirc_mode2") 11 11 int bpf_decoder(unsigned int *sample)
+2 -2
tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
··· 4 4 #include <linux/bpf.h> 5 5 #include <linux/ip.h> 6 6 #include <linux/ipv6.h> 7 - #include "bpf_helpers.h" 8 - #include "bpf_endian.h" 7 + #include <bpf/bpf_helpers.h> 8 + #include <bpf/bpf_endian.h> 9 9 10 10 struct grehdr { 11 11 __be16 flags;
+2 -2
tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
··· 3 3 #include <errno.h> 4 4 #include <linux/seg6_local.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_endian.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_endian.h> 8 8 9 9 /* Packet parsing state machine helpers. */ 10 10 #define cursor_advance(_cursor, _len) \
+1 -1
tools/testing/selftests/bpf/progs/test_map_in_map.c
··· 3 3 #include <stddef.h> 4 4 #include <linux/bpf.h> 5 5 #include <linux/types.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 struct { 9 9 __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+1 -1
tools/testing/selftests/bpf/progs/test_map_lock.c
··· 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 4 #include <linux/version.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 #define VAR_NUM 16 8 8
+1 -1
tools/testing/selftests/bpf/progs/test_mmap.c
··· 3 3 4 4 #include <linux/bpf.h> 5 5 #include <stdint.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 char _license[] SEC("license") = "GPL"; 9 9
+1 -1
tools/testing/selftests/bpf/progs/test_obj_id.c
··· 4 4 #include <stddef.h> 5 5 #include <linux/bpf.h> 6 6 #include <linux/pkt_cls.h> 7 - #include "bpf_helpers.h" 7 + #include <bpf/bpf_helpers.h> 8 8 9 9 /* It is a dumb bpf program such that it must have no 10 10 * issue to be loaded since testing the verifier is
+20 -14
tools/testing/selftests/bpf/progs/test_overhead.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 + #include <stdbool.h> 4 + #include <stddef.h> 3 5 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 5 - #include "bpf_tracing.h" 6 + #include <linux/ptrace.h> 7 + #include <bpf/bpf_helpers.h> 8 + #include <bpf/bpf_tracing.h> 6 9 #include "bpf_trace_helpers.h" 7 10 11 + struct task_struct; 12 + 8 13 SEC("kprobe/__set_task_comm") 9 - int prog1(struct pt_regs *ctx) 14 + int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) 10 15 { 11 - return 0; 16 + return !tsk; 12 17 } 13 18 14 19 SEC("kretprobe/__set_task_comm") 15 - int prog2(struct pt_regs *ctx) 20 + int BPF_KRETPROBE(prog2, 21 + struct task_struct *tsk, const char *buf, bool exec, 22 + int ret) 16 23 { 17 - return 0; 24 + return !PT_REGS_PARM1(ctx) && ret; 18 25 } 19 26 20 27 SEC("raw_tp/task_rename") 21 28 int prog3(struct bpf_raw_tracepoint_args *ctx) 22 29 { 23 - return 0; 30 + return !ctx->args[0]; 24 31 } 25 32 26 - struct task_struct; 27 - BPF_TRACE_3("fentry/__set_task_comm", prog4, 28 - struct task_struct *, tsk, const char *, buf, __u8, exec) 33 + SEC("fentry/__set_task_comm") 34 + int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec) 29 35 { 30 - return 0; 36 + return !tsk; 31 37 } 32 38 33 - BPF_TRACE_3("fexit/__set_task_comm", prog5, 34 - struct task_struct *, tsk, const char *, buf, __u8, exec) 39 + SEC("fexit/__set_task_comm") 40 + int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) 35 41 { 36 - return 0; 42 + return !tsk; 37 43 } 38 44 39 45 char _license[] SEC("license") = "GPL";
+3 -2
tools/testing/selftests/bpf/progs/test_perf_buffer.c
··· 3 3 4 4 #include <linux/ptrace.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include "bpf_trace_helpers.h" 7 8 8 9 struct { 9 10 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); ··· 13 12 } perf_buf_map SEC(".maps"); 14 13 15 14 SEC("kprobe/sys_nanosleep") 16 - int handle_sys_nanosleep_entry(struct pt_regs *ctx) 15 + int BPF_KPROBE(handle_sys_nanosleep_entry) 17 16 { 18 17 int cpu = bpf_get_smp_processor_id(); 19 18
+1 -1
tools/testing/selftests/bpf/progs/test_pinning.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 int _version SEC("version") = 1; 7 7
+1 -1
tools/testing/selftests/bpf/progs/test_pinning_invalid.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 int _version SEC("version") = 1; 7 7
+36 -2
tools/testing/selftests/bpf/progs/test_pkt_access.c
··· 11 11 #include <linux/in.h> 12 12 #include <linux/tcp.h> 13 13 #include <linux/pkt_cls.h> 14 - #include "bpf_helpers.h" 15 - #include "bpf_endian.h" 14 + #include <bpf/bpf_helpers.h> 15 + #include <bpf/bpf_endian.h> 16 16 17 17 #define barrier() __asm__ __volatile__("": : :"memory") 18 18 int _version SEC("version") = 1; ··· 45 45 int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) 46 46 { 47 47 return skb->len * val; 48 + } 49 + 50 + #define MAX_STACK (512 - 2 * 32) 51 + 52 + __attribute__ ((noinline)) 53 + int get_skb_len(struct __sk_buff *skb) 54 + { 55 + volatile char buf[MAX_STACK] = {}; 56 + 57 + return skb->len; 58 + } 59 + 60 + __attribute__ ((noinline)) 61 + int get_constant(long val) 62 + { 63 + return val - 122; 64 + } 65 + 66 + int get_skb_ifindex(int, struct __sk_buff *skb, int); 67 + 68 + __attribute__ ((noinline)) 69 + int test_pkt_access_subprog3(int val, struct __sk_buff *skb) 70 + { 71 + return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); 72 + } 73 + 74 + __attribute__ ((noinline)) 75 + int get_skb_ifindex(int val, struct __sk_buff *skb, int var) 76 + { 77 + volatile char buf[MAX_STACK] = {}; 78 + 79 + return skb->ifindex * val * var; 48 80 } 49 81 50 82 SEC("classifier/test_pkt_access") ··· 113 81 if (test_pkt_access_subprog1(skb) != skb->len * 2) 114 82 return TC_ACT_SHOT; 115 83 if (test_pkt_access_subprog2(2, skb) != skb->len * 2) 84 + return TC_ACT_SHOT; 85 + if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) 116 86 return TC_ACT_SHOT; 117 87 if (tcp) { 118 88 if (((void *)(tcp) + 20) > data_end || proto != 6)
+1 -1
tools/testing/selftests/bpf/progs/test_pkt_md_access.c
··· 5 5 #include <string.h> 6 6 #include <linux/bpf.h> 7 7 #include <linux/pkt_cls.h> 8 - #include "bpf_helpers.h" 8 + #include <bpf/bpf_helpers.h> 9 9 10 10 int _version SEC("version") = 1; 11 11
+4 -3
tools/testing/selftests/bpf/progs/test_probe_user.c
··· 5 5 6 6 #include <netinet/in.h> 7 7 8 - #include "bpf_helpers.h" 9 - #include "bpf_tracing.h" 8 + #include <bpf/bpf_helpers.h> 9 + #include <bpf/bpf_tracing.h> 10 + #include "bpf_trace_helpers.h" 10 11 11 12 static struct sockaddr_in old; 12 13 13 14 SEC("kprobe/__sys_connect") 14 - int handle_sys_connect(struct pt_regs *ctx) 15 + int BPF_KPROBE(handle_sys_connect) 15 16 { 16 17 void *ptr = (void *)PT_REGS_PARM2(ctx); 17 18 struct sockaddr_in new;
+1 -1
tools/testing/selftests/bpf/progs/test_queue_stack_map.h
··· 6 6 #include <linux/if_ether.h> 7 7 #include <linux/ip.h> 8 8 #include <linux/pkt_cls.h> 9 - #include "bpf_helpers.h" 9 + #include <bpf/bpf_helpers.h> 10 10 11 11 int _version SEC("version") = 1; 12 12
+1 -1
tools/testing/selftests/bpf/progs/test_rdonly_maps.c
··· 3 3 4 4 #include <linux/ptrace.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 static volatile const struct { 9 9 unsigned a[4];
+2 -2
tools/testing/selftests/bpf/progs/test_seg6_loop.c
··· 3 3 #include <errno.h> 4 4 #include <linux/seg6_local.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 7 - #include "bpf_endian.h" 6 + #include <bpf/bpf_helpers.h> 7 + #include <bpf/bpf_endian.h> 8 8 9 9 /* Packet parsing state machine helpers. */ 10 10 #define cursor_advance(_cursor, _len) \
+2 -2
tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
··· 11 11 #include <linux/types.h> 12 12 #include <linux/if_ether.h> 13 13 14 - #include "bpf_endian.h" 15 - #include "bpf_helpers.h" 14 + #include <bpf/bpf_endian.h> 15 + #include <bpf/bpf_helpers.h> 16 16 #include "test_select_reuseport_common.h" 17 17 18 18 int _version SEC("version") = 1;
+22 -29
tools/testing/selftests/bpf/progs/test_send_signal_kern.c
··· 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 4 #include <linux/version.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 - struct { 8 - __uint(type, BPF_MAP_TYPE_ARRAY); 9 - __uint(max_entries, 1); 10 - __type(key, __u32); 11 - __type(value, __u64); 12 - } info_map SEC(".maps"); 7 + __u32 sig = 0, pid = 0, status = 0, signal_thread = 0; 13 8 14 - struct { 15 - __uint(type, BPF_MAP_TYPE_ARRAY); 16 - __uint(max_entries, 1); 17 - __type(key, __u32); 18 - __type(value, __u64); 19 - } status_map SEC(".maps"); 20 - 21 - SEC("send_signal_demo") 22 - int bpf_send_signal_test(void *ctx) 9 + static __always_inline int bpf_send_signal_test(void *ctx) 23 10 { 24 - __u64 *info_val, *status_val; 25 - __u32 key = 0, pid, sig; 26 11 int ret; 27 12 28 - status_val = bpf_map_lookup_elem(&status_map, &key); 29 - if (!status_val || *status_val != 0) 13 + if (status != 0 || sig == 0 || pid == 0) 30 14 return 0; 31 - 32 - info_val = bpf_map_lookup_elem(&info_map, &key); 33 - if (!info_val || *info_val == 0) 34 - return 0; 35 - 36 - sig = *info_val >> 32; 37 - pid = *info_val & 0xffffFFFF; 38 15 39 16 if ((bpf_get_current_pid_tgid() >> 32) == pid) { 40 - ret = bpf_send_signal(sig); 17 + if (signal_thread) 18 + ret = bpf_send_signal_thread(sig); 19 + else 20 + ret = bpf_send_signal(sig); 41 21 if (ret == 0) 42 - *status_val = 1; 22 + status = 1; 43 23 } 44 24 45 25 return 0; 46 26 } 27 + 28 + SEC("tracepoint/syscalls/sys_enter_nanosleep") 29 + int send_signal_tp(void *ctx) 30 + { 31 + return bpf_send_signal_test(ctx); 32 + } 33 + 34 + SEC("perf_event") 35 + int send_signal_perf(void *ctx) 36 + { 37 + return bpf_send_signal_test(ctx); 38 + } 39 + 47 40 char __license[] SEC("license") = "GPL";
+2 -2
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
··· 12 12 #include <linux/pkt_cls.h> 13 13 #include <linux/tcp.h> 14 14 #include <sys/socket.h> 15 - #include "bpf_helpers.h" 16 - #include "bpf_endian.h" 15 + #include <bpf/bpf_helpers.h> 16 + #include <bpf/bpf_endian.h> 17 17 18 18 int _version SEC("version") = 1; 19 19 char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
··· 6 6 7 7 #include <string.h> 8 8 9 - #include "bpf_helpers.h" 9 + #include <bpf/bpf_helpers.h> 10 10 11 11 #define NUM_CGROUP_LEVELS 4 12 12
+1 -1
tools/testing/selftests/bpf/progs/test_skb_ctx.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 int _version SEC("version") = 1; 7 7 char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/test_skeleton.c
··· 3 3 4 4 #include <stdbool.h> 5 5 #include <linux/bpf.h> 6 - #include "bpf_helpers.h" 6 + #include <bpf/bpf_helpers.h> 7 7 8 8 struct s { 9 9 int a;
+2 -2
tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
··· 5 5 #include <netinet/in.h> 6 6 #include <stdbool.h> 7 7 8 - #include "bpf_helpers.h" 9 - #include "bpf_endian.h" 8 + #include <bpf/bpf_helpers.h> 9 + #include <bpf/bpf_endian.h> 10 10 11 11 enum bpf_addr_array_idx { 12 12 ADDR_SRV_IDX,
+1 -1
tools/testing/selftests/bpf/progs/test_spin_lock.c
··· 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 4 #include <linux/version.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 struct hmap_elem { 8 8 volatile int cnt;
+1 -1
tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
··· 2 2 // Copyright (c) 2018 Facebook 3 3 4 4 #include <linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 #ifndef PERF_MAX_STACK_DEPTH 8 8 #define PERF_MAX_STACK_DEPTH 127
+1 -1
tools/testing/selftests/bpf/progs/test_stacktrace_map.c
··· 2 2 // Copyright (c) 2018 Facebook 3 3 4 4 #include <linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 #ifndef PERF_MAX_STACK_DEPTH 8 8 #define PERF_MAX_STACK_DEPTH 127
+1 -1
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
··· 7 7 #include <linux/stddef.h> 8 8 #include <linux/bpf.h> 9 9 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 12 12 #ifndef ARRAY_SIZE 13 13 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+1 -1
tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
··· 7 7 #include <linux/stddef.h> 8 8 #include <linux/bpf.h> 9 9 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 12 12 #ifndef ARRAY_SIZE 13 13 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+1 -1
tools/testing/selftests/bpf/progs/test_sysctl_prog.c
··· 7 7 #include <linux/stddef.h> 8 8 #include <linux/bpf.h> 9 9 10 - #include "bpf_helpers.h" 10 + #include <bpf/bpf_helpers.h> 11 11 12 12 /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ 13 13 #define MAX_ULONG_STR_LEN 0xF
+2 -2
tools/testing/selftests/bpf/progs/test_tc_edt.c
··· 7 7 #include <linux/ip.h> 8 8 #include <linux/pkt_cls.h> 9 9 #include <linux/tcp.h> 10 - #include "bpf_helpers.h" 11 - #include "bpf_endian.h" 10 + #include <bpf/bpf_helpers.h> 11 + #include <bpf/bpf_endian.h> 12 12 13 13 /* the maximum delay we are willing to add (drop packets beyond that) */ 14 14 #define TIME_HORIZON_NS (2000 * 1000 * 1000)
+2 -2
tools/testing/selftests/bpf/progs/test_tc_tunnel.c
··· 17 17 #include <linux/pkt_cls.h> 18 18 #include <linux/types.h> 19 19 20 - #include "bpf_endian.h" 21 - #include "bpf_helpers.h" 20 + #include <bpf/bpf_endian.h> 21 + #include <bpf/bpf_helpers.h> 22 22 23 23 static const int cfg_port = 8000; 24 24
+2 -2
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
··· 13 13 #include <sys/socket.h> 14 14 #include <linux/tcp.h> 15 15 16 - #include "bpf_helpers.h" 17 - #include "bpf_endian.h" 16 + #include <bpf/bpf_helpers.h> 17 + #include <bpf/bpf_endian.h> 18 18 19 19 struct bpf_map_def SEC("maps") results = { 20 20 .type = BPF_MAP_TYPE_ARRAY,
+1 -1
tools/testing/selftests/bpf/progs/test_tcp_estats.c
··· 36 36 #include <linux/ipv6.h> 37 37 #include <linux/version.h> 38 38 #include <sys/socket.h> 39 - #include "bpf_helpers.h" 39 + #include <bpf/bpf_helpers.h> 40 40 41 41 #define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) 42 42 #define TCP_ESTATS_MAGIC 0xBAADBEEF
+2 -2
tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
··· 10 10 #include <linux/types.h> 11 11 #include <linux/socket.h> 12 12 #include <linux/tcp.h> 13 - #include "bpf_helpers.h" 14 - #include "bpf_endian.h" 13 + #include <bpf/bpf_helpers.h> 14 + #include <bpf/bpf_endian.h> 15 15 #include "test_tcpbpf.h" 16 16 17 17 struct {
+2 -2
tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
··· 10 10 #include <linux/types.h> 11 11 #include <linux/socket.h> 12 12 #include <linux/tcp.h> 13 - #include "bpf_helpers.h" 14 - #include "bpf_endian.h" 13 + #include <bpf/bpf_helpers.h> 14 + #include <bpf/bpf_endian.h> 15 15 #include "test_tcpnotify.h" 16 16 17 17 struct {
+1 -1
tools/testing/selftests/bpf/progs/test_tracepoint.c
··· 2 2 // Copyright (c) 2017 Facebook 3 3 4 4 #include <linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ 8 8 struct sched_switch_args {
+2 -2
tools/testing/selftests/bpf/progs/test_tunnel_kern.c
··· 19 19 #include <linux/socket.h> 20 20 #include <linux/pkt_cls.h> 21 21 #include <linux/erspan.h> 22 - #include "bpf_helpers.h" 23 - #include "bpf_endian.h" 22 + #include <bpf/bpf_helpers.h> 23 + #include <bpf/bpf_endian.h> 24 24 25 25 #define ERROR(ret) do {\ 26 26 char fmt[] = "ERROR line:%d ret:%d\n";\
+1 -1
tools/testing/selftests/bpf/progs/test_verif_scale1.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #define ATTR __attribute__((noinline)) 6 6 #include "test_jhash.h" 7 7
+1 -1
tools/testing/selftests/bpf/progs/test_verif_scale2.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #define ATTR __always_inline 6 6 #include "test_jhash.h" 7 7
+1 -1
tools/testing/selftests/bpf/progs/test_verif_scale3.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2019 Facebook 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 #define ATTR __attribute__((noinline)) 6 6 #include "test_jhash.h" 7 7
+2 -2
tools/testing/selftests/bpf/progs/test_xdp.c
··· 16 16 #include <linux/tcp.h> 17 17 #include <linux/pkt_cls.h> 18 18 #include <sys/socket.h> 19 - #include "bpf_helpers.h" 20 - #include "bpf_endian.h" 19 + #include <bpf/bpf_helpers.h> 20 + #include <bpf/bpf_endian.h> 21 21 #include "test_iptunnel_common.h" 22 22 23 23 int _version SEC("version") = 1;
+44
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include "bpf_trace_helpers.h" 5 + 6 + struct net_device { 7 + /* Structure does not need to contain all entries, 8 + * as "preserve_access_index" will use BTF to fix this... 9 + */ 10 + int ifindex; 11 + } __attribute__((preserve_access_index)); 12 + 13 + struct xdp_rxq_info { 14 + /* Structure does not need to contain all entries, 15 + * as "preserve_access_index" will use BTF to fix this... 16 + */ 17 + struct net_device *dev; 18 + __u32 queue_index; 19 + } __attribute__((preserve_access_index)); 20 + 21 + struct xdp_buff { 22 + void *data; 23 + void *data_end; 24 + void *data_meta; 25 + void *data_hard_start; 26 + unsigned long handle; 27 + struct xdp_rxq_info *rxq; 28 + } __attribute__((preserve_access_index)); 29 + 30 + __u64 test_result_fentry = 0; 31 + SEC("fentry/_xdp_tx_iptunnel") 32 + int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) 33 + { 34 + test_result_fentry = xdp->rxq->dev->ifindex; 35 + return 0; 36 + } 37 + 38 + __u64 test_result_fexit = 0; 39 + SEC("fexit/_xdp_tx_iptunnel") 40 + int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret) 41 + { 42 + test_result_fexit = ret; 43 + return 0; 44 + }
+2 -2
tools/testing/selftests/bpf/progs/test_xdp_loop.c
··· 12 12 #include <linux/tcp.h> 13 13 #include <linux/pkt_cls.h> 14 14 #include <sys/socket.h> 15 - #include "bpf_helpers.h" 16 - #include "bpf_endian.h" 15 + #include <bpf/bpf_helpers.h> 16 + #include <bpf/bpf_endian.h> 17 17 #include "test_iptunnel_common.h" 18 18 19 19 int _version SEC("version") = 1;
+1 -1
tools/testing/selftests/bpf/progs/test_xdp_meta.c
··· 2 2 #include <linux/if_ether.h> 3 3 #include <linux/pkt_cls.h> 4 4 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 #define __round_mask(x, y) ((__typeof__(x))((y) - 1)) 8 8 #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+4 -4
tools/testing/selftests/bpf/progs/test_xdp_noinline.c
··· 13 13 #include <linux/icmpv6.h> 14 14 #include <linux/tcp.h> 15 15 #include <linux/udp.h> 16 - #include "bpf_helpers.h" 17 - #include "bpf_endian.h" 16 + #include <bpf/bpf_helpers.h> 17 + #include <bpf/bpf_endian.h> 18 18 19 19 static __u32 rol32(__u32 word, unsigned int shift) 20 20 { ··· 86 86 return c; 87 87 } 88 88 89 - static __attribute__ ((noinline)) 89 + __attribute__ ((noinline)) 90 90 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) 91 91 { 92 92 a += initval; ··· 96 96 return c; 97 97 } 98 98 99 - static __attribute__ ((noinline)) 99 + __attribute__ ((noinline)) 100 100 u32 jhash_2words(u32 a, u32 b, u32 initval) 101 101 { 102 102 return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+1 -1
tools/testing/selftests/bpf/progs/test_xdp_redirect.c
··· 10 10 * General Public License for more details. 11 11 */ 12 12 #include <linux/bpf.h> 13 - #include "bpf_helpers.h" 13 + #include <bpf/bpf_helpers.h> 14 14 15 15 int _version SEC("version") = 1; 16 16
+2 -2
tools/testing/selftests/bpf/progs/test_xdp_vlan.c
··· 22 22 #include <linux/in.h> 23 23 #include <linux/pkt_cls.h> 24 24 25 - #include "bpf_helpers.h" 26 - #include "bpf_endian.h" 25 + #include <bpf/bpf_helpers.h> 26 + #include <bpf/bpf_endian.h> 27 27 28 28 /* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here 29 29 *
+1 -1
tools/testing/selftests/bpf/progs/xdp_dummy.c
··· 2 2 3 3 #define KBUILD_MODNAME "xdp_dummy" 4 4 #include <linux/bpf.h> 5 - #include "bpf_helpers.h" 5 + #include <bpf/bpf_helpers.h> 6 6 7 7 SEC("xdp_dummy") 8 8 int xdp_dummy_prog(struct xdp_md *ctx)
+1 -1
tools/testing/selftests/bpf/progs/xdp_redirect_map.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 struct { 7 7 __uint(type, BPF_MAP_TYPE_DEVMAP);
+1 -1
tools/testing/selftests/bpf/progs/xdp_tx.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 3 3 #include <linux/bpf.h> 4 - #include "bpf_helpers.h" 4 + #include <bpf/bpf_helpers.h> 5 5 6 6 SEC("tx") 7 7 int xdp_tx(struct xdp_md *xdp)
+2 -2
tools/testing/selftests/bpf/progs/xdping_kern.c
··· 12 12 #include <linux/if_vlan.h> 13 13 #include <linux/ip.h> 14 14 15 - #include "bpf_helpers.h" 16 - #include "bpf_endian.h" 15 + #include <bpf/bpf_helpers.h> 16 + #include <bpf/bpf_endian.h> 17 17 18 18 #include "xdping.h" 19 19
-4
tools/testing/selftests/bpf/test_btf.c
··· 4148 4148 if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) 4149 4149 return PTR_ERR(obj); 4150 4150 4151 - err = bpf_object__btf_fd(obj); 4152 - if (CHECK(err == -1, "bpf_object__btf_fd: -1")) 4153 - goto done; 4154 - 4155 4151 prog = bpf_program__next(NULL, obj); 4156 4152 if (CHECK(!prog, "Cannot find bpf_prog")) { 4157 4153 err = -1;
+3 -3
tools/testing/selftests/bpf/test_cpp.cpp
··· 1 1 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 2 #include <iostream> 3 - #include "libbpf.h" 4 - #include "bpf.h" 5 - #include "btf.h" 3 + #include <bpf/libbpf.h> 4 + #include <bpf/bpf.h> 5 + #include <bpf/btf.h> 6 6 #include "test_core_extern.skel.h" 7 7 8 8 /* do nothing, just make sure we can link successfully */
+1 -1
tools/testing/selftests/bpf/test_hashmap.c
··· 8 8 #include <stdio.h> 9 9 #include <errno.h> 10 10 #include <linux/err.h> 11 - #include "hashmap.h" 11 + #include "bpf/hashmap.h" 12 12 13 13 #define CHECK(condition, format...) ({ \ 14 14 int __ret = !!(condition); \
+73 -10
tools/testing/selftests/bpf/test_progs.c
··· 8 8 #include <string.h> 9 9 10 10 /* defined in test_progs.h */ 11 - struct test_env env; 11 + struct test_env env = {}; 12 12 13 13 struct prog_test_def { 14 14 const char *test_name; ··· 29 29 30 30 static bool should_run(struct test_selector *sel, int num, const char *name) 31 31 { 32 - if (sel->name && sel->name[0] && !strstr(name, sel->name)) 33 - return false; 32 + int i; 34 33 35 - if (!sel->num_set) 34 + for (i = 0; i < sel->blacklist.cnt; i++) { 35 + if (strstr(name, sel->blacklist.strs[i])) 36 + return false; 37 + } 38 + 39 + for (i = 0; i < sel->whitelist.cnt; i++) { 40 + if (strstr(name, sel->whitelist.strs[i])) 41 + return true; 42 + } 43 + 44 + if (!sel->whitelist.cnt && !sel->num_set) 36 45 return true; 37 46 38 47 return num < sel->num_set_len && sel->num_set[num]; ··· 343 334 enum ARG_KEYS { 344 335 ARG_TEST_NUM = 'n', 345 336 ARG_TEST_NAME = 't', 337 + ARG_TEST_NAME_BLACKLIST = 'b', 346 338 ARG_VERIFIER_STATS = 's', 347 339 ARG_VERBOSE = 'v', 348 340 }; ··· 351 341 static const struct argp_option opts[] = { 352 342 { "num", ARG_TEST_NUM, "NUM", 0, 353 343 "Run test number NUM only " }, 354 - { "name", ARG_TEST_NAME, "NAME", 0, 355 - "Run tests with names containing NAME" }, 344 + { "name", ARG_TEST_NAME, "NAMES", 0, 345 + "Run tests with names containing any string from NAMES list" }, 346 + { "name-blacklist", ARG_TEST_NAME_BLACKLIST, "NAMES", 0, 347 + "Don't run tests with names containing any string from NAMES list" }, 356 348 { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0, 357 349 "Output verifier statistics", }, 358 350 { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, ··· 369 357 return 0; 370 358 vprintf(format, args); 371 359 return 0; 360 + } 361 + 362 + static int parse_str_list(const char *s, struct str_set *set) 363 + { 364 + char *input, *state = NULL, *next, **tmp, **strs = NULL; 365 + int cnt = 0; 366 + 367 + input = strdup(s); 368 + if (!input) 369 + return -ENOMEM; 370 + 371 + set->cnt = 0; 372 + set->strs = NULL; 373 + 
374 + while ((next = strtok_r(state ? NULL : input, ",", &state))) { 375 + tmp = realloc(strs, sizeof(*strs) * (cnt + 1)); 376 + if (!tmp) 377 + goto err; 378 + strs = tmp; 379 + 380 + strs[cnt] = strdup(next); 381 + if (!strs[cnt]) 382 + goto err; 383 + 384 + cnt++; 385 + } 386 + 387 + set->cnt = cnt; 388 + set->strs = (const char **)strs; 389 + free(input); 390 + return 0; 391 + err: 392 + free(strs); 393 + free(input); 394 + return -ENOMEM; 372 395 } 373 396 374 397 int parse_num_list(const char *s, struct test_selector *sel) ··· 496 449 497 450 if (subtest_str) { 498 451 *subtest_str = '\0'; 499 - env->subtest_selector.name = strdup(subtest_str + 1); 500 - if (!env->subtest_selector.name) 452 + if (parse_str_list(subtest_str + 1, 453 + &env->subtest_selector.whitelist)) 501 454 return -ENOMEM; 502 455 } 503 - env->test_selector.name = strdup(arg); 504 - if (!env->test_selector.name) 456 + if (parse_str_list(arg, &env->test_selector.whitelist)) 457 + return -ENOMEM; 458 + break; 459 + } 460 + case ARG_TEST_NAME_BLACKLIST: { 461 + char *subtest_str = strchr(arg, '/'); 462 + 463 + if (subtest_str) { 464 + *subtest_str = '\0'; 465 + if (parse_str_list(subtest_str + 1, 466 + &env->subtest_selector.blacklist)) 467 + return -ENOMEM; 468 + } 469 + if (parse_str_list(arg, &env->test_selector.blacklist)) 505 470 return -ENOMEM; 506 471 break; 507 472 } ··· 676 617 printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", 677 618 env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); 678 619 620 + free(env.test_selector.blacklist.strs); 621 + free(env.test_selector.whitelist.strs); 679 622 free(env.test_selector.num_set); 623 + free(env.subtest_selector.blacklist.strs); 624 + free(env.subtest_selector.whitelist.strs); 680 625 free(env.subtest_selector.num_set); 681 626 682 627 return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+8 -2
tools/testing/selftests/bpf/test_progs.h
··· 35 35 36 36 #include "test_iptunnel_common.h" 37 37 #include "bpf_util.h" 38 - #include "bpf_endian.h" 38 + #include <bpf/bpf_endian.h> 39 39 #include "trace_helpers.h" 40 40 #include "flow_dissector_load.h" 41 41 ··· 46 46 VERBOSE_SUPER, 47 47 }; 48 48 49 + struct str_set { 50 + const char **strs; 51 + int cnt; 52 + }; 53 + 49 54 struct test_selector { 50 - const char *name; 55 + struct str_set whitelist; 56 + struct str_set blacklist; 51 57 bool *num_set; 52 58 int num_set_len; 53 59 };
+1 -1
tools/testing/selftests/bpf/test_sock.c
··· 13 13 #include <bpf/bpf.h> 14 14 15 15 #include "cgroup_helpers.h" 16 - #include "bpf_endian.h" 16 + #include <bpf/bpf_endian.h> 17 17 #include "bpf_rlimit.h" 18 18 #include "bpf_util.h" 19 19
+2 -2
tools/testing/selftests/bpf/test_sockmap_kern.h
··· 12 12 #include <linux/tcp.h> 13 13 #include <linux/pkt_cls.h> 14 14 #include <sys/socket.h> 15 - #include "bpf_helpers.h" 16 - #include "bpf_endian.h" 15 + #include <bpf/bpf_helpers.h> 16 + #include <bpf/bpf_endian.h> 17 17 18 18 /* Sockmap sample program connects a client and a backend together 19 19 * using cgroups.
+1 -1
tools/testing/selftests/bpf/test_sysctl.c
··· 13 13 #include <bpf/bpf.h> 14 14 #include <bpf/libbpf.h> 15 15 16 - #include "bpf_endian.h" 16 + #include <bpf/bpf_endian.h> 17 17 #include "bpf_rlimit.h" 18 18 #include "bpf_util.h" 19 19 #include "cgroup_helpers.h"
+1 -1
tools/testing/selftests/bpf/trace_helpers.h
··· 2 2 #ifndef __TRACE_HELPER_H 3 3 #define __TRACE_HELPER_H 4 4 5 - #include <libbpf.h> 5 + #include <bpf/libbpf.h> 6 6 7 7 struct ksym { 8 8 long addr;