Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Implement two new kinds of BPF maps, that is, queue and stack
map along with new peek, push and pop operations, from Mauricio.

2) Add support for MSG_PEEK flag when redirecting into an ingress
psock sk_msg queue, and add a new helper bpf_msg_push_data() for
inserting data into the message, from John.

3) Allow for BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to use
direct packet access for __sk_buff, from Song.

4) Use more lightweight barriers for walking perf ring buffer for
libbpf and perf tool as well. Also, various fixes and improvements
from verifier side, from Daniel.

5) Add per-symbol visibility for DSO in libbpf and hide by default
global symbols such as netlink related functions, from Andrey.

6) Two improvements to nfp's BPF offload to check vNIC capabilities
in case prog is shared with multiple vNICs and to protect against
mis-initializing atomic counters, from Jakub.

7) Fix for bpftool to use 4 context mode for the nfp disassembler,
also from Jakub.

8) Fix a return value comparison in test_libbpf.sh and add several
bpftool improvements in bash completion, documentation of bpf fs
restrictions and batch mode summary print, from Quentin.

9) Fix a file resource leak in BPF selftest's load_kallsyms()
helper, from Peng.

10) Fix an unused variable warning in map_lookup_and_delete_elem(),
from Alexei.

11) Fix bpf_skb_adjust_room() signature in BPF UAPI helper doc,
from Nicolas.

12) Add missing executables to .gitignore in BPF selftests, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2315 -394
+9 -1
drivers/net/ethernet/netronome/nfp/bpf/main.h
··· 189 189 NFP_MAP_USE_ATOMIC_CNT, 190 190 }; 191 191 192 + struct nfp_bpf_map_word { 193 + unsigned char type :4; 194 + unsigned char non_zero_update :1; 195 + }; 196 + 192 197 /** 193 198 * struct nfp_bpf_map - private per-map data attached to BPF maps for offload 194 199 * @offmap: pointer to the offloaded BPF map ··· 207 202 struct nfp_app_bpf *bpf; 208 203 u32 tid; 209 204 struct list_head l; 210 - enum nfp_bpf_map_use use_map[]; 205 + struct nfp_bpf_map_word use_map[]; 211 206 }; 212 207 213 208 struct nfp_bpf_neutral_map { ··· 441 436 * @prog: machine code 442 437 * @prog_len: number of valid instructions in @prog array 443 438 * @__prog_alloc_len: alloc size of @prog array 439 + * @stack_size: total amount of stack used 444 440 * @verifier_meta: temporary storage for verifier's insn meta 445 441 * @type: BPF program type 446 442 * @last_bpf_off: address of the last instruction translated from BPF ··· 465 459 u64 *prog; 466 460 unsigned int prog_len; 467 461 unsigned int __prog_alloc_len; 462 + 463 + unsigned int stack_size; 468 464 469 465 struct nfp_insn_meta *verifier_meta; 470 466
+30 -2
drivers/net/ethernet/netronome/nfp/bpf/offload.c
··· 262 262 unsigned int i; 263 263 264 264 for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) 265 - if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT) 265 + if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT) 266 266 word[i] = (__force u32)cpu_to_be32(word[i]); 267 + } 268 + 269 + /* Mark value as unsafely initialized in case it becomes atomic later 270 + * and we didn't byte swap something non-byte swap neutral. 271 + */ 272 + static void 273 + nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value) 274 + { 275 + u32 *word = value; 276 + unsigned int i; 277 + 278 + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) 279 + if (nfp_map->use_map[i].type == NFP_MAP_UNUSED && 280 + word[i] != (__force u32)cpu_to_be32(word[i])) 281 + nfp_map->use_map[i].non_zero_update = 1; 267 282 } 268 283 269 284 static int ··· 300 285 void *key, void *value, u64 flags) 301 286 { 302 287 nfp_map_bpf_byte_swap(offmap->dev_priv, value); 288 + nfp_map_bpf_byte_swap_record(offmap->dev_priv, value); 303 289 return nfp_bpf_ctrl_update_entry(offmap, key, value, flags); 304 290 } 305 291 ··· 489 473 struct netlink_ext_ack *extack) 490 474 { 491 475 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; 492 - unsigned int max_mtu; 476 + unsigned int max_mtu, max_stack, max_prog_len; 493 477 dma_addr_t dma_addr; 494 478 void *img; 495 479 int err; ··· 497 481 max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; 498 482 if (max_mtu < nn->dp.netdev->mtu) { 499 483 NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary"); 484 + return -EOPNOTSUPP; 485 + } 486 + 487 + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 488 + if (nfp_prog->stack_size > max_stack) { 489 + NL_SET_ERR_MSG_MOD(extack, "stack too large"); 490 + return -EOPNOTSUPP; 491 + } 492 + 493 + max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); 494 + if (nfp_prog->prog_len > max_prog_len) { 495 + 
NL_SET_ERR_MSG_MOD(extack, "program too long"); 500 496 return -EOPNOTSUPP; 501 497 } 502 498
+59 -10
drivers/net/ethernet/netronome/nfp/bpf/verifier.c
··· 80 80 nfp_prog->adjust_head_location = location; 81 81 } 82 82 83 + static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env) 84 + { 85 + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; 86 + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; 87 + struct bpf_offloaded_map *offmap; 88 + struct bpf_func_state *state; 89 + struct nfp_bpf_map *nfp_map; 90 + int off, i; 91 + 92 + state = env->cur_state->frame[reg3->frameno]; 93 + 94 + /* We need to record each time update happens with non-zero words, 95 + * in case such word is used in atomic operations. 96 + * Implicitly depend on nfp_bpf_stack_arg_ok(reg3) being run before. 97 + */ 98 + 99 + offmap = map_to_offmap(reg1->map_ptr); 100 + nfp_map = offmap->dev_priv; 101 + off = reg3->off + reg3->var_off.value; 102 + 103 + for (i = 0; i < offmap->map.value_size; i++) { 104 + struct bpf_stack_state *stack_entry; 105 + unsigned int soff; 106 + 107 + soff = -(off + i) - 1; 108 + stack_entry = &state->stack[soff / BPF_REG_SIZE]; 109 + if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO) 110 + continue; 111 + 112 + if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) { 113 + pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n", 114 + i, soff); 115 + return false; 116 + } 117 + nfp_map->use_map[i / 4].non_zero_update = 1; 118 + } 119 + 120 + return true; 121 + } 122 + 83 123 static int 84 124 nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env, 85 125 const struct bpf_reg_state *reg, ··· 211 171 bpf->helpers.map_update, reg1) || 212 172 !nfp_bpf_stack_arg_ok("map_update", env, reg2, 213 173 meta->func_id ? 
&meta->arg2 : NULL) || 214 - !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL)) 174 + !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) || 175 + !nfp_bpf_map_update_value_ok(env)) 215 176 return -EOPNOTSUPP; 216 177 break; 217 178 ··· 393 352 struct nfp_bpf_map *nfp_map, 394 353 unsigned int off, enum nfp_bpf_map_use use) 395 354 { 396 - if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED && 397 - nfp_map->use_map[off / 4] != use) { 355 + if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED && 356 + nfp_map->use_map[off / 4].type != use) { 398 357 pr_vlog(env, "map value use type conflict %s vs %s off: %u\n", 399 - nfp_bpf_map_use_name(nfp_map->use_map[off / 4]), 358 + nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type), 400 359 nfp_bpf_map_use_name(use), off); 401 360 return -EOPNOTSUPP; 402 361 } 403 362 404 - nfp_map->use_map[off / 4] = use; 363 + if (nfp_map->use_map[off / 4].non_zero_update && 364 + use == NFP_MAP_USE_ATOMIC_CNT) { 365 + pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n", 366 + off); 367 + return -EOPNOTSUPP; 368 + } 369 + 370 + nfp_map->use_map[off / 4].type = use; 405 371 406 372 return 0; 407 373 } ··· 747 699 748 700 static int nfp_bpf_finalize(struct bpf_verifier_env *env) 749 701 { 750 - unsigned int stack_size, stack_needed; 751 702 struct bpf_subprog_info *info; 752 703 struct nfp_prog *nfp_prog; 704 + unsigned int max_stack; 753 705 struct nfp_net *nn; 754 706 int i; 755 707 ··· 777 729 } 778 730 779 731 nn = netdev_priv(env->prog->aux->offload->netdev); 780 - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 781 - stack_needed = nfp_bpf_get_stack_usage(nfp_prog, env->prog->len); 782 - if (stack_needed > stack_size) { 732 + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 733 + nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog, 734 + env->prog->len); 735 + if (nfp_prog->stack_size > max_stack) { 783 736 pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", 
784 - stack_needed, stack_size); 737 + nfp_prog->stack_size, max_stack); 785 738 return -EOPNOTSUPP; 786 739 } 787 740
+7
include/linux/bpf.h
··· 39 39 void *(*map_lookup_elem)(struct bpf_map *map, void *key); 40 40 int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); 41 41 int (*map_delete_elem)(struct bpf_map *map, void *key); 42 + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); 43 + int (*map_pop_elem)(struct bpf_map *map, void *value); 44 + int (*map_peek_elem)(struct bpf_map *map, void *value); 42 45 43 46 /* funcs called by prog_array and perf_event_array map */ 44 47 void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, ··· 141 138 ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ 142 139 ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ 143 140 ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ 141 + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ 144 142 145 143 /* the following constraints used to prototype bpf_memcmp() and other 146 144 * functions that access data on eBPF program stack ··· 814 810 extern const struct bpf_func_proto bpf_map_lookup_elem_proto; 815 811 extern const struct bpf_func_proto bpf_map_update_elem_proto; 816 812 extern const struct bpf_func_proto bpf_map_delete_elem_proto; 813 + extern const struct bpf_func_proto bpf_map_push_elem_proto; 814 + extern const struct bpf_func_proto bpf_map_pop_elem_proto; 815 + extern const struct bpf_func_proto bpf_map_peek_elem_proto; 817 816 818 817 extern const struct bpf_func_proto bpf_get_prandom_u32_proto; 819 818 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+3 -1
include/linux/bpf_types.h
··· 51 51 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) 52 52 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) 53 53 #ifdef CONFIG_PERF_EVENTS 54 - BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) 54 + BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) 55 55 #endif 56 56 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) 57 57 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) ··· 69 69 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) 70 70 #endif 71 71 #endif 72 + BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) 73 + BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
+21
include/linux/filter.h
··· 548 548 cb->data_end = skb->data + skb_headlen(skb); 549 549 } 550 550 551 + /* Similar to bpf_compute_data_pointers(), except that save orginal 552 + * data in cb->data and cb->meta_data for restore. 553 + */ 554 + static inline void bpf_compute_and_save_data_end( 555 + struct sk_buff *skb, void **saved_data_end) 556 + { 557 + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; 558 + 559 + *saved_data_end = cb->data_end; 560 + cb->data_end = skb->data + skb_headlen(skb); 561 + } 562 + 563 + /* Restore data saved by bpf_compute_data_pointers(). */ 564 + static inline void bpf_restore_data_end( 565 + struct sk_buff *skb, void *saved_data_end) 566 + { 567 + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; 568 + 569 + cb->data_end = saved_data_end; 570 + } 571 + 551 572 static inline u8 *bpf_skb_cb(struct sk_buff *skb) 552 573 { 553 574 /* eBPF programs may read/write skb->cb[] area to transfer meta
+36 -12
include/linux/skmsg.h
··· 176 176 { 177 177 dst->sg.data[which] = src->sg.data[which]; 178 178 dst->sg.data[which].length = size; 179 + dst->sg.size += size; 179 180 src->sg.data[which].length -= size; 180 181 src->sg.data[which].offset += size; 181 182 } ··· 187 186 sk_msg_init(src); 188 187 } 189 188 190 - static inline u32 sk_msg_elem_used(const struct sk_msg *msg) 191 - { 192 - return msg->sg.end >= msg->sg.start ? 193 - msg->sg.end - msg->sg.start : 194 - msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); 195 - } 196 - 197 189 static inline bool sk_msg_full(const struct sk_msg *msg) 198 190 { 199 191 return (msg->sg.end == msg->sg.start) && msg->sg.size; 200 192 } 201 193 194 + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) 195 + { 196 + if (sk_msg_full(msg)) 197 + return MAX_MSG_FRAGS; 198 + 199 + return msg->sg.end >= msg->sg.start ? 200 + msg->sg.end - msg->sg.start : 201 + msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); 202 + } 203 + 202 204 static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) 203 205 { 204 206 return &msg->sg.data[which]; 207 + } 208 + 209 + static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) 210 + { 211 + return msg->sg.data[which]; 205 212 } 206 213 207 214 static inline struct page *sk_msg_page(struct sk_msg *msg, int which) ··· 273 264 static inline struct sk_psock *sk_psock(const struct sock *sk) 274 265 { 275 266 return rcu_dereference_sk_user_data(sk); 276 - } 277 - 278 - static inline bool sk_has_psock(struct sock *sk) 279 - { 280 - return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; 281 267 } 282 268 283 269 static inline void sk_psock_queue_msg(struct sk_psock *psock, ··· 372 368 enum sk_psock_state_bits bit) 373 369 { 374 370 return test_bit(bit, &psock->state); 371 + } 372 + 373 + static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) 374 + { 375 + struct sk_psock *psock; 376 + 377 + rcu_read_lock(); 378 + psock = sk_psock(sk); 379 + if (psock) { 380 + if 
(sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { 381 + psock = ERR_PTR(-EBUSY); 382 + goto out; 383 + } 384 + 385 + if (!refcount_inc_not_zero(&psock->refcnt)) 386 + psock = ERR_PTR(-EBUSY); 387 + } 388 + out: 389 + rcu_read_unlock(); 390 + return psock; 375 391 } 376 392 377 393 static inline struct sk_psock *sk_psock_get(struct sock *sk)
+1 -8
include/net/tcp.h
··· 2051 2051 #define TCP_ULP_MAX 128 2052 2052 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) 2053 2053 2054 - enum { 2055 - TCP_ULP_TLS, 2056 - TCP_ULP_BPF, 2057 - }; 2058 - 2059 2054 struct tcp_ulp_ops { 2060 2055 struct list_head list; 2061 2056 ··· 2059 2064 /* cleanup ulp */ 2060 2065 void (*release)(struct sock *sk); 2061 2066 2062 - int uid; 2063 2067 char name[TCP_ULP_NAME_MAX]; 2064 - bool user_visible; 2065 2068 struct module *owner; 2066 2069 }; 2067 2070 int tcp_register_ulp(struct tcp_ulp_ops *type); ··· 2082 2089 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 2083 2090 int nonblock, int flags, int *addr_len); 2084 2091 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 2085 - struct msghdr *msg, int len); 2092 + struct msghdr *msg, int len, int flags); 2086 2093 2087 2094 /* Call BPF_SOCK_OPS program that returns an int. If the return value 2088 2095 * is < 0, then the BPF op failed (for example if the loaded BPF
+48 -2
include/uapi/linux/bpf.h
··· 103 103 BPF_BTF_LOAD, 104 104 BPF_BTF_GET_FD_BY_ID, 105 105 BPF_TASK_FD_QUERY, 106 + BPF_MAP_LOOKUP_AND_DELETE_ELEM, 106 107 }; 107 108 108 109 enum bpf_map_type { ··· 129 128 BPF_MAP_TYPE_CGROUP_STORAGE, 130 129 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, 131 130 BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, 131 + BPF_MAP_TYPE_QUEUE, 132 + BPF_MAP_TYPE_STACK, 132 133 }; 133 134 134 135 enum bpf_prog_type { ··· 463 460 * Description 464 461 * Delete entry with *key* from *map*. 465 462 * Return 463 + * 0 on success, or a negative error in case of failure. 464 + * 465 + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) 466 + * Description 467 + * Push an element *value* in *map*. *flags* is one of: 468 + * 469 + * **BPF_EXIST** 470 + * If the queue/stack is full, the oldest element is removed to 471 + * make room for this. 472 + * Return 473 + * 0 on success, or a negative error in case of failure. 474 + * 475 + * int bpf_map_pop_elem(struct bpf_map *map, void *value) 476 + * Description 477 + * Pop an element from *map*. 478 + * Return 479 + * 0 on success, or a negative error in case of failure. 480 + * 481 + * int bpf_map_peek_elem(struct bpf_map *map, void *value) 482 + * Description 483 + * Get an element from *map* without removing it. 484 + * Return 466 485 * 0 on success, or a negative error in case of failure. 467 486 * 468 487 * int bpf_probe_read(void *dst, u32 size, const void *src) ··· 1458 1433 * Return 1459 1434 * 0 on success, or a negative error in case of failure. 1460 1435 * 1461 - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) 1436 + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) 1462 1437 * Description 1463 1438 * Grow or shrink the room for data in the packet associated to 1464 1439 * *skb* by *len_diff*, and according to the selected *mode*. ··· 2240 2215 * pointer that was returned from bpf_sk_lookup_xxx\ (). 
2241 2216 * Return 2242 2217 * 0 on success, or a negative error in case of failure. 2218 + * 2219 + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) 2220 + * Description 2221 + * For socket policies, insert *len* bytes into msg at offset 2222 + * *start*. 2223 + * 2224 + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 2225 + * *msg* it may want to insert metadata or options into the msg. 2226 + * This can later be read and used by any of the lower layer BPF 2227 + * hooks. 2228 + * 2229 + * This helper may fail if under memory pressure (a malloc 2230 + * fails) in these cases BPF programs will get an appropriate 2231 + * error and BPF programs will need to handle them. 2232 + * 2233 + * Return 2234 + * 0 on success, or a negative error in case of failure. 2243 2235 */ 2244 2236 #define __BPF_FUNC_MAPPER(FN) \ 2245 2237 FN(unspec), \ ··· 2345 2303 FN(skb_ancestor_cgroup_id), \ 2346 2304 FN(sk_lookup_tcp), \ 2347 2305 FN(sk_lookup_udp), \ 2348 - FN(sk_release), 2306 + FN(sk_release), \ 2307 + FN(map_push_elem), \ 2308 + FN(map_pop_elem), \ 2309 + FN(map_peek_elem), \ 2310 + FN(msg_push_data), 2349 2311 2350 2312 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2351 2313 * function eBPF program intends to call
+1 -1
kernel/bpf/Makefile
··· 3 3 4 4 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 5 5 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o 6 - obj-$(CONFIG_BPF_SYSCALL) += local_storage.o 6 + obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o 7 7 obj-$(CONFIG_BPF_SYSCALL) += disasm.o 8 8 obj-$(CONFIG_BPF_SYSCALL) += btf.o 9 9 ifeq ($(CONFIG_NET),y)
+6
kernel/bpf/cgroup.c
··· 553 553 { 554 554 unsigned int offset = skb->data - skb_network_header(skb); 555 555 struct sock *save_sk; 556 + void *saved_data_end; 556 557 struct cgroup *cgrp; 557 558 int ret; 558 559 ··· 567 566 save_sk = skb->sk; 568 567 skb->sk = sk; 569 568 __skb_push(skb, offset); 569 + 570 + /* compute pointers for the bpf prog */ 571 + bpf_compute_and_save_data_end(skb, &saved_data_end); 572 + 570 573 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, 571 574 bpf_prog_run_save_cb); 575 + bpf_restore_data_end(skb, saved_data_end); 572 576 __skb_pull(skb, offset); 573 577 skb->sk = save_sk; 574 578 return ret == 1 ? 0 : -EPERM;
+3
kernel/bpf/core.c
··· 1783 1783 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; 1784 1784 const struct bpf_func_proto bpf_map_update_elem_proto __weak; 1785 1785 const struct bpf_func_proto bpf_map_delete_elem_proto __weak; 1786 + const struct bpf_func_proto bpf_map_push_elem_proto __weak; 1787 + const struct bpf_func_proto bpf_map_pop_elem_proto __weak; 1788 + const struct bpf_func_proto bpf_map_peek_elem_proto __weak; 1786 1789 1787 1790 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; 1788 1791 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+43
kernel/bpf/helpers.c
··· 76 76 .arg2_type = ARG_PTR_TO_MAP_KEY, 77 77 }; 78 78 79 + BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) 80 + { 81 + return map->ops->map_push_elem(map, value, flags); 82 + } 83 + 84 + const struct bpf_func_proto bpf_map_push_elem_proto = { 85 + .func = bpf_map_push_elem, 86 + .gpl_only = false, 87 + .pkt_access = true, 88 + .ret_type = RET_INTEGER, 89 + .arg1_type = ARG_CONST_MAP_PTR, 90 + .arg2_type = ARG_PTR_TO_MAP_VALUE, 91 + .arg3_type = ARG_ANYTHING, 92 + }; 93 + 94 + BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) 95 + { 96 + return map->ops->map_pop_elem(map, value); 97 + } 98 + 99 + const struct bpf_func_proto bpf_map_pop_elem_proto = { 100 + .func = bpf_map_pop_elem, 101 + .gpl_only = false, 102 + .pkt_access = true, 103 + .ret_type = RET_INTEGER, 104 + .arg1_type = ARG_CONST_MAP_PTR, 105 + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 106 + }; 107 + 108 + BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) 109 + { 110 + return map->ops->map_peek_elem(map, value); 111 + } 112 + 113 + const struct bpf_func_proto bpf_map_peek_elem_proto = { 114 + .func = bpf_map_pop_elem, 115 + .gpl_only = false, 116 + .pkt_access = true, 117 + .ret_type = RET_INTEGER, 118 + .arg1_type = ARG_CONST_MAP_PTR, 119 + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 120 + }; 121 + 79 122 const struct bpf_func_proto bpf_get_prandom_u32_proto = { 80 123 .func = bpf_user_rnd_u32, 81 124 .gpl_only = false,
+288
kernel/bpf/queue_stack_maps.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * queue_stack_maps.c: BPF queue and stack maps 4 + * 5 + * Copyright (c) 2018 Politecnico di Torino 6 + */ 7 + #include <linux/bpf.h> 8 + #include <linux/list.h> 9 + #include <linux/slab.h> 10 + #include "percpu_freelist.h" 11 + 12 + #define QUEUE_STACK_CREATE_FLAG_MASK \ 13 + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 14 + 15 + 16 + struct bpf_queue_stack { 17 + struct bpf_map map; 18 + raw_spinlock_t lock; 19 + u32 head, tail; 20 + u32 size; /* max_entries + 1 */ 21 + 22 + char elements[0] __aligned(8); 23 + }; 24 + 25 + static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) 26 + { 27 + return container_of(map, struct bpf_queue_stack, map); 28 + } 29 + 30 + static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) 31 + { 32 + return qs->head == qs->tail; 33 + } 34 + 35 + static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) 36 + { 37 + u32 head = qs->head + 1; 38 + 39 + if (unlikely(head >= qs->size)) 40 + head = 0; 41 + 42 + return head == qs->tail; 43 + } 44 + 45 + /* Called from syscall */ 46 + static int queue_stack_map_alloc_check(union bpf_attr *attr) 47 + { 48 + /* check sanity of attributes */ 49 + if (attr->max_entries == 0 || attr->key_size != 0 || 50 + attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK) 51 + return -EINVAL; 52 + 53 + if (attr->value_size > KMALLOC_MAX_SIZE) 54 + /* if value_size is bigger, the user space won't be able to 55 + * access the elements. 
56 + */ 57 + return -E2BIG; 58 + 59 + return 0; 60 + } 61 + 62 + static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) 63 + { 64 + int ret, numa_node = bpf_map_attr_numa_node(attr); 65 + struct bpf_queue_stack *qs; 66 + u32 size, value_size; 67 + u64 queue_size, cost; 68 + 69 + size = attr->max_entries + 1; 70 + value_size = attr->value_size; 71 + 72 + queue_size = sizeof(*qs) + (u64) value_size * size; 73 + 74 + cost = queue_size; 75 + if (cost >= U32_MAX - PAGE_SIZE) 76 + return ERR_PTR(-E2BIG); 77 + 78 + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 79 + 80 + ret = bpf_map_precharge_memlock(cost); 81 + if (ret < 0) 82 + return ERR_PTR(ret); 83 + 84 + qs = bpf_map_area_alloc(queue_size, numa_node); 85 + if (!qs) 86 + return ERR_PTR(-ENOMEM); 87 + 88 + memset(qs, 0, sizeof(*qs)); 89 + 90 + bpf_map_init_from_attr(&qs->map, attr); 91 + 92 + qs->map.pages = cost; 93 + qs->size = size; 94 + 95 + raw_spin_lock_init(&qs->lock); 96 + 97 + return &qs->map; 98 + } 99 + 100 + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 101 + static void queue_stack_map_free(struct bpf_map *map) 102 + { 103 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 104 + 105 + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 106 + * so the programs (can be more than one that used this map) were 107 + * disconnected from events. 
Wait for outstanding critical sections in 108 + * these programs to complete 109 + */ 110 + synchronize_rcu(); 111 + 112 + bpf_map_area_free(qs); 113 + } 114 + 115 + static int __queue_map_get(struct bpf_map *map, void *value, bool delete) 116 + { 117 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 118 + unsigned long flags; 119 + int err = 0; 120 + void *ptr; 121 + 122 + raw_spin_lock_irqsave(&qs->lock, flags); 123 + 124 + if (queue_stack_map_is_empty(qs)) { 125 + err = -ENOENT; 126 + goto out; 127 + } 128 + 129 + ptr = &qs->elements[qs->tail * qs->map.value_size]; 130 + memcpy(value, ptr, qs->map.value_size); 131 + 132 + if (delete) { 133 + if (unlikely(++qs->tail >= qs->size)) 134 + qs->tail = 0; 135 + } 136 + 137 + out: 138 + raw_spin_unlock_irqrestore(&qs->lock, flags); 139 + return err; 140 + } 141 + 142 + 143 + static int __stack_map_get(struct bpf_map *map, void *value, bool delete) 144 + { 145 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 146 + unsigned long flags; 147 + int err = 0; 148 + void *ptr; 149 + u32 index; 150 + 151 + raw_spin_lock_irqsave(&qs->lock, flags); 152 + 153 + if (queue_stack_map_is_empty(qs)) { 154 + err = -ENOENT; 155 + goto out; 156 + } 157 + 158 + index = qs->head - 1; 159 + if (unlikely(index >= qs->size)) 160 + index = qs->size - 1; 161 + 162 + ptr = &qs->elements[index * qs->map.value_size]; 163 + memcpy(value, ptr, qs->map.value_size); 164 + 165 + if (delete) 166 + qs->head = index; 167 + 168 + out: 169 + raw_spin_unlock_irqrestore(&qs->lock, flags); 170 + return err; 171 + } 172 + 173 + /* Called from syscall or from eBPF program */ 174 + static int queue_map_peek_elem(struct bpf_map *map, void *value) 175 + { 176 + return __queue_map_get(map, value, false); 177 + } 178 + 179 + /* Called from syscall or from eBPF program */ 180 + static int stack_map_peek_elem(struct bpf_map *map, void *value) 181 + { 182 + return __stack_map_get(map, value, false); 183 + } 184 + 185 + /* Called from syscall or from eBPF program */ 
186 + static int queue_map_pop_elem(struct bpf_map *map, void *value) 187 + { 188 + return __queue_map_get(map, value, true); 189 + } 190 + 191 + /* Called from syscall or from eBPF program */ 192 + static int stack_map_pop_elem(struct bpf_map *map, void *value) 193 + { 194 + return __stack_map_get(map, value, true); 195 + } 196 + 197 + /* Called from syscall or from eBPF program */ 198 + static int queue_stack_map_push_elem(struct bpf_map *map, void *value, 199 + u64 flags) 200 + { 201 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 202 + unsigned long irq_flags; 203 + int err = 0; 204 + void *dst; 205 + 206 + /* BPF_EXIST is used to force making room for a new element in case the 207 + * map is full 208 + */ 209 + bool replace = (flags & BPF_EXIST); 210 + 211 + /* Check supported flags for queue and stack maps */ 212 + if (flags & BPF_NOEXIST || flags > BPF_EXIST) 213 + return -EINVAL; 214 + 215 + raw_spin_lock_irqsave(&qs->lock, irq_flags); 216 + 217 + if (queue_stack_map_is_full(qs)) { 218 + if (!replace) { 219 + err = -E2BIG; 220 + goto out; 221 + } 222 + /* advance tail pointer to overwrite oldest element */ 223 + if (unlikely(++qs->tail >= qs->size)) 224 + qs->tail = 0; 225 + } 226 + 227 + dst = &qs->elements[qs->head * qs->map.value_size]; 228 + memcpy(dst, value, qs->map.value_size); 229 + 230 + if (unlikely(++qs->head >= qs->size)) 231 + qs->head = 0; 232 + 233 + out: 234 + raw_spin_unlock_irqrestore(&qs->lock, irq_flags); 235 + return err; 236 + } 237 + 238 + /* Called from syscall or from eBPF program */ 239 + static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) 240 + { 241 + return NULL; 242 + } 243 + 244 + /* Called from syscall or from eBPF program */ 245 + static int queue_stack_map_update_elem(struct bpf_map *map, void *key, 246 + void *value, u64 flags) 247 + { 248 + return -EINVAL; 249 + } 250 + 251 + /* Called from syscall or from eBPF program */ 252 + static int queue_stack_map_delete_elem(struct bpf_map *map, void 
*key) 253 + { 254 + return -EINVAL; 255 + } 256 + 257 + /* Called from syscall */ 258 + static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, 259 + void *next_key) 260 + { 261 + return -EINVAL; 262 + } 263 + 264 + const struct bpf_map_ops queue_map_ops = { 265 + .map_alloc_check = queue_stack_map_alloc_check, 266 + .map_alloc = queue_stack_map_alloc, 267 + .map_free = queue_stack_map_free, 268 + .map_lookup_elem = queue_stack_map_lookup_elem, 269 + .map_update_elem = queue_stack_map_update_elem, 270 + .map_delete_elem = queue_stack_map_delete_elem, 271 + .map_push_elem = queue_stack_map_push_elem, 272 + .map_pop_elem = queue_map_pop_elem, 273 + .map_peek_elem = queue_map_peek_elem, 274 + .map_get_next_key = queue_stack_map_get_next_key, 275 + }; 276 + 277 + const struct bpf_map_ops stack_map_ops = { 278 + .map_alloc_check = queue_stack_map_alloc_check, 279 + .map_alloc = queue_stack_map_alloc, 280 + .map_free = queue_stack_map_free, 281 + .map_lookup_elem = queue_stack_map_lookup_elem, 282 + .map_update_elem = queue_stack_map_update_elem, 283 + .map_delete_elem = queue_stack_map_delete_elem, 284 + .map_push_elem = queue_stack_map_push_elem, 285 + .map_pop_elem = stack_map_pop_elem, 286 + .map_peek_elem = stack_map_peek_elem, 287 + .map_get_next_key = queue_stack_map_get_next_key, 288 + };
+1 -1
kernel/bpf/stackmap.c
··· 600 600 put_callchain_buffers(); 601 601 } 602 602 603 - const struct bpf_map_ops stack_map_ops = { 603 + const struct bpf_map_ops stack_trace_map_ops = { 604 604 .map_alloc = stack_map_alloc, 605 605 .map_free = stack_map_free, 606 606 .map_get_next_key = stack_map_get_next_key,
+87 -4
kernel/bpf/syscall.c
··· 651 651 return -ENOTSUPP; 652 652 } 653 653 654 + static void *__bpf_copy_key(void __user *ukey, u64 key_size) 655 + { 656 + if (key_size) 657 + return memdup_user(ukey, key_size); 658 + 659 + if (ukey) 660 + return ERR_PTR(-EINVAL); 661 + 662 + return NULL; 663 + } 664 + 654 665 /* last field in 'union bpf_attr' used by this command */ 655 666 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 656 667 ··· 689 678 goto err_put; 690 679 } 691 680 692 - key = memdup_user(ukey, map->key_size); 681 + key = __bpf_copy_key(ukey, map->key_size); 693 682 if (IS_ERR(key)) { 694 683 err = PTR_ERR(key); 695 684 goto err_put; ··· 727 716 err = bpf_fd_htab_map_lookup_elem(map, key, value); 728 717 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 729 718 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 719 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 720 + map->map_type == BPF_MAP_TYPE_STACK) { 721 + err = map->ops->map_peek_elem(map, value); 730 722 } else { 731 723 rcu_read_lock(); 732 724 ptr = map->ops->map_lookup_elem(map, key); ··· 799 785 goto err_put; 800 786 } 801 787 802 - key = memdup_user(ukey, map->key_size); 788 + key = __bpf_copy_key(ukey, map->key_size); 803 789 if (IS_ERR(key)) { 804 790 err = PTR_ERR(key); 805 791 goto err_put; ··· 860 846 /* rcu_read_lock() is not needed */ 861 847 err = bpf_fd_reuseport_array_update_elem(map, key, value, 862 848 attr->flags); 849 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 850 + map->map_type == BPF_MAP_TYPE_STACK) { 851 + err = map->ops->map_push_elem(map, value, attr->flags); 863 852 } else { 864 853 rcu_read_lock(); 865 854 err = map->ops->map_update_elem(map, key, value, attr->flags); ··· 905 888 goto err_put; 906 889 } 907 890 908 - key = memdup_user(ukey, map->key_size); 891 + key = __bpf_copy_key(ukey, map->key_size); 909 892 if (IS_ERR(key)) { 910 893 err = PTR_ERR(key); 911 894 goto err_put; ··· 958 941 } 959 942 960 943 if (ukey) { 961 - key = memdup_user(ukey, map->key_size); 
944 + key = __bpf_copy_key(ukey, map->key_size); 962 945 if (IS_ERR(key)) { 963 946 err = PTR_ERR(key); 964 947 goto err_put; ··· 992 975 993 976 free_next_key: 994 977 kfree(next_key); 978 + free_key: 979 + kfree(key); 980 + err_put: 981 + fdput(f); 982 + return err; 983 + } 984 + 985 + #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value 986 + 987 + static int map_lookup_and_delete_elem(union bpf_attr *attr) 988 + { 989 + void __user *ukey = u64_to_user_ptr(attr->key); 990 + void __user *uvalue = u64_to_user_ptr(attr->value); 991 + int ufd = attr->map_fd; 992 + struct bpf_map *map; 993 + void *key, *value; 994 + u32 value_size; 995 + struct fd f; 996 + int err; 997 + 998 + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 999 + return -EINVAL; 1000 + 1001 + f = fdget(ufd); 1002 + map = __bpf_map_get(f); 1003 + if (IS_ERR(map)) 1004 + return PTR_ERR(map); 1005 + 1006 + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { 1007 + err = -EPERM; 1008 + goto err_put; 1009 + } 1010 + 1011 + key = __bpf_copy_key(ukey, map->key_size); 1012 + if (IS_ERR(key)) { 1013 + err = PTR_ERR(key); 1014 + goto err_put; 1015 + } 1016 + 1017 + value_size = map->value_size; 1018 + 1019 + err = -ENOMEM; 1020 + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1021 + if (!value) 1022 + goto free_key; 1023 + 1024 + if (map->map_type == BPF_MAP_TYPE_QUEUE || 1025 + map->map_type == BPF_MAP_TYPE_STACK) { 1026 + err = map->ops->map_pop_elem(map, value); 1027 + } else { 1028 + err = -ENOTSUPP; 1029 + } 1030 + 1031 + if (err) 1032 + goto free_value; 1033 + 1034 + if (copy_to_user(uvalue, value, value_size) != 0) 1035 + goto free_value; 1036 + 1037 + err = 0; 1038 + 1039 + free_value: 1040 + kfree(value); 995 1041 free_key: 996 1042 kfree(key); 997 1043 err_put: ··· 2534 2454 break; 2535 2455 case BPF_TASK_FD_QUERY: 2536 2456 err = bpf_task_fd_query(&attr, uattr); 2457 + break; 2458 + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: 2459 + err = map_lookup_and_delete_elem(&attr); 2537 2460 break; 2538 2461 
default: 2539 2462 err = -EINVAL;
+71 -14
kernel/bpf/verifier.c
··· 1528 1528 return reg->type != SCALAR_VALUE; 1529 1529 } 1530 1530 1531 + static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) 1532 + { 1533 + return cur_regs(env) + regno; 1534 + } 1535 + 1531 1536 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 1532 1537 { 1533 - return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); 1538 + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 1534 1539 } 1535 1540 1536 1541 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 1537 1542 { 1538 - const struct bpf_reg_state *reg = cur_regs(env) + regno; 1543 + const struct bpf_reg_state *reg = reg_state(env, regno); 1539 1544 1540 1545 return reg->type == PTR_TO_CTX || 1541 1546 reg->type == PTR_TO_SOCKET; ··· 1548 1543 1549 1544 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 1550 1545 { 1551 - const struct bpf_reg_state *reg = cur_regs(env) + regno; 1546 + const struct bpf_reg_state *reg = reg_state(env, regno); 1552 1547 1553 1548 return type_is_pkt_pointer(reg->type); 1549 + } 1550 + 1551 + static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 1552 + { 1553 + const struct bpf_reg_state *reg = reg_state(env, regno); 1554 + 1555 + /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. 
*/ 1556 + return reg->type == PTR_TO_FLOW_KEYS; 1554 1557 } 1555 1558 1556 1559 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, ··· 1969 1956 } 1970 1957 1971 1958 if (is_ctx_reg(env, insn->dst_reg) || 1972 - is_pkt_reg(env, insn->dst_reg)) { 1959 + is_pkt_reg(env, insn->dst_reg) || 1960 + is_flow_key_reg(env, insn->dst_reg)) { 1973 1961 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 1974 - insn->dst_reg, reg_type_str[insn->dst_reg]); 1962 + insn->dst_reg, 1963 + reg_type_str[reg_state(env, insn->dst_reg)->type]); 1975 1964 return -EACCES; 1976 1965 } 1977 1966 ··· 1998 1983 int access_size, bool zero_size_allowed, 1999 1984 struct bpf_call_arg_meta *meta) 2000 1985 { 2001 - struct bpf_reg_state *reg = cur_regs(env) + regno; 1986 + struct bpf_reg_state *reg = reg_state(env, regno); 2002 1987 struct bpf_func_state *state = func(env, reg); 2003 1988 int off, i, slot, spi; 2004 1989 ··· 2077 2062 case PTR_TO_PACKET_META: 2078 2063 return check_packet_access(env, regno, reg->off, access_size, 2079 2064 zero_size_allowed); 2080 - case PTR_TO_FLOW_KEYS: 2081 - return check_flow_keys_access(env, reg->off, access_size); 2082 2065 case PTR_TO_MAP_VALUE: 2083 2066 return check_map_access(env, regno, reg->off, access_size, 2084 2067 zero_size_allowed); ··· 2130 2117 } 2131 2118 2132 2119 if (arg_type == ARG_PTR_TO_MAP_KEY || 2133 - arg_type == ARG_PTR_TO_MAP_VALUE) { 2120 + arg_type == ARG_PTR_TO_MAP_VALUE || 2121 + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 2134 2122 expected_type = PTR_TO_STACK; 2135 2123 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && 2136 2124 type != expected_type) ··· 2201 2187 err = check_helper_mem_access(env, regno, 2202 2188 meta->map_ptr->key_size, false, 2203 2189 NULL); 2204 - } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { 2190 + } else if (arg_type == ARG_PTR_TO_MAP_VALUE || 2191 + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 2205 2192 /* bpf_map_xxx(..., map_ptr, ..., value) call: 2206 2193 * check 
[value, value + map->value_size) validity 2207 2194 */ ··· 2211 2196 verbose(env, "invalid map_ptr to access map->value\n"); 2212 2197 return -EACCES; 2213 2198 } 2199 + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); 2214 2200 err = check_helper_mem_access(env, regno, 2215 2201 meta->map_ptr->value_size, false, 2216 - NULL); 2202 + meta); 2217 2203 } else if (arg_type_is_mem_size(arg_type)) { 2218 2204 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 2219 2205 ··· 2337 2321 if (func_id != BPF_FUNC_sk_select_reuseport) 2338 2322 goto error; 2339 2323 break; 2324 + case BPF_MAP_TYPE_QUEUE: 2325 + case BPF_MAP_TYPE_STACK: 2326 + if (func_id != BPF_FUNC_map_peek_elem && 2327 + func_id != BPF_FUNC_map_pop_elem && 2328 + func_id != BPF_FUNC_map_push_elem) 2329 + goto error; 2330 + break; 2340 2331 default: 2341 2332 break; 2342 2333 } ··· 2398 2375 break; 2399 2376 case BPF_FUNC_sk_select_reuseport: 2400 2377 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) 2378 + goto error; 2379 + break; 2380 + case BPF_FUNC_map_peek_elem: 2381 + case BPF_FUNC_map_pop_elem: 2382 + case BPF_FUNC_map_push_elem: 2383 + if (map->map_type != BPF_MAP_TYPE_QUEUE && 2384 + map->map_type != BPF_MAP_TYPE_STACK) 2401 2385 goto error; 2402 2386 break; 2403 2387 default: ··· 2702 2672 if (func_id != BPF_FUNC_tail_call && 2703 2673 func_id != BPF_FUNC_map_lookup_elem && 2704 2674 func_id != BPF_FUNC_map_update_elem && 2705 - func_id != BPF_FUNC_map_delete_elem) 2675 + func_id != BPF_FUNC_map_delete_elem && 2676 + func_id != BPF_FUNC_map_push_elem && 2677 + func_id != BPF_FUNC_map_pop_elem && 2678 + func_id != BPF_FUNC_map_peek_elem) 2706 2679 return 0; 2707 2680 2708 2681 if (meta->map_ptr == NULL) { ··· 5277 5244 5278 5245 if (is_ctx_reg(env, insn->dst_reg)) { 5279 5246 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", 5280 - insn->dst_reg, reg_type_str[insn->dst_reg]); 5247 + insn->dst_reg, 5248 + reg_type_str[reg_state(env, insn->dst_reg)->type]); 5281 
5249 return -EACCES; 5282 5250 } 5283 5251 ··· 6178 6144 if (prog->jit_requested && BITS_PER_LONG == 64 && 6179 6145 (insn->imm == BPF_FUNC_map_lookup_elem || 6180 6146 insn->imm == BPF_FUNC_map_update_elem || 6181 - insn->imm == BPF_FUNC_map_delete_elem)) { 6147 + insn->imm == BPF_FUNC_map_delete_elem || 6148 + insn->imm == BPF_FUNC_map_push_elem || 6149 + insn->imm == BPF_FUNC_map_pop_elem || 6150 + insn->imm == BPF_FUNC_map_peek_elem)) { 6182 6151 aux = &env->insn_aux_data[i + delta]; 6183 6152 if (bpf_map_ptr_poisoned(aux)) 6184 6153 goto patch_call_imm; ··· 6214 6177 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 6215 6178 (int (*)(struct bpf_map *map, void *key, void *value, 6216 6179 u64 flags))NULL)); 6180 + BUILD_BUG_ON(!__same_type(ops->map_push_elem, 6181 + (int (*)(struct bpf_map *map, void *value, 6182 + u64 flags))NULL)); 6183 + BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 6184 + (int (*)(struct bpf_map *map, void *value))NULL)); 6185 + BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 6186 + (int (*)(struct bpf_map *map, void *value))NULL)); 6187 + 6217 6188 switch (insn->imm) { 6218 6189 case BPF_FUNC_map_lookup_elem: 6219 6190 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - ··· 6233 6188 continue; 6234 6189 case BPF_FUNC_map_delete_elem: 6235 6190 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - 6191 + __bpf_call_base; 6192 + continue; 6193 + case BPF_FUNC_map_push_elem: 6194 + insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 6195 + __bpf_call_base; 6196 + continue; 6197 + case BPF_FUNC_map_pop_elem: 6198 + insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - 6199 + __bpf_call_base; 6200 + continue; 6201 + case BPF_FUNC_map_peek_elem: 6202 + insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - 6236 6203 __bpf_call_base; 6237 6204 continue; 6238 6205 }
+17 -2
net/bpf/test_run.c
··· 10 10 #include <linux/etherdevice.h> 11 11 #include <linux/filter.h> 12 12 #include <linux/sched/signal.h> 13 + #include <net/sock.h> 14 + #include <net/tcp.h> 13 15 14 16 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, 15 17 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) ··· 117 115 u32 retval, duration; 118 116 int hh_len = ETH_HLEN; 119 117 struct sk_buff *skb; 118 + struct sock *sk; 120 119 void *data; 121 120 int ret; 122 121 ··· 140 137 break; 141 138 } 142 139 143 - skb = build_skb(data, 0); 144 - if (!skb) { 140 + sk = kzalloc(sizeof(struct sock), GFP_USER); 141 + if (!sk) { 145 142 kfree(data); 146 143 return -ENOMEM; 147 144 } 145 + sock_net_set(sk, current->nsproxy->net_ns); 146 + sock_init_data(NULL, sk); 147 + 148 + skb = build_skb(data, 0); 149 + if (!skb) { 150 + kfree(data); 151 + kfree(sk); 152 + return -ENOMEM; 153 + } 154 + skb->sk = sk; 148 155 149 156 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 150 157 __skb_put(skb, size); ··· 172 159 173 160 if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { 174 161 kfree_skb(skb); 162 + kfree(sk); 175 163 return -ENOMEM; 176 164 } 177 165 } ··· 185 171 size = skb_headlen(skb); 186 172 ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); 187 173 kfree_skb(skb); 174 + kfree(sk); 188 175 return ret; 189 176 } 190 177
+175 -1
net/core/filter.c
··· 2297 2297 .arg4_type = ARG_ANYTHING, 2298 2298 }; 2299 2299 2300 + BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, 2301 + u32, len, u64, flags) 2302 + { 2303 + struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; 2304 + u32 new, i = 0, l, space, copy = 0, offset = 0; 2305 + u8 *raw, *to, *from; 2306 + struct page *page; 2307 + 2308 + if (unlikely(flags)) 2309 + return -EINVAL; 2310 + 2311 + /* First find the starting scatterlist element */ 2312 + i = msg->sg.start; 2313 + do { 2314 + l = sk_msg_elem(msg, i)->length; 2315 + 2316 + if (start < offset + l) 2317 + break; 2318 + offset += l; 2319 + sk_msg_iter_var_next(i); 2320 + } while (i != msg->sg.end); 2321 + 2322 + if (start >= offset + l) 2323 + return -EINVAL; 2324 + 2325 + space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2326 + 2327 + /* If no space available will fallback to copy, we need at 2328 + * least one scatterlist elem available to push data into 2329 + * when start aligns to the beginning of an element or two 2330 + * when it falls inside an element. We handle the start equals 2331 + * offset case because its the common case for inserting a 2332 + * header. 
2333 + */ 2334 + if (!space || (space == 1 && start != offset)) 2335 + copy = msg->sg.data[i].length; 2336 + 2337 + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2338 + get_order(copy + len)); 2339 + if (unlikely(!page)) 2340 + return -ENOMEM; 2341 + 2342 + if (copy) { 2343 + int front, back; 2344 + 2345 + raw = page_address(page); 2346 + 2347 + psge = sk_msg_elem(msg, i); 2348 + front = start - offset; 2349 + back = psge->length - front; 2350 + from = sg_virt(psge); 2351 + 2352 + if (front) 2353 + memcpy(raw, from, front); 2354 + 2355 + if (back) { 2356 + from += front; 2357 + to = raw + front + len; 2358 + 2359 + memcpy(to, from, back); 2360 + } 2361 + 2362 + put_page(sg_page(psge)); 2363 + } else if (start - offset) { 2364 + psge = sk_msg_elem(msg, i); 2365 + rsge = sk_msg_elem_cpy(msg, i); 2366 + 2367 + psge->length = start - offset; 2368 + rsge.length -= psge->length; 2369 + rsge.offset += start; 2370 + 2371 + sk_msg_iter_var_next(i); 2372 + sg_unmark_end(psge); 2373 + sk_msg_iter_next(msg, end); 2374 + } 2375 + 2376 + /* Slot(s) to place newly allocated data */ 2377 + new = i; 2378 + 2379 + /* Shift one or two slots as needed */ 2380 + if (!copy) { 2381 + sge = sk_msg_elem_cpy(msg, i); 2382 + 2383 + sk_msg_iter_var_next(i); 2384 + sg_unmark_end(&sge); 2385 + sk_msg_iter_next(msg, end); 2386 + 2387 + nsge = sk_msg_elem_cpy(msg, i); 2388 + if (rsge.length) { 2389 + sk_msg_iter_var_next(i); 2390 + nnsge = sk_msg_elem_cpy(msg, i); 2391 + } 2392 + 2393 + while (i != msg->sg.end) { 2394 + msg->sg.data[i] = sge; 2395 + sge = nsge; 2396 + sk_msg_iter_var_next(i); 2397 + if (rsge.length) { 2398 + nsge = nnsge; 2399 + nnsge = sk_msg_elem_cpy(msg, i); 2400 + } else { 2401 + nsge = sk_msg_elem_cpy(msg, i); 2402 + } 2403 + } 2404 + } 2405 + 2406 + /* Place newly allocated data buffer */ 2407 + sk_mem_charge(msg->sk, len); 2408 + msg->sg.size += len; 2409 + msg->sg.copy[new] = false; 2410 + sg_set_page(&msg->sg.data[new], page, len + copy, 0); 2411 + if 
(rsge.length) { 2412 + get_page(sg_page(&rsge)); 2413 + sk_msg_iter_var_next(new); 2414 + msg->sg.data[new] = rsge; 2415 + } 2416 + 2417 + sk_msg_compute_data_pointers(msg); 2418 + return 0; 2419 + } 2420 + 2421 + static const struct bpf_func_proto bpf_msg_push_data_proto = { 2422 + .func = bpf_msg_push_data, 2423 + .gpl_only = false, 2424 + .ret_type = RET_INTEGER, 2425 + .arg1_type = ARG_PTR_TO_CTX, 2426 + .arg2_type = ARG_ANYTHING, 2427 + .arg3_type = ARG_ANYTHING, 2428 + .arg4_type = ARG_ANYTHING, 2429 + }; 2430 + 2300 2431 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 2301 2432 { 2302 2433 return task_get_classid(skb); ··· 4985 4854 func == bpf_xdp_adjust_head || 4986 4855 func == bpf_xdp_adjust_meta || 4987 4856 func == bpf_msg_pull_data || 4857 + func == bpf_msg_push_data || 4988 4858 func == bpf_xdp_adjust_tail || 4989 4859 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4990 4860 func == bpf_lwt_seg6_store_bytes || ··· 5008 4876 return &bpf_map_update_elem_proto; 5009 4877 case BPF_FUNC_map_delete_elem: 5010 4878 return &bpf_map_delete_elem_proto; 4879 + case BPF_FUNC_map_push_elem: 4880 + return &bpf_map_push_elem_proto; 4881 + case BPF_FUNC_map_pop_elem: 4882 + return &bpf_map_pop_elem_proto; 4883 + case BPF_FUNC_map_peek_elem: 4884 + return &bpf_map_peek_elem_proto; 5011 4885 case BPF_FUNC_get_prandom_u32: 5012 4886 return &bpf_get_prandom_u32_proto; 5013 4887 case BPF_FUNC_get_smp_processor_id: ··· 5262 5124 return &bpf_msg_cork_bytes_proto; 5263 5125 case BPF_FUNC_msg_pull_data: 5264 5126 return &bpf_msg_pull_data_proto; 5127 + case BPF_FUNC_msg_push_data: 5128 + return &bpf_msg_push_data_proto; 5265 5129 case BPF_FUNC_get_local_storage: 5266 5130 return &bpf_get_local_storage_proto; 5267 5131 default: ··· 5481 5341 default: 5482 5342 return false; 5483 5343 } 5344 + } 5345 + 5346 + return bpf_skb_is_valid_access(off, size, type, prog, info); 5347 + } 5348 + 5349 + static bool cg_skb_is_valid_access(int off, int size, 5350 + enum 
bpf_access_type type, 5351 + const struct bpf_prog *prog, 5352 + struct bpf_insn_access_aux *info) 5353 + { 5354 + switch (off) { 5355 + case bpf_ctx_range(struct __sk_buff, tc_classid): 5356 + case bpf_ctx_range(struct __sk_buff, data_meta): 5357 + case bpf_ctx_range(struct __sk_buff, flow_keys): 5358 + return false; 5359 + } 5360 + if (type == BPF_WRITE) { 5361 + switch (off) { 5362 + case bpf_ctx_range(struct __sk_buff, mark): 5363 + case bpf_ctx_range(struct __sk_buff, priority): 5364 + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 5365 + break; 5366 + default: 5367 + return false; 5368 + } 5369 + } 5370 + 5371 + switch (off) { 5372 + case bpf_ctx_range(struct __sk_buff, data): 5373 + info->reg_type = PTR_TO_PACKET; 5374 + break; 5375 + case bpf_ctx_range(struct __sk_buff, data_end): 5376 + info->reg_type = PTR_TO_PACKET_END; 5377 + break; 5484 5378 } 5485 5379 5486 5380 return bpf_skb_is_valid_access(off, size, type, prog, info); ··· 7212 7038 7213 7039 const struct bpf_verifier_ops cg_skb_verifier_ops = { 7214 7040 .get_func_proto = cg_skb_func_proto, 7215 - .is_valid_access = sk_filter_is_valid_access, 7041 + .is_valid_access = cg_skb_is_valid_access, 7216 7042 .convert_ctx_access = bpf_convert_ctx_access, 7217 7043 }; 7218 7044
+6 -5
net/core/sock_map.c
··· 175 175 } 176 176 } 177 177 178 - psock = sk_psock_get(sk); 178 + psock = sk_psock_get_checked(sk); 179 + if (IS_ERR(psock)) { 180 + ret = PTR_ERR(psock); 181 + goto out_progs; 182 + } 183 + 179 184 if (psock) { 180 - if (!sk_has_psock(sk)) { 181 - ret = -EBUSY; 182 - goto out_progs; 183 - } 184 185 if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || 185 186 (skb_progs && READ_ONCE(psock->progs.skb_parser))) { 186 187 sk_psock_put(sk, psock);
+27 -14
net/ipv4/tcp_bpf.c
··· 39 39 } 40 40 41 41 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 42 - struct msghdr *msg, int len) 42 + struct msghdr *msg, int len, int flags) 43 43 { 44 44 struct iov_iter *iter = &msg->msg_iter; 45 + int peek = flags & MSG_PEEK; 45 46 int i, ret, copied = 0; 47 + struct sk_msg *msg_rx; 48 + 49 + msg_rx = list_first_entry_or_null(&psock->ingress_msg, 50 + struct sk_msg, list); 46 51 47 52 while (copied != len) { 48 53 struct scatterlist *sge; 49 - struct sk_msg *msg_rx; 50 54 51 - msg_rx = list_first_entry_or_null(&psock->ingress_msg, 52 - struct sk_msg, list); 53 55 if (unlikely(!msg_rx)) 54 56 break; 55 57 ··· 72 70 } 73 71 74 72 copied += copy; 75 - sge->offset += copy; 76 - sge->length -= copy; 77 - sk_mem_uncharge(sk, copy); 78 - if (!sge->length) { 79 - i++; 80 - if (i == MAX_SKB_FRAGS) 81 - i = 0; 82 - if (!msg_rx->skb) 83 - put_page(page); 73 + if (likely(!peek)) { 74 + sge->offset += copy; 75 + sge->length -= copy; 76 + sk_mem_uncharge(sk, copy); 77 + msg_rx->sg.size -= copy; 78 + 79 + if (!sge->length) { 80 + sk_msg_iter_var_next(i); 81 + if (!msg_rx->skb) 82 + put_page(page); 83 + } 84 + } else { 85 + sk_msg_iter_var_next(i); 84 86 } 85 87 86 88 if (copied == len) 87 89 break; 88 90 } while (i != msg_rx->sg.end); 91 + 92 + if (unlikely(peek)) { 93 + msg_rx = list_next_entry(msg_rx, list); 94 + continue; 95 + } 89 96 90 97 msg_rx->sg.start = i; 91 98 if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { ··· 103 92 consume_skb(msg_rx->skb); 104 93 kfree(msg_rx); 105 94 } 95 + msg_rx = list_first_entry_or_null(&psock->ingress_msg, 96 + struct sk_msg, list); 106 97 } 107 98 108 99 return copied; ··· 127 114 return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 128 115 lock_sock(sk); 129 116 msg_bytes_ready: 130 - copied = __tcp_bpf_recvmsg(sk, psock, msg, len); 117 + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); 131 118 if (!copied) { 132 119 int data, err = 0; 133 120 long timeo;
-2
net/tls/tls_main.c
··· 715 715 716 716 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { 717 717 .name = "tls", 718 - .uid = TCP_ULP_TLS, 719 - .user_visible = true, 720 718 .owner = THIS_MODULE, 721 719 .init = tls_init, 722 720 };
+2 -1
net/tls/tls_sw.c
··· 1478 1478 skb = tls_wait_data(sk, psock, flags, timeo, &err); 1479 1479 if (!skb) { 1480 1480 if (psock) { 1481 - int ret = __tcp_bpf_recvmsg(sk, psock, msg, len); 1481 + int ret = __tcp_bpf_recvmsg(sk, psock, 1482 + msg, len, flags); 1482 1483 1483 1484 if (ret > 0) { 1484 1485 copied += ret;
+70
tools/arch/arm64/include/asm/barrier.h
··· 14 14 #define wmb() asm volatile("dmb ishst" ::: "memory") 15 15 #define rmb() asm volatile("dmb ishld" ::: "memory") 16 16 17 + #define smp_store_release(p, v) \ 18 + do { \ 19 + union { typeof(*p) __val; char __c[1]; } __u = \ 20 + { .__val = (__force typeof(*p)) (v) }; \ 21 + \ 22 + switch (sizeof(*p)) { \ 23 + case 1: \ 24 + asm volatile ("stlrb %w1, %0" \ 25 + : "=Q" (*p) \ 26 + : "r" (*(__u8 *)__u.__c) \ 27 + : "memory"); \ 28 + break; \ 29 + case 2: \ 30 + asm volatile ("stlrh %w1, %0" \ 31 + : "=Q" (*p) \ 32 + : "r" (*(__u16 *)__u.__c) \ 33 + : "memory"); \ 34 + break; \ 35 + case 4: \ 36 + asm volatile ("stlr %w1, %0" \ 37 + : "=Q" (*p) \ 38 + : "r" (*(__u32 *)__u.__c) \ 39 + : "memory"); \ 40 + break; \ 41 + case 8: \ 42 + asm volatile ("stlr %1, %0" \ 43 + : "=Q" (*p) \ 44 + : "r" (*(__u64 *)__u.__c) \ 45 + : "memory"); \ 46 + break; \ 47 + default: \ 48 + /* Only to shut up gcc ... */ \ 49 + mb(); \ 50 + break; \ 51 + } \ 52 + } while (0) 53 + 54 + #define smp_load_acquire(p) \ 55 + ({ \ 56 + union { typeof(*p) __val; char __c[1]; } __u; \ 57 + \ 58 + switch (sizeof(*p)) { \ 59 + case 1: \ 60 + asm volatile ("ldarb %w0, %1" \ 61 + : "=r" (*(__u8 *)__u.__c) \ 62 + : "Q" (*p) : "memory"); \ 63 + break; \ 64 + case 2: \ 65 + asm volatile ("ldarh %w0, %1" \ 66 + : "=r" (*(__u16 *)__u.__c) \ 67 + : "Q" (*p) : "memory"); \ 68 + break; \ 69 + case 4: \ 70 + asm volatile ("ldar %w0, %1" \ 71 + : "=r" (*(__u32 *)__u.__c) \ 72 + : "Q" (*p) : "memory"); \ 73 + break; \ 74 + case 8: \ 75 + asm volatile ("ldar %0, %1" \ 76 + : "=r" (*(__u64 *)__u.__c) \ 77 + : "Q" (*p) : "memory"); \ 78 + break; \ 79 + default: \ 80 + /* Only to shut up gcc ... */ \ 81 + mb(); \ 82 + break; \ 83 + } \ 84 + __u.__val; \ 85 + }) 86 + 17 87 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
+13
tools/arch/ia64/include/asm/barrier.h
··· 46 46 #define rmb() mb() 47 47 #define wmb() mb() 48 48 49 + #define smp_store_release(p, v) \ 50 + do { \ 51 + barrier(); \ 52 + WRITE_ONCE(*p, v); \ 53 + } while (0) 54 + 55 + #define smp_load_acquire(p) \ 56 + ({ \ 57 + typeof(*p) ___p1 = READ_ONCE(*p); \ 58 + barrier(); \ 59 + ___p1; \ 60 + }) 61 + 49 62 #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
+16
tools/arch/powerpc/include/asm/barrier.h
··· 27 27 #define rmb() __asm__ __volatile__ ("sync" : : : "memory") 28 28 #define wmb() __asm__ __volatile__ ("sync" : : : "memory") 29 29 30 + #if defined(__powerpc64__) 31 + #define smp_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") 32 + 33 + #define smp_store_release(p, v) \ 34 + do { \ 35 + smp_lwsync(); \ 36 + WRITE_ONCE(*p, v); \ 37 + } while (0) 38 + 39 + #define smp_load_acquire(p) \ 40 + ({ \ 41 + typeof(*p) ___p1 = READ_ONCE(*p); \ 42 + smp_lwsync(); \ 43 + ___p1; \ 44 + }) 45 + #endif /* defined(__powerpc64__) */ 30 46 #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
+13
tools/arch/s390/include/asm/barrier.h
··· 28 28 #define rmb() mb() 29 29 #define wmb() mb() 30 30 31 + #define smp_store_release(p, v) \ 32 + do { \ 33 + barrier(); \ 34 + WRITE_ONCE(*p, v); \ 35 + } while (0) 36 + 37 + #define smp_load_acquire(p) \ 38 + ({ \ 39 + typeof(*p) ___p1 = READ_ONCE(*p); \ 40 + barrier(); \ 41 + ___p1; \ 42 + }) 43 + 31 44 #endif /* __TOOLS_LIB_ASM_BARRIER_H */
+13
tools/arch/sparc/include/asm/barrier_64.h
··· 40 40 #define rmb() __asm__ __volatile__("":::"memory") 41 41 #define wmb() __asm__ __volatile__("":::"memory") 42 42 43 + #define smp_store_release(p, v) \ 44 + do { \ 45 + barrier(); \ 46 + WRITE_ONCE(*p, v); \ 47 + } while (0) 48 + 49 + #define smp_load_acquire(p) \ 50 + ({ \ 51 + typeof(*p) ___p1 = READ_ONCE(*p); \ 52 + barrier(); \ 53 + ___p1; \ 54 + }) 55 + 43 56 #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
+14
tools/arch/x86/include/asm/barrier.h
··· 26 26 #define wmb() asm volatile("sfence" ::: "memory") 27 27 #endif 28 28 29 + #if defined(__x86_64__) 30 + #define smp_store_release(p, v) \ 31 + do { \ 32 + barrier(); \ 33 + WRITE_ONCE(*p, v); \ 34 + } while (0) 35 + 36 + #define smp_load_acquire(p) \ 37 + ({ \ 38 + typeof(*p) ___p1 = READ_ONCE(*p); \ 39 + barrier(); \ 40 + ___p1; \ 41 + }) 42 + #endif /* defined(__x86_64__) */ 29 43 #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
+3 -1
tools/bpf/bpftool/Documentation/bpftool-map.rst
··· 86 86 **bpftool map pin** *MAP* *FILE* 87 87 Pin map *MAP* as *FILE*. 88 88 89 - Note: *FILE* must be located in *bpffs* mount. 89 + Note: *FILE* must be located in *bpffs* mount. It must not 90 + contain a dot character ('.'), which is reserved for future 91 + extensions of *bpffs*. 90 92 91 93 **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] 92 94 Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+6 -2
tools/bpf/bpftool/Documentation/bpftool-prog.rst
··· 75 75 **bpftool prog pin** *PROG* *FILE* 76 76 Pin program *PROG* as *FILE*. 77 77 78 - Note: *FILE* must be located in *bpffs* mount. 78 + Note: *FILE* must be located in *bpffs* mount. It must not 79 + contain a dot character ('.'), which is reserved for future 80 + extensions of *bpffs*. 79 81 80 82 **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] 81 83 Load bpf program from binary *OBJ* and pin as *FILE*. ··· 93 91 If **dev** *NAME* is specified program will be loaded onto 94 92 given networking device (offload). 95 93 96 - Note: *FILE* must be located in *bpffs* mount. 94 + Note: *FILE* must be located in *bpffs* mount. It must not 95 + contain a dot character ('.'), which is reserved for future 96 + extensions of *bpffs*. 97 97 98 98 **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* 99 99 Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+1 -1
tools/bpf/bpftool/bash-completion/bpftool
··· 143 143 local type 144 144 type=$(bpftool -jp map show $keyword $ref | \ 145 145 command sed -n 's/.*"type": "\(.*\)",$/\1/p') 146 - printf $type 146 + [[ -n $type ]] && printf $type 147 147 } 148 148 149 149 _bpftool_map_update_get_id()
+4 -1
tools/bpf/bpftool/common.c
··· 554 554 return read_sysfs_hex_int(full_path); 555 555 } 556 556 557 - const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) 557 + const char * 558 + ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, 559 + const char **opt) 558 560 { 559 561 char devname[IF_NAMESIZE]; 560 562 int vendor_id; ··· 581 579 device_id != 0x6000 && 582 580 device_id != 0x6003) 583 581 p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); 582 + *opt = "ctx4"; 584 583 return "NFP-6xxx"; 585 584 default: 586 585 p_err("Can't get bfd arch name for device vendor id 0x%04x",
+3 -1
tools/bpf/bpftool/jit_disasm.c
··· 77 77 } 78 78 79 79 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 80 - const char *arch) 80 + const char *arch, const char *disassembler_options) 81 81 { 82 82 disassembler_ftype disassemble; 83 83 struct disassemble_info info; ··· 116 116 117 117 info.arch = bfd_get_arch(bfdf); 118 118 info.mach = bfd_get_mach(bfdf); 119 + if (disassembler_options) 120 + info.disassembler_options = disassembler_options; 119 121 info.buffer = image; 120 122 info.buffer_length = len; 121 123
+2 -1
tools/bpf/bpftool/main.c
··· 321 321 p_err("reading batch file failed: %s", strerror(errno)); 322 322 err = -1; 323 323 } else { 324 - p_info("processed %d commands", lines); 324 + if (!json_output) 325 + printf("processed %d commands\n", lines); 325 326 err = 0; 326 327 } 327 328 err_close:
+4 -2
tools/bpf/bpftool/main.h
··· 145 145 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); 146 146 147 147 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 148 - const char *arch); 148 + const char *arch, const char *disassembler_options); 149 149 void print_data_json(uint8_t *data, size_t len); 150 150 void print_hex_data_json(uint8_t *data, size_t len); 151 151 152 152 unsigned int get_page_size(void); 153 153 unsigned int get_possible_cpus(void); 154 - const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 154 + const char * 155 + ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, 156 + const char **opt); 155 157 156 158 struct btf_dumper { 157 159 const struct btf *btf;
+6 -4
tools/bpf/bpftool/map_perf_ring.c
··· 50 50 stop = true; 51 51 } 52 52 53 - static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) 53 + static enum bpf_perf_event_ret 54 + print_bpf_output(struct perf_event_header *event, void *private_data) 54 55 { 55 - struct event_ring_info *ring = priv; 56 - struct perf_event_sample *e = event; 56 + struct perf_event_sample *e = container_of(event, struct perf_event_sample, 57 + header); 58 + struct event_ring_info *ring = private_data; 57 59 struct { 58 60 struct perf_event_header header; 59 61 __u64 id; 60 62 __u64 lost; 61 - } *lost = event; 63 + } *lost = (typeof(lost))event; 62 64 63 65 if (json_output) { 64 66 jsonw_start_object(json_wtr);
+9 -5
tools/bpf/bpftool/prog.c
··· 449 449 unsigned long *func_ksyms = NULL; 450 450 struct bpf_prog_info info = {}; 451 451 unsigned int *func_lens = NULL; 452 + const char *disasm_opt = NULL; 452 453 unsigned int nr_func_ksyms; 453 454 unsigned int nr_func_lens; 454 455 struct dump_data dd = {}; ··· 608 607 const char *name = NULL; 609 608 610 609 if (info.ifindex) { 611 - name = ifindex_to_bfd_name_ns(info.ifindex, 612 - info.netns_dev, 613 - info.netns_ino); 610 + name = ifindex_to_bfd_params(info.ifindex, 611 + info.netns_dev, 612 + info.netns_ino, 613 + &disasm_opt); 614 614 if (!name) 615 615 goto err_free; 616 616 } ··· 653 651 printf("%s:\n", sym_name); 654 652 } 655 653 656 - disasm_print_insn(img, lens[i], opcodes, name); 654 + disasm_print_insn(img, lens[i], opcodes, name, 655 + disasm_opt); 657 656 img += lens[i]; 658 657 659 658 if (json_output) ··· 666 663 if (json_output) 667 664 jsonw_end_array(json_wtr); 668 665 } else { 669 - disasm_print_insn(buf, *member_len, opcodes, name); 666 + disasm_print_insn(buf, *member_len, opcodes, name, 667 + disasm_opt); 670 668 } 671 669 } else if (visual) { 672 670 if (json_output)
+35
tools/include/asm/barrier.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <linux/compiler.h> 2 3 #if defined(__i386__) || defined(__x86_64__) 3 4 #include "../../arch/x86/include/asm/barrier.h" 4 5 #elif defined(__arm__) ··· 26 25 #include "../../arch/xtensa/include/asm/barrier.h" 27 26 #else 28 27 #include <asm-generic/barrier.h> 28 + #endif 29 + 30 + /* 31 + * Generic fallback smp_*() definitions for archs that haven't 32 + * been updated yet. 33 + */ 34 + 35 + #ifndef smp_rmb 36 + # define smp_rmb() rmb() 37 + #endif 38 + 39 + #ifndef smp_wmb 40 + # define smp_wmb() wmb() 41 + #endif 42 + 43 + #ifndef smp_mb 44 + # define smp_mb() mb() 45 + #endif 46 + 47 + #ifndef smp_store_release 48 + # define smp_store_release(p, v) \ 49 + do { \ 50 + smp_mb(); \ 51 + WRITE_ONCE(*p, v); \ 52 + } while (0) 53 + #endif 54 + 55 + #ifndef smp_load_acquire 56 + # define smp_load_acquire(p) \ 57 + ({ \ 58 + typeof(*p) ___p1 = READ_ONCE(*p); \ 59 + smp_mb(); \ 60 + ___p1; \ 61 + }) 29 62 #endif
+73
tools/include/linux/ring_buffer.h
··· 1 + #ifndef _TOOLS_LINUX_RING_BUFFER_H_ 2 + #define _TOOLS_LINUX_RING_BUFFER_H_ 3 + 4 + #include <asm/barrier.h> 5 + 6 + /* 7 + * Contract with kernel for walking the perf ring buffer from 8 + * user space requires the following barrier pairing (quote 9 + * from kernel/events/ring_buffer.c): 10 + * 11 + * Since the mmap() consumer (userspace) can run on a 12 + * different CPU: 13 + * 14 + * kernel user 15 + * 16 + * if (LOAD ->data_tail) { LOAD ->data_head 17 + * (A) smp_rmb() (C) 18 + * STORE $data LOAD $data 19 + * smp_wmb() (B) smp_mb() (D) 20 + * STORE ->data_head STORE ->data_tail 21 + * } 22 + * 23 + * Where A pairs with D, and B pairs with C. 24 + * 25 + * In our case A is a control dependency that separates the 26 + * load of the ->data_tail and the stores of $data. In case 27 + * ->data_tail indicates there is no room in the buffer to 28 + * store $data we do not. 29 + * 30 + * D needs to be a full barrier since it separates the data 31 + * READ from the tail WRITE. 32 + * 33 + * For B a WMB is sufficient since it separates two WRITEs, 34 + * and for C an RMB is sufficient since it separates two READs. 35 + * 36 + * Note, instead of B, C, D we could also use smp_store_release() 37 + * in B and D as well as smp_load_acquire() in C. 38 + * 39 + * However, this optimization does not make sense for all kernel 40 + * supported architectures since for a fair number it would 41 + * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), 42 + * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). 43 + * 44 + * Thus for those smp_wmb() in B and smp_rmb() in C would still 45 + * be less expensive. For the case of D this has either the same 46 + * cost or is less expensive, for example, due to TSO x86 can 47 + * avoid the CPU barrier entirely. 
48 + */ 49 + 50 + static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) 51 + { 52 + /* 53 + * Architectures where smp_load_acquire() does not fallback to 54 + * READ_ONCE() + smp_mb() pair. 55 + */ 56 + #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ 57 + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) 58 + return smp_load_acquire(&base->data_head); 59 + #else 60 + u64 head = READ_ONCE(base->data_head); 61 + 62 + smp_rmb(); 63 + return head; 64 + #endif 65 + } 66 + 67 + static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, 68 + u64 tail) 69 + { 70 + smp_store_release(&base->data_tail, tail); 71 + } 72 + 73 + #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
+48 -2
tools/include/uapi/linux/bpf.h
··· 103 103 BPF_BTF_LOAD, 104 104 BPF_BTF_GET_FD_BY_ID, 105 105 BPF_TASK_FD_QUERY, 106 + BPF_MAP_LOOKUP_AND_DELETE_ELEM, 106 107 }; 107 108 108 109 enum bpf_map_type { ··· 129 128 BPF_MAP_TYPE_CGROUP_STORAGE, 130 129 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, 131 130 BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, 131 + BPF_MAP_TYPE_QUEUE, 132 + BPF_MAP_TYPE_STACK, 132 133 }; 133 134 134 135 enum bpf_prog_type { ··· 463 460 * Description 464 461 * Delete entry with *key* from *map*. 465 462 * Return 463 + * 0 on success, or a negative error in case of failure. 464 + * 465 + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) 466 + * Description 467 + * Push an element *value* in *map*. *flags* is one of: 468 + * 469 + * **BPF_EXIST** 470 + * If the queue/stack is full, the oldest element is removed to 471 + * make room for this. 472 + * Return 473 + * 0 on success, or a negative error in case of failure. 474 + * 475 + * int bpf_map_pop_elem(struct bpf_map *map, void *value) 476 + * Description 477 + * Pop an element from *map*. 478 + * Return 479 + * 0 on success, or a negative error in case of failure. 480 + * 481 + * int bpf_map_peek_elem(struct bpf_map *map, void *value) 482 + * Description 483 + * Get an element from *map* without removing it. 484 + * Return 466 485 * 0 on success, or a negative error in case of failure. 467 486 * 468 487 * int bpf_probe_read(void *dst, u32 size, const void *src) ··· 1458 1433 * Return 1459 1434 * 0 on success, or a negative error in case of failure. 1460 1435 * 1461 - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) 1436 + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) 1462 1437 * Description 1463 1438 * Grow or shrink the room for data in the packet associated to 1464 1439 * *skb* by *len_diff*, and according to the selected *mode*. ··· 2240 2215 * pointer that was returned from bpf_sk_lookup_xxx\ (). 
2241 2216 * Return 2242 2217 * 0 on success, or a negative error in case of failure. 2218 + * 2219 + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) 2220 + * Description 2221 + * For socket policies, insert *len* bytes into msg at offset 2222 + * *start*. 2223 + * 2224 + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 2225 + * *msg* it may want to insert metadata or options into the msg. 2226 + * This can later be read and used by any of the lower layer BPF 2227 + * hooks. 2228 + * 2229 + * This helper may fail if under memory pressure (a malloc 2230 + * fails) in these cases BPF programs will get an appropriate 2231 + * error and BPF programs will need to handle them. 2232 + * 2233 + * Return 2234 + * 0 on success, or a negative error in case of failure. 2243 2235 */ 2244 2236 #define __BPF_FUNC_MAPPER(FN) \ 2245 2237 FN(unspec), \ ··· 2345 2303 FN(skb_ancestor_cgroup_id), \ 2346 2304 FN(sk_lookup_tcp), \ 2347 2305 FN(sk_lookup_udp), \ 2348 - FN(sk_release), 2306 + FN(sk_release), \ 2307 + FN(map_push_elem), \ 2308 + FN(map_pop_elem), \ 2309 + FN(map_peek_elem), \ 2310 + FN(msg_push_data), 2349 2311 2350 2312 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2351 2313 * function eBPF program intends to call
+78
tools/include/uapi/linux/tls.h
··· 1 + /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ 2 + /* 3 + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #ifndef _UAPI_LINUX_TLS_H 35 + #define _UAPI_LINUX_TLS_H 36 + 37 + #include <linux/types.h> 38 + 39 + /* TLS socket options */ 40 + #define TLS_TX 1 /* Set transmit parameters */ 41 + #define TLS_RX 2 /* Set receive parameters */ 42 + 43 + /* Supported versions */ 44 + #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) 45 + #define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) 46 + 47 + #define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ 48 + ((id##_VERSION_MINOR) & 0xFF)) 49 + 50 + #define TLS_1_2_VERSION_MAJOR 0x3 51 + #define TLS_1_2_VERSION_MINOR 0x3 52 + #define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) 53 + 54 + /* Supported ciphers */ 55 + #define TLS_CIPHER_AES_GCM_128 51 56 + #define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 57 + #define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 58 + #define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 59 + #define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 60 + #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 61 + 62 + #define TLS_SET_RECORD_TYPE 1 63 + #define TLS_GET_RECORD_TYPE 2 64 + 65 + struct tls_crypto_info { 66 + __u16 version; 67 + __u16 cipher_type; 68 + }; 69 + 70 + struct tls12_crypto_info_aes_gcm_128 { 71 + struct tls_crypto_info info; 72 + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; 73 + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; 74 + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; 75 + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; 76 + }; 77 + 78 + #endif /* _UAPI_LINUX_TLS_H */
+1
tools/lib/bpf/Makefile
··· 125 125 override CFLAGS += -Werror -Wall 126 126 override CFLAGS += -fPIC 127 127 override CFLAGS += $(INCLUDES) 128 + override CFLAGS += -fvisibility=hidden 128 129 129 130 ifeq ($(VERBOSE),1) 130 131 Q =
+12
tools/lib/bpf/bpf.c
··· 278 278 return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); 279 279 } 280 280 281 + int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) 282 + { 283 + union bpf_attr attr; 284 + 285 + bzero(&attr, sizeof(attr)); 286 + attr.map_fd = fd; 287 + attr.key = ptr_to_u64(key); 288 + attr.value = ptr_to_u64(value); 289 + 290 + return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); 291 + } 292 + 281 293 int bpf_map_delete_elem(int fd, const void *key) 282 294 { 283 295 union bpf_attr attr;
+67 -53
tools/lib/bpf/bpf.h
··· 27 27 #include <stdbool.h> 28 28 #include <stddef.h> 29 29 30 + #ifndef LIBBPF_API 31 + #define LIBBPF_API __attribute__((visibility("default"))) 32 + #endif 33 + 30 34 struct bpf_create_map_attr { 31 35 const char *name; 32 36 enum bpf_map_type map_type; ··· 46 42 __u32 inner_map_fd; 47 43 }; 48 44 49 - int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 50 - int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 51 - int key_size, int value_size, int max_entries, 52 - __u32 map_flags, int node); 53 - int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 54 - int key_size, int value_size, int max_entries, 55 - __u32 map_flags); 56 - int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, 57 - int max_entries, __u32 map_flags); 58 - int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, 59 - int key_size, int inner_map_fd, int max_entries, 60 - __u32 map_flags, int node); 61 - int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, 62 - int key_size, int inner_map_fd, int max_entries, 63 - __u32 map_flags); 45 + LIBBPF_API int 46 + bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 47 + LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 48 + int key_size, int value_size, 49 + int max_entries, __u32 map_flags, int node); 50 + LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 51 + int key_size, int value_size, 52 + int max_entries, __u32 map_flags); 53 + LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, 54 + int value_size, int max_entries, __u32 map_flags); 55 + LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, 56 + const char *name, int key_size, 57 + int inner_map_fd, int max_entries, 58 + __u32 map_flags, int node); 59 + LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, 60 + const char *name, int key_size, 61 + int 
inner_map_fd, int max_entries, 62 + __u32 map_flags); 64 63 65 64 struct bpf_load_program_attr { 66 65 enum bpf_prog_type prog_type; ··· 81 74 82 75 /* Recommend log buffer size */ 83 76 #define BPF_LOG_BUF_SIZE (256 * 1024) 84 - int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, 85 - char *log_buf, size_t log_buf_sz); 86 - int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 87 - size_t insns_cnt, const char *license, 88 - __u32 kern_version, char *log_buf, 89 - size_t log_buf_sz); 90 - int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 91 - size_t insns_cnt, int strict_alignment, 92 - const char *license, __u32 kern_version, 93 - char *log_buf, size_t log_buf_sz, int log_level); 77 + LIBBPF_API int 78 + bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, 79 + char *log_buf, size_t log_buf_sz); 80 + LIBBPF_API int bpf_load_program(enum bpf_prog_type type, 81 + const struct bpf_insn *insns, size_t insns_cnt, 82 + const char *license, __u32 kern_version, 83 + char *log_buf, size_t log_buf_sz); 84 + LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, 85 + const struct bpf_insn *insns, 86 + size_t insns_cnt, int strict_alignment, 87 + const char *license, __u32 kern_version, 88 + char *log_buf, size_t log_buf_sz, 89 + int log_level); 94 90 95 - int bpf_map_update_elem(int fd, const void *key, const void *value, 96 - __u64 flags); 91 + LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, 92 + __u64 flags); 97 93 98 - int bpf_map_lookup_elem(int fd, const void *key, void *value); 99 - int bpf_map_delete_elem(int fd, const void *key); 100 - int bpf_map_get_next_key(int fd, const void *key, void *next_key); 101 - int bpf_obj_pin(int fd, const char *pathname); 102 - int bpf_obj_get(const char *pathname); 103 - int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, 104 - unsigned int flags); 105 - int bpf_prog_detach(int 
attachable_fd, enum bpf_attach_type type); 106 - int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); 107 - int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, 108 - void *data_out, __u32 *size_out, __u32 *retval, 109 - __u32 *duration); 110 - int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); 111 - int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 112 - int bpf_prog_get_fd_by_id(__u32 id); 113 - int bpf_map_get_fd_by_id(__u32 id); 114 - int bpf_btf_get_fd_by_id(__u32 id); 115 - int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 116 - int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 117 - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); 118 - int bpf_raw_tracepoint_open(const char *name, int prog_fd); 119 - int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, 120 - bool do_log); 121 - int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, 122 - __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, 123 - __u64 *probe_addr); 94 + LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); 95 + LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, 96 + void *value); 97 + LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); 98 + LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); 99 + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 100 + LIBBPF_API int bpf_obj_get(const char *pathname); 101 + LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, 102 + enum bpf_attach_type type, unsigned int flags); 103 + LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); 104 + LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, 105 + enum bpf_attach_type type); 106 + LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, 107 + __u32 size, void *data_out, __u32 *size_out, 108 + __u32 
*retval, __u32 *duration); 109 + LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); 110 + LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 111 + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); 112 + LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); 113 + LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); 114 + LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 115 + LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, 116 + __u32 query_flags, __u32 *attach_flags, 117 + __u32 *prog_ids, __u32 *prog_cnt); 118 + LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); 119 + LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, 120 + __u32 log_buf_size, bool do_log); 121 + LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, 122 + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, 123 + __u64 *probe_offset, __u64 *probe_addr); 124 124 #endif /* __LIBBPF_BPF_H */
+14 -8
tools/lib/bpf/btf.h
··· 6 6 7 7 #include <linux/types.h> 8 8 9 + #ifndef LIBBPF_API 10 + #define LIBBPF_API __attribute__((visibility("default"))) 11 + #endif 12 + 9 13 #define BTF_ELF_SEC ".BTF" 10 14 11 15 struct btf; ··· 18 14 typedef int (*btf_print_fn_t)(const char *, ...) 19 15 __attribute__((format(printf, 1, 2))); 20 16 21 - void btf__free(struct btf *btf); 22 - struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); 23 - __s32 btf__find_by_name(const struct btf *btf, const char *type_name); 24 - const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); 25 - __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); 26 - int btf__resolve_type(const struct btf *btf, __u32 type_id); 27 - int btf__fd(const struct btf *btf); 28 - const char *btf__name_by_offset(const struct btf *btf, __u32 offset); 17 + LIBBPF_API void btf__free(struct btf *btf); 18 + LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); 19 + LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, 20 + const char *type_name); 21 + LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, 22 + __u32 id); 23 + LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); 24 + LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); 25 + LIBBPF_API int btf__fd(const struct btf *btf); 26 + LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); 29 27 30 28 #endif /* __LIBBPF_BTF_H */
+29 -40
tools/lib/bpf/libbpf.c
··· 27 27 #include <linux/list.h> 28 28 #include <linux/limits.h> 29 29 #include <linux/perf_event.h> 30 + #include <linux/ring_buffer.h> 30 31 #include <sys/stat.h> 31 32 #include <sys/types.h> 32 33 #include <sys/vfs.h> ··· 2415 2414 } 2416 2415 2417 2416 enum bpf_perf_event_ret 2418 - bpf_perf_event_read_simple(void *mem, unsigned long size, 2419 - unsigned long page_size, void **buf, size_t *buf_len, 2420 - bpf_perf_event_print_t fn, void *priv) 2417 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 2418 + void **copy_mem, size_t *copy_size, 2419 + bpf_perf_event_print_t fn, void *private_data) 2421 2420 { 2422 - volatile struct perf_event_mmap_page *header = mem; 2421 + struct perf_event_mmap_page *header = mmap_mem; 2422 + __u64 data_head = ring_buffer_read_head(header); 2423 2423 __u64 data_tail = header->data_tail; 2424 - __u64 data_head = header->data_head; 2425 - int ret = LIBBPF_PERF_EVENT_ERROR; 2426 - void *base, *begin, *end; 2424 + void *base = ((__u8 *)header) + page_size; 2425 + int ret = LIBBPF_PERF_EVENT_CONT; 2426 + struct perf_event_header *ehdr; 2427 + size_t ehdr_size; 2427 2428 2428 - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ 2429 - if (data_head == data_tail) 2430 - return LIBBPF_PERF_EVENT_CONT; 2429 + while (data_head != data_tail) { 2430 + ehdr = base + (data_tail & (mmap_size - 1)); 2431 + ehdr_size = ehdr->size; 2431 2432 2432 - base = ((char *)header) + page_size; 2433 + if (((void *)ehdr) + ehdr_size > base + mmap_size) { 2434 + void *copy_start = ehdr; 2435 + size_t len_first = base + mmap_size - copy_start; 2436 + size_t len_secnd = ehdr_size - len_first; 2433 2437 2434 - begin = base + data_tail % size; 2435 - end = base + data_head % size; 2436 - 2437 - while (begin != end) { 2438 - struct perf_event_header *ehdr; 2439 - 2440 - ehdr = begin; 2441 - if (begin + ehdr->size > base + size) { 2442 - long len = base + size - begin; 2443 - 2444 - if (*buf_len < ehdr->size) { 
2445 - free(*buf); 2446 - *buf = malloc(ehdr->size); 2447 - if (!*buf) { 2438 + if (*copy_size < ehdr_size) { 2439 + free(*copy_mem); 2440 + *copy_mem = malloc(ehdr_size); 2441 + if (!*copy_mem) { 2442 + *copy_size = 0; 2448 2443 ret = LIBBPF_PERF_EVENT_ERROR; 2449 2444 break; 2450 2445 } 2451 - *buf_len = ehdr->size; 2446 + *copy_size = ehdr_size; 2452 2447 } 2453 2448 2454 - memcpy(*buf, begin, len); 2455 - memcpy(*buf + len, base, ehdr->size - len); 2456 - ehdr = (void *)*buf; 2457 - begin = base + ehdr->size - len; 2458 - } else if (begin + ehdr->size == base + size) { 2459 - begin = base; 2460 - } else { 2461 - begin += ehdr->size; 2449 + memcpy(*copy_mem, copy_start, len_first); 2450 + memcpy(*copy_mem + len_first, base, len_secnd); 2451 + ehdr = *copy_mem; 2462 2452 } 2463 2453 2464 - ret = fn(ehdr, priv); 2454 + ret = fn(ehdr, private_data); 2455 + data_tail += ehdr_size; 2465 2456 if (ret != LIBBPF_PERF_EVENT_CONT) 2466 2457 break; 2467 - 2468 - data_tail += ehdr->size; 2469 2458 } 2470 2459 2471 - __sync_synchronize(); /* smp_mb() */ 2472 - header->data_tail = data_tail; 2473 - 2460 + ring_buffer_write_tail(header, data_tail); 2474 2461 return ret; 2475 2462 }
+101 -88
tools/lib/bpf/libbpf.h
··· 16 16 #include <sys/types.h> // for size_t 17 17 #include <linux/bpf.h> 18 18 19 + #ifndef LIBBPF_API 20 + #define LIBBPF_API __attribute__((visibility("default"))) 21 + #endif 22 + 19 23 enum libbpf_errno { 20 24 __LIBBPF_ERRNO__START = 4000, 21 25 ··· 41 37 __LIBBPF_ERRNO__END, 42 38 }; 43 39 44 - int libbpf_strerror(int err, char *buf, size_t size); 40 + LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); 45 41 46 42 /* 47 43 * __printf is defined in include/linux/compiler-gcc.h. However, ··· 51 47 typedef int (*libbpf_print_fn_t)(const char *, ...) 52 48 __attribute__((format(printf, 1, 2))); 53 49 54 - void libbpf_set_print(libbpf_print_fn_t warn, 55 - libbpf_print_fn_t info, 56 - libbpf_print_fn_t debug); 50 + LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, 51 + libbpf_print_fn_t info, 52 + libbpf_print_fn_t debug); 57 53 58 54 /* Hide internal to user */ 59 55 struct bpf_object; ··· 63 59 enum bpf_prog_type prog_type; 64 60 }; 65 61 66 - struct bpf_object *bpf_object__open(const char *path); 67 - struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); 62 + LIBBPF_API struct bpf_object *bpf_object__open(const char *path); 63 + LIBBPF_API struct bpf_object * 64 + bpf_object__open_xattr(struct bpf_object_open_attr *attr); 68 65 struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, 69 66 int flags); 70 - struct bpf_object *bpf_object__open_buffer(void *obj_buf, 71 - size_t obj_buf_sz, 72 - const char *name); 73 - int bpf_object__pin(struct bpf_object *object, const char *path); 74 - void bpf_object__close(struct bpf_object *object); 67 + LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, 68 + size_t obj_buf_sz, 69 + const char *name); 70 + LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); 71 + LIBBPF_API void bpf_object__close(struct bpf_object *object); 75 72 76 73 /* Load/unload object into/from kernel */ 77 - int bpf_object__load(struct 
bpf_object *obj); 78 - int bpf_object__unload(struct bpf_object *obj); 79 - const char *bpf_object__name(struct bpf_object *obj); 80 - unsigned int bpf_object__kversion(struct bpf_object *obj); 81 - int bpf_object__btf_fd(const struct bpf_object *obj); 74 + LIBBPF_API int bpf_object__load(struct bpf_object *obj); 75 + LIBBPF_API int bpf_object__unload(struct bpf_object *obj); 76 + LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); 77 + LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); 78 + LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); 82 79 83 - struct bpf_program * 80 + LIBBPF_API struct bpf_program * 84 81 bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); 85 82 86 - struct bpf_object *bpf_object__next(struct bpf_object *prev); 83 + LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); 87 84 #define bpf_object__for_each_safe(pos, tmp) \ 88 85 for ((pos) = bpf_object__next(NULL), \ 89 86 (tmp) = bpf_object__next(pos); \ ··· 92 87 (pos) = (tmp), (tmp) = bpf_object__next(tmp)) 93 88 94 89 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); 95 - int bpf_object__set_priv(struct bpf_object *obj, void *priv, 96 - bpf_object_clear_priv_t clear_priv); 97 - void *bpf_object__priv(struct bpf_object *prog); 90 + LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, 91 + bpf_object_clear_priv_t clear_priv); 92 + LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); 98 93 99 - int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 100 - enum bpf_attach_type *expected_attach_type); 101 - int libbpf_attach_type_by_name(const char *name, 102 - enum bpf_attach_type *attach_type); 94 + LIBBPF_API int 95 + libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 96 + enum bpf_attach_type *expected_attach_type); 97 + LIBBPF_API int libbpf_attach_type_by_name(const char *name, 98 + enum bpf_attach_type 
*attach_type); 103 99 104 100 /* Accessors of bpf_program */ 105 101 struct bpf_program; 106 - struct bpf_program *bpf_program__next(struct bpf_program *prog, 107 - struct bpf_object *obj); 102 + LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, 103 + struct bpf_object *obj); 108 104 109 105 #define bpf_object__for_each_program(pos, obj) \ 110 106 for ((pos) = bpf_program__next(NULL, (obj)); \ ··· 115 109 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, 116 110 void *); 117 111 118 - int bpf_program__set_priv(struct bpf_program *prog, void *priv, 119 - bpf_program_clear_priv_t clear_priv); 112 + LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, 113 + bpf_program_clear_priv_t clear_priv); 120 114 121 - void *bpf_program__priv(struct bpf_program *prog); 122 - void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); 115 + LIBBPF_API void *bpf_program__priv(struct bpf_program *prog); 116 + LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, 117 + __u32 ifindex); 123 118 124 - const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); 119 + LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, 120 + bool needs_copy); 125 121 126 - int bpf_program__load(struct bpf_program *prog, char *license, 127 - __u32 kern_version); 128 - int bpf_program__fd(struct bpf_program *prog); 129 - int bpf_program__pin_instance(struct bpf_program *prog, const char *path, 130 - int instance); 131 - int bpf_program__pin(struct bpf_program *prog, const char *path); 132 - void bpf_program__unload(struct bpf_program *prog); 122 + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, 123 + __u32 kern_version); 124 + LIBBPF_API int bpf_program__fd(struct bpf_program *prog); 125 + LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, 126 + const char *path, 127 + int instance); 128 + LIBBPF_API int bpf_program__pin(struct bpf_program *prog, 
const char *path); 129 + LIBBPF_API void bpf_program__unload(struct bpf_program *prog); 133 130 134 131 struct bpf_insn; 135 132 ··· 193 184 struct bpf_insn *insns, int insns_cnt, 194 185 struct bpf_prog_prep_result *res); 195 186 196 - int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 197 - bpf_program_prep_t prep); 187 + LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 188 + bpf_program_prep_t prep); 198 189 199 - int bpf_program__nth_fd(struct bpf_program *prog, int n); 190 + LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); 200 191 201 192 /* 202 193 * Adjust type of BPF program. Default is kprobe. 203 194 */ 204 - int bpf_program__set_socket_filter(struct bpf_program *prog); 205 - int bpf_program__set_tracepoint(struct bpf_program *prog); 206 - int bpf_program__set_raw_tracepoint(struct bpf_program *prog); 207 - int bpf_program__set_kprobe(struct bpf_program *prog); 208 - int bpf_program__set_sched_cls(struct bpf_program *prog); 209 - int bpf_program__set_sched_act(struct bpf_program *prog); 210 - int bpf_program__set_xdp(struct bpf_program *prog); 211 - int bpf_program__set_perf_event(struct bpf_program *prog); 212 - void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); 213 - void bpf_program__set_expected_attach_type(struct bpf_program *prog, 214 - enum bpf_attach_type type); 195 + LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); 196 + LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); 197 + LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); 198 + LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); 199 + LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); 200 + LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); 201 + LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); 202 + LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); 
203 + LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, 204 + enum bpf_prog_type type); 205 + LIBBPF_API void 206 + bpf_program__set_expected_attach_type(struct bpf_program *prog, 207 + enum bpf_attach_type type); 215 208 216 - bool bpf_program__is_socket_filter(struct bpf_program *prog); 217 - bool bpf_program__is_tracepoint(struct bpf_program *prog); 218 - bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); 219 - bool bpf_program__is_kprobe(struct bpf_program *prog); 220 - bool bpf_program__is_sched_cls(struct bpf_program *prog); 221 - bool bpf_program__is_sched_act(struct bpf_program *prog); 222 - bool bpf_program__is_xdp(struct bpf_program *prog); 223 - bool bpf_program__is_perf_event(struct bpf_program *prog); 209 + LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); 210 + LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); 211 + LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); 212 + LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); 213 + LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); 214 + LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); 215 + LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); 216 + LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); 224 217 225 218 /* 226 219 * No need for __attribute__((packed)), all members of 'bpf_map_def' ··· 243 232 * so no need to worry about a name clash. 244 233 */ 245 234 struct bpf_map; 246 - struct bpf_map * 235 + LIBBPF_API struct bpf_map * 247 236 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); 248 237 249 238 /* 250 239 * Get bpf_map through the offset of corresponding struct bpf_map_def 251 240 * in the BPF object file. 
252 241 */ 253 - struct bpf_map * 242 + LIBBPF_API struct bpf_map * 254 243 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); 255 244 256 - struct bpf_map * 245 + LIBBPF_API struct bpf_map * 257 246 bpf_map__next(struct bpf_map *map, struct bpf_object *obj); 258 247 #define bpf_map__for_each(pos, obj) \ 259 248 for ((pos) = bpf_map__next(NULL, (obj)); \ 260 249 (pos) != NULL; \ 261 250 (pos) = bpf_map__next((pos), (obj))) 262 251 263 - int bpf_map__fd(struct bpf_map *map); 264 - const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 265 - const char *bpf_map__name(struct bpf_map *map); 266 - __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); 267 - __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); 252 + LIBBPF_API int bpf_map__fd(struct bpf_map *map); 253 + LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 254 + LIBBPF_API const char *bpf_map__name(struct bpf_map *map); 255 + LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); 256 + LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); 268 257 269 258 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 270 - int bpf_map__set_priv(struct bpf_map *map, void *priv, 271 - bpf_map_clear_priv_t clear_priv); 272 - void *bpf_map__priv(struct bpf_map *map); 273 - int bpf_map__reuse_fd(struct bpf_map *map, int fd); 274 - bool bpf_map__is_offload_neutral(struct bpf_map *map); 275 - void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); 276 - int bpf_map__pin(struct bpf_map *map, const char *path); 259 + LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, 260 + bpf_map_clear_priv_t clear_priv); 261 + LIBBPF_API void *bpf_map__priv(struct bpf_map *map); 262 + LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); 263 + LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); 264 + LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); 265 + LIBBPF_API int 
bpf_map__pin(struct bpf_map *map, const char *path); 277 266 278 - long libbpf_get_error(const void *ptr); 267 + LIBBPF_API long libbpf_get_error(const void *ptr); 279 268 280 269 struct bpf_prog_load_attr { 281 270 const char *file; ··· 284 273 int ifindex; 285 274 }; 286 275 287 - int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 288 - struct bpf_object **pobj, int *prog_fd); 289 - int bpf_prog_load(const char *file, enum bpf_prog_type type, 290 - struct bpf_object **pobj, int *prog_fd); 276 + LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 277 + struct bpf_object **pobj, int *prog_fd); 278 + LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, 279 + struct bpf_object **pobj, int *prog_fd); 291 280 292 - int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 281 + LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 293 282 294 283 enum bpf_perf_event_ret { 295 284 LIBBPF_PERF_EVENT_DONE = 0, ··· 297 286 LIBBPF_PERF_EVENT_CONT = -2, 298 287 }; 299 288 300 - typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, 301 - void *priv); 302 - int bpf_perf_event_read_simple(void *mem, unsigned long size, 303 - unsigned long page_size, 304 - void **buf, size_t *buf_len, 305 - bpf_perf_event_print_t fn, void *priv); 289 + struct perf_event_header; 290 + typedef enum bpf_perf_event_ret 291 + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 292 + void *private_data); 293 + LIBBPF_API enum bpf_perf_event_ret 294 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 295 + void **copy_mem, size_t *copy_size, 296 + bpf_perf_event_print_t fn, void *private_data); 306 297 307 298 struct nlattr; 308 299 typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+3 -12
tools/perf/util/mmap.h
··· 4 4 #include <linux/compiler.h> 5 5 #include <linux/refcount.h> 6 6 #include <linux/types.h> 7 - #include <asm/barrier.h> 7 + #include <linux/ring_buffer.h> 8 8 #include <stdbool.h> 9 9 #include "auxtrace.h" 10 10 #include "event.h" ··· 71 71 72 72 static inline u64 perf_mmap__read_head(struct perf_mmap *mm) 73 73 { 74 - struct perf_event_mmap_page *pc = mm->base; 75 - u64 head = READ_ONCE(pc->data_head); 76 - rmb(); 77 - return head; 74 + return ring_buffer_read_head(mm->base); 78 75 } 79 76 80 77 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) 81 78 { 82 - struct perf_event_mmap_page *pc = md->base; 83 - 84 - /* 85 - * ensure all reads are done before we write the tail out. 86 - */ 87 - mb(); 88 - pc->data_tail = tail; 79 + ring_buffer_write_tail(md->base, tail); 89 80 } 90 81 91 82 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+2
tools/testing/selftests/bpf/.gitignore
··· 25 25 test_select_reuseport 26 26 test_flow_dissector 27 27 flow_dissector_load 28 + test_netcnt 29 + test_section_names
+4 -1
tools/testing/selftests/bpf/Makefile
··· 37 37 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ 38 38 get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ 39 39 test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ 40 - test_sk_lookup_kern.o test_xdp_vlan.o 40 + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o 41 41 42 42 # Order correspond to 'make run_tests' order 43 43 TEST_PROGS := test_kmod.sh \ ··· 117 117 118 118 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline 119 119 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline 120 + 121 + $(OUTPUT)/test_queue_map.o: test_queue_stack_map.h 122 + $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h 120 123 121 124 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) 122 125 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+9
tools/testing/selftests/bpf/bpf_helpers.h
··· 16 16 (void *) BPF_FUNC_map_update_elem; 17 17 static int (*bpf_map_delete_elem)(void *map, void *key) = 18 18 (void *) BPF_FUNC_map_delete_elem; 19 + static int (*bpf_map_push_elem)(void *map, void *value, 20 + unsigned long long flags) = 21 + (void *) BPF_FUNC_map_push_elem; 22 + static int (*bpf_map_pop_elem)(void *map, void *value) = 23 + (void *) BPF_FUNC_map_pop_elem; 24 + static int (*bpf_map_peek_elem)(void *map, void *value) = 25 + (void *) BPF_FUNC_map_peek_elem; 19 26 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = 20 27 (void *) BPF_FUNC_probe_read; 21 28 static unsigned long long (*bpf_ktime_get_ns)(void) = ··· 111 104 (void *) BPF_FUNC_msg_cork_bytes; 112 105 static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = 113 106 (void *) BPF_FUNC_msg_pull_data; 107 + static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = 108 + (void *) BPF_FUNC_msg_push_data; 114 109 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = 115 110 (void *) BPF_FUNC_bind; 116 111 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+1 -1
tools/testing/selftests/bpf/test_libbpf.sh
··· 6 6 # Determine selftest success via shell exit code 7 7 exit_handler() 8 8 { 9 - if (( $? == 0 )); then 9 + if [ $? -eq 0 ]; then 10 10 echo "selftests: $TESTNAME [PASS]"; 11 11 else 12 12 echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+122
tools/testing/selftests/bpf/test_maps.c
··· 15 15 #include <string.h> 16 16 #include <assert.h> 17 17 #include <stdlib.h> 18 + #include <time.h> 18 19 19 20 #include <sys/wait.h> 20 21 #include <sys/socket.h> ··· 468 467 printf("Failed to create arraymap '%s'!\n", strerror(errno)); 469 468 exit(1); 470 469 } 470 + 471 + close(fd); 472 + } 473 + 474 + static void test_queuemap(int task, void *data) 475 + { 476 + const int MAP_SIZE = 32; 477 + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; 478 + int fd, i; 479 + 480 + /* Fill test values to be used */ 481 + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) 482 + vals[i] = rand(); 483 + 484 + /* Invalid key size */ 485 + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, 486 + map_flags); 487 + assert(fd < 0 && errno == EINVAL); 488 + 489 + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, 490 + map_flags); 491 + /* Queue map does not support BPF_F_NO_PREALLOC */ 492 + if (map_flags & BPF_F_NO_PREALLOC) { 493 + assert(fd < 0 && errno == EINVAL); 494 + return; 495 + } 496 + if (fd < 0) { 497 + printf("Failed to create queuemap '%s'!\n", strerror(errno)); 498 + exit(1); 499 + } 500 + 501 + /* Push MAP_SIZE elements */ 502 + for (i = 0; i < MAP_SIZE; i++) 503 + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); 504 + 505 + /* Check that element cannot be pushed due to max_entries limit */ 506 + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && 507 + errno == E2BIG); 508 + 509 + /* Peek element */ 510 + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]); 511 + 512 + /* Replace half elements */ 513 + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) 514 + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); 515 + 516 + /* Pop all elements */ 517 + for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++) 518 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && 519 + val == vals[i]); 520 + 521 + /* Check that there are not elements left */ 522 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) 
== -1 && 523 + errno == ENOENT); 524 + 525 + /* Check that non supported functions set errno to EINVAL */ 526 + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); 527 + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); 528 + 529 + close(fd); 530 + } 531 + 532 + static void test_stackmap(int task, void *data) 533 + { 534 + const int MAP_SIZE = 32; 535 + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; 536 + int fd, i; 537 + 538 + /* Fill test values to be used */ 539 + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) 540 + vals[i] = rand(); 541 + 542 + /* Invalid key size */ 543 + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, 544 + map_flags); 545 + assert(fd < 0 && errno == EINVAL); 546 + 547 + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, 548 + map_flags); 549 + /* Stack map does not support BPF_F_NO_PREALLOC */ 550 + if (map_flags & BPF_F_NO_PREALLOC) { 551 + assert(fd < 0 && errno == EINVAL); 552 + return; 553 + } 554 + if (fd < 0) { 555 + printf("Failed to create stackmap '%s'!\n", strerror(errno)); 556 + exit(1); 557 + } 558 + 559 + /* Push MAP_SIZE elements */ 560 + for (i = 0; i < MAP_SIZE; i++) 561 + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); 562 + 563 + /* Check that element cannot be pushed due to max_entries limit */ 564 + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && 565 + errno == E2BIG); 566 + 567 + /* Peek element */ 568 + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]); 569 + 570 + /* Replace half elements */ 571 + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) 572 + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); 573 + 574 + /* Pop all elements */ 575 + for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--) 576 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && 577 + val == vals[i]); 578 + 579 + /* Check that there are not elements left */ 580 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && 
581 + errno == ENOENT); 582 + 583 + /* Check that non supported functions set errno to EINVAL */ 584 + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); 585 + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); 471 586 472 587 close(fd); 473 588 } ··· 1551 1434 test_map_wronly(); 1552 1435 1553 1436 test_reuseport_array(); 1437 + 1438 + test_queuemap(0, NULL); 1439 + test_stackmap(0, NULL); 1554 1440 } 1555 1441 1556 1442 int main(void) 1557 1443 { 1444 + srand(time(NULL)); 1445 + 1558 1446 map_flags = 0; 1559 1447 run_all_tests(); 1560 1448
+99
tools/testing/selftests/bpf/test_progs.c
··· 1735 1735 bpf_object__close(obj); 1736 1736 } 1737 1737 1738 + enum { 1739 + QUEUE, 1740 + STACK, 1741 + }; 1742 + 1743 + static void test_queue_stack_map(int type) 1744 + { 1745 + const int MAP_SIZE = 32; 1746 + __u32 vals[MAP_SIZE], duration, retval, size, val; 1747 + int i, err, prog_fd, map_in_fd, map_out_fd; 1748 + char file[32], buf[128]; 1749 + struct bpf_object *obj; 1750 + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); 1751 + 1752 + /* Fill test values to be used */ 1753 + for (i = 0; i < MAP_SIZE; i++) 1754 + vals[i] = rand(); 1755 + 1756 + if (type == QUEUE) 1757 + strncpy(file, "./test_queue_map.o", sizeof(file)); 1758 + else if (type == STACK) 1759 + strncpy(file, "./test_stack_map.o", sizeof(file)); 1760 + else 1761 + return; 1762 + 1763 + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); 1764 + if (err) { 1765 + error_cnt++; 1766 + return; 1767 + } 1768 + 1769 + map_in_fd = bpf_find_map(__func__, obj, "map_in"); 1770 + if (map_in_fd < 0) 1771 + goto out; 1772 + 1773 + map_out_fd = bpf_find_map(__func__, obj, "map_out"); 1774 + if (map_out_fd < 0) 1775 + goto out; 1776 + 1777 + /* Push 32 elements to the input map */ 1778 + for (i = 0; i < MAP_SIZE; i++) { 1779 + err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); 1780 + if (err) { 1781 + error_cnt++; 1782 + goto out; 1783 + } 1784 + } 1785 + 1786 + /* The eBPF program pushes iph.saddr in the output map, 1787 + * pops the input map and saves this value in iph.daddr 1788 + */ 1789 + for (i = 0; i < MAP_SIZE; i++) { 1790 + if (type == QUEUE) { 1791 + val = vals[i]; 1792 + pkt_v4.iph.saddr = vals[i] * 5; 1793 + } else if (type == STACK) { 1794 + val = vals[MAP_SIZE - 1 - i]; 1795 + pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; 1796 + } 1797 + 1798 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 1799 + buf, &size, &retval, &duration); 1800 + if (err || retval || size != sizeof(pkt_v4) || 1801 + iph->daddr != val) 1802 + break; 1803 + } 1804 + 1805 + 
CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, 1806 + "bpf_map_pop_elem", 1807 + "err %d errno %d retval %d size %d iph->daddr %u\n", 1808 + err, errno, retval, size, iph->daddr); 1809 + 1810 + /* Queue is empty, program should return TC_ACT_SHOT */ 1811 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 1812 + buf, &size, &retval, &duration); 1813 + CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), 1814 + "check-queue-stack-map-empty", 1815 + "err %d errno %d retval %d size %d\n", 1816 + err, errno, retval, size); 1817 + 1818 + /* Check that the program pushed elements correctly */ 1819 + for (i = 0; i < MAP_SIZE; i++) { 1820 + err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); 1821 + if (err || val != vals[i] * 5) 1822 + break; 1823 + } 1824 + 1825 + CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), 1826 + "bpf_map_push_elem", "err %d value %u\n", err, val); 1827 + 1828 + out: 1829 + pkt_v4.iph.saddr = 0; 1830 + bpf_object__close(obj); 1831 + } 1832 + 1738 1833 int main(void) 1739 1834 { 1835 + srand(time(NULL)); 1836 + 1740 1837 jit_enabled = is_jit_enabled(); 1741 1838 1742 1839 test_pkt_access(); ··· 1854 1757 test_task_fd_query_rawtp(); 1855 1758 test_task_fd_query_tp(); 1856 1759 test_reference_tracking(); 1760 + test_queue_stack_map(QUEUE); 1761 + test_queue_stack_map(STACK); 1857 1762 1858 1763 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); 1859 1764 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+4
tools/testing/selftests/bpf/test_queue_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #define MAP_TYPE BPF_MAP_TYPE_QUEUE 4 + #include "test_queue_stack_map.h"
+59
tools/testing/selftests/bpf/test_queue_stack_map.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #include <stddef.h> 4 + #include <string.h> 5 + #include <linux/bpf.h> 6 + #include <linux/if_ether.h> 7 + #include <linux/ip.h> 8 + #include <linux/pkt_cls.h> 9 + #include "bpf_helpers.h" 10 + 11 + int _version SEC("version") = 1; 12 + 13 + struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { 14 + .type = MAP_TYPE, 15 + .key_size = 0, 16 + .value_size = sizeof(__u32), 17 + .max_entries = 32, 18 + .map_flags = 0, 19 + }; 20 + 21 + struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { 22 + .type = MAP_TYPE, 23 + .key_size = 0, 24 + .value_size = sizeof(__u32), 25 + .max_entries = 32, 26 + .map_flags = 0, 27 + }; 28 + 29 + SEC("test") 30 + int _test(struct __sk_buff *skb) 31 + { 32 + void *data_end = (void *)(long)skb->data_end; 33 + void *data = (void *)(long)skb->data; 34 + struct ethhdr *eth = (struct ethhdr *)(data); 35 + __u32 value; 36 + int err; 37 + 38 + if (eth + 1 > data_end) 39 + return TC_ACT_SHOT; 40 + 41 + struct iphdr *iph = (struct iphdr *)(eth + 1); 42 + 43 + if (iph + 1 > data_end) 44 + return TC_ACT_SHOT; 45 + 46 + err = bpf_map_pop_elem(&map_in, &value); 47 + if (err) 48 + return TC_ACT_SHOT; 49 + 50 + iph->daddr = value; 51 + 52 + err = bpf_map_push_elem(&map_out, &iph->saddr, 0); 53 + if (err) 54 + return TC_ACT_SHOT; 55 + 56 + return TC_ACT_OK; 57 + } 58 + 59 + char _license[] SEC("license") = "GPL";
+177 -59
tools/testing/selftests/bpf/test_sockmap.c
··· 28 28 #include <linux/sock_diag.h> 29 29 #include <linux/bpf.h> 30 30 #include <linux/if_link.h> 31 + #include <linux/tls.h> 31 32 #include <assert.h> 32 33 #include <libgen.h> 33 34 ··· 43 42 44 43 int running; 45 44 static void running_handler(int a); 45 + 46 + #ifndef TCP_ULP 47 + # define TCP_ULP 31 48 + #endif 49 + #ifndef SOL_TLS 50 + # define SOL_TLS 282 51 + #endif 46 52 47 53 /* randomly selected ports for testing on lo */ 48 54 #define S1_PORT 10000 ··· 77 69 int txmsg_cork; 78 70 int txmsg_start; 79 71 int txmsg_end; 72 + int txmsg_start_push; 73 + int txmsg_end_push; 80 74 int txmsg_ingress; 81 75 int txmsg_skb; 82 76 int ktls; 77 + int peek_flag; 83 78 84 79 static const struct option long_options[] = { 85 80 {"help", no_argument, NULL, 'h' }, ··· 102 91 {"txmsg_cork", required_argument, NULL, 'k'}, 103 92 {"txmsg_start", required_argument, NULL, 's'}, 104 93 {"txmsg_end", required_argument, NULL, 'e'}, 94 + {"txmsg_start_push", required_argument, NULL, 'p'}, 95 + {"txmsg_end_push", required_argument, NULL, 'q'}, 105 96 {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, 106 97 {"txmsg_skb", no_argument, &txmsg_skb, 1 }, 107 98 {"ktls", no_argument, &ktls, 1 }, 99 + {"peek", no_argument, &peek_flag, 1 }, 108 100 {0, 0, NULL, 0 } 109 101 }; 110 102 ··· 127 113 } 128 114 printf("\n"); 129 115 } 130 - 131 - #define TCP_ULP 31 132 - #define TLS_TX 1 133 - #define TLS_RX 2 134 - #include <linux/tls.h> 135 116 136 117 char *sock_to_string(int s) 137 118 { ··· 358 349 return 0; 359 350 } 360 351 361 - static int msg_loop(int fd, int iov_count, int iov_length, int cnt, 362 - struct msg_stats *s, bool tx, 363 - struct sockmap_options *opt) 352 + static void msg_free_iov(struct msghdr *msg) 364 353 { 365 - struct msghdr msg = {0}; 366 - int err, i, flags = MSG_NOSIGNAL; 354 + int i; 355 + 356 + for (i = 0; i < msg->msg_iovlen; i++) 357 + free(msg->msg_iov[i].iov_base); 358 + free(msg->msg_iov); 359 + msg->msg_iov = NULL; 360 + msg->msg_iovlen = 0; 361 + } 
362 + 363 + static int msg_alloc_iov(struct msghdr *msg, 364 + int iov_count, int iov_length, 365 + bool data, bool xmit) 366 + { 367 + unsigned char k = 0; 367 368 struct iovec *iov; 368 - unsigned char k; 369 - bool data_test = opt->data_test; 370 - bool drop = opt->drop_expected; 369 + int i; 371 370 372 371 iov = calloc(iov_count, sizeof(struct iovec)); 373 372 if (!iov) 374 373 return errno; 375 374 376 - k = 0; 377 375 for (i = 0; i < iov_count; i++) { 378 376 unsigned char *d = calloc(iov_length, sizeof(char)); 379 377 380 378 if (!d) { 381 379 fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); 382 - goto out_errno; 380 + goto unwind_iov; 383 381 } 384 382 iov[i].iov_base = d; 385 383 iov[i].iov_len = iov_length; 386 384 387 - if (data_test && tx) { 385 + if (data && xmit) { 388 386 int j; 389 387 390 388 for (j = 0; j < iov_length; j++) ··· 399 383 } 400 384 } 401 385 402 - msg.msg_iov = iov; 403 - msg.msg_iovlen = iov_count; 404 - k = 0; 386 + msg->msg_iov = iov; 387 + msg->msg_iovlen = iov_count; 388 + 389 + return 0; 390 + unwind_iov: 391 + for (i--; i >= 0 ; i--) 392 + free(msg->msg_iov[i].iov_base); 393 + return -ENOMEM; 394 + } 395 + 396 + static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) 397 + { 398 + int i, j, bytes_cnt = 0; 399 + unsigned char k = 0; 400 + 401 + for (i = 0; i < msg->msg_iovlen; i++) { 402 + unsigned char *d = msg->msg_iov[i].iov_base; 403 + 404 + for (j = 0; 405 + j < msg->msg_iov[i].iov_len && size; j++) { 406 + if (d[j] != k++) { 407 + fprintf(stderr, 408 + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", 409 + i, j, d[j], k - 1, d[j+1], k); 410 + return -EIO; 411 + } 412 + bytes_cnt++; 413 + if (bytes_cnt == chunk_sz) { 414 + k = 0; 415 + bytes_cnt = 0; 416 + } 417 + size--; 418 + } 419 + } 420 + return 0; 421 + } 422 + 423 + static int msg_loop(int fd, int iov_count, int iov_length, int cnt, 424 + struct msg_stats *s, bool tx, 425 + struct sockmap_options *opt) 426 + { 427 + struct 
msghdr msg = {0}, msg_peek = {0}; 428 + int err, i, flags = MSG_NOSIGNAL; 429 + bool drop = opt->drop_expected; 430 + bool data = opt->data_test; 431 + 432 + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); 433 + if (err) 434 + goto out_errno; 435 + if (peek_flag) { 436 + err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx); 437 + if (err) 438 + goto out_errno; 439 + } 405 440 406 441 if (tx) { 407 442 clock_gettime(CLOCK_MONOTONIC, &s->start); ··· 472 405 } 473 406 clock_gettime(CLOCK_MONOTONIC, &s->end); 474 407 } else { 475 - int slct, recv, max_fd = fd; 408 + int slct, recvp = 0, recv, max_fd = fd; 476 409 int fd_flags = O_NONBLOCK; 477 410 struct timeval timeout; 478 411 float total_bytes; 479 - int bytes_cnt = 0; 480 - int chunk_sz; 481 412 fd_set w; 482 - 483 - if (opt->sendpage) 484 - chunk_sz = iov_length * cnt; 485 - else 486 - chunk_sz = iov_length * iov_count; 487 413 488 414 fcntl(fd, fd_flags); 489 415 total_bytes = (float)iov_count * (float)iov_length * (float)cnt; ··· 509 449 goto out_errno; 510 450 } 511 451 452 + errno = 0; 453 + if (peek_flag) { 454 + flags |= MSG_PEEK; 455 + recvp = recvmsg(fd, &msg_peek, flags); 456 + if (recvp < 0) { 457 + if (errno != EWOULDBLOCK) { 458 + clock_gettime(CLOCK_MONOTONIC, &s->end); 459 + goto out_errno; 460 + } 461 + } 462 + flags = 0; 463 + } 464 + 512 465 recv = recvmsg(fd, &msg, flags); 513 466 if (recv < 0) { 514 467 if (errno != EWOULDBLOCK) { ··· 533 460 534 461 s->bytes_recvd += recv; 535 462 536 - if (data_test) { 537 - int j; 463 + if (data) { 464 + int chunk_sz = opt->sendpage ? 
465 + iov_length * cnt : 466 + iov_length * iov_count; 538 467 539 - for (i = 0; i < msg.msg_iovlen; i++) { 540 - unsigned char *d = iov[i].iov_base; 541 - 542 - for (j = 0; 543 - j < iov[i].iov_len && recv; j++) { 544 - if (d[j] != k++) { 545 - errno = -EIO; 546 - fprintf(stderr, 547 - "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", 548 - i, j, d[j], k - 1, d[j+1], k); 549 - goto out_errno; 550 - } 551 - bytes_cnt++; 552 - if (bytes_cnt == chunk_sz) { 553 - k = 0; 554 - bytes_cnt = 0; 555 - } 556 - recv--; 468 + errno = msg_verify_data(&msg, recv, chunk_sz); 469 + if (errno) { 470 + perror("data verify msg failed\n"); 471 + goto out_errno; 472 + } 473 + if (recvp) { 474 + errno = msg_verify_data(&msg_peek, 475 + recvp, 476 + chunk_sz); 477 + if (errno) { 478 + perror("data verify msg_peek failed\n"); 479 + goto out_errno; 557 480 } 558 481 } 559 482 } ··· 557 488 clock_gettime(CLOCK_MONOTONIC, &s->end); 558 489 } 559 490 560 - for (i = 0; i < iov_count; i++) 561 - free(iov[i].iov_base); 562 - free(iov); 563 - return 0; 491 + msg_free_iov(&msg); 492 + msg_free_iov(&msg_peek); 493 + return err; 564 494 out_errno: 565 - for (i = 0; i < iov_count; i++) 566 - free(iov[i].iov_base); 567 - free(iov); 495 + msg_free_iov(&msg); 496 + msg_free_iov(&msg_peek); 568 497 return errno; 569 498 } 570 499 ··· 629 562 } 630 563 if (opt->verbose) 631 564 fprintf(stdout, 632 - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", 565 + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", 633 566 s.bytes_sent, sent_Bps, sent_Bps/giga, 634 - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); 567 + s.bytes_recvd, recvd_Bps, recvd_Bps/giga, 568 + peek_flag ? "(peek_msg)" : ""); 635 569 if (err && txmsg_cork) 636 570 err = 0; 637 571 exit(err ? 
1 : 0); ··· 907 839 } 908 840 } 909 841 842 + if (txmsg_start_push) { 843 + i = 2; 844 + err = bpf_map_update_elem(map_fd[5], 845 + &i, &txmsg_start_push, BPF_ANY); 846 + if (err) { 847 + fprintf(stderr, 848 + "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n", 849 + err, strerror(errno)); 850 + goto out; 851 + } 852 + } 853 + 854 + if (txmsg_end_push) { 855 + i = 3; 856 + err = bpf_map_update_elem(map_fd[5], 857 + &i, &txmsg_end_push, BPF_ANY); 858 + if (err) { 859 + fprintf(stderr, 860 + "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n", 861 + txmsg_end_push, i, err, strerror(errno)); 862 + goto out; 863 + } 864 + } 865 + 910 866 if (txmsg_ingress) { 911 867 int in = BPF_F_INGRESS; 912 868 ··· 1088 996 strncat(options, "skb,", OPTSTRING); 1089 997 if (ktls) 1090 998 strncat(options, "ktls,", OPTSTRING); 999 + if (peek_flag) 1000 + strncat(options, "peek,", OPTSTRING); 1091 1001 } 1092 1002 1093 1003 static int __test_exec(int cgrp, int test, struct sockmap_options *opt) ··· 1263 1169 txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; 1264 1170 txmsg_apply = txmsg_cork = 0; 1265 1171 txmsg_start = txmsg_end = 0; 1172 + txmsg_start_push = txmsg_end_push = 0; 1173 + 1266 1174 /* Test small and large iov_count values with pass/redir/apply/cork */ 1267 1175 txmsg_pass = 1; 1268 1176 txmsg_redir = 0; ··· 1381 1285 /* Test basic start/end with lots of iov_count and iov_lengths */ 1382 1286 txmsg_start = 1; 1383 1287 txmsg_end = 2; 1288 + txmsg_start_push = 1; 1289 + txmsg_end_push = 2; 1384 1290 err = test_txmsg(cgrp); 1385 1291 if (err) 1386 1292 goto out; ··· 1396 1298 for (i = 99; i <= 1600; i += 500) { 1397 1299 txmsg_start = 0; 1398 1300 txmsg_end = i; 1301 + txmsg_start_push = 0; 1302 + txmsg_end_push = i; 1399 1303 err = test_exec(cgrp, &opt); 1400 1304 if (err) 1401 1305 goto out; ··· 1407 1307 for (i = 199; i <= 1600; i += 500) { 1408 1308 txmsg_start = 100; 1409 1309 txmsg_end = i; 1310 + txmsg_start_push = 100; 1311 + 
txmsg_end_push = i; 1410 1312 err = test_exec(cgrp, &opt); 1411 1313 if (err) 1412 1314 goto out; ··· 1417 1315 /* Test start/end with cork pulling last sg entry */ 1418 1316 txmsg_start = 1500; 1419 1317 txmsg_end = 1600; 1318 + txmsg_start_push = 1500; 1319 + txmsg_end_push = 1600; 1420 1320 err = test_exec(cgrp, &opt); 1421 1321 if (err) 1422 1322 goto out; ··· 1426 1322 /* Test start/end pull of single byte in last page */ 1427 1323 txmsg_start = 1111; 1428 1324 txmsg_end = 1112; 1325 + txmsg_start_push = 1111; 1326 + txmsg_end_push = 1112; 1429 1327 err = test_exec(cgrp, &opt); 1430 1328 if (err) 1431 1329 goto out; ··· 1435 1329 /* Test start/end with end < start */ 1436 1330 txmsg_start = 1111; 1437 1331 txmsg_end = 0; 1332 + txmsg_start_push = 1111; 1333 + txmsg_end_push = 0; 1438 1334 err = test_exec(cgrp, &opt); 1439 1335 if (err) 1440 1336 goto out; ··· 1444 1336 /* Test start/end with end > data */ 1445 1337 txmsg_start = 0; 1446 1338 txmsg_end = 1601; 1339 + txmsg_start_push = 0; 1340 + txmsg_end_push = 1601; 1447 1341 err = test_exec(cgrp, &opt); 1448 1342 if (err) 1449 1343 goto out; ··· 1453 1343 /* Test start/end with start > data */ 1454 1344 txmsg_start = 1601; 1455 1345 txmsg_end = 1600; 1346 + txmsg_start_push = 1601; 1347 + txmsg_end_push = 1600; 1456 1348 err = test_exec(cgrp, &opt); 1457 1349 1458 1350 out: ··· 1470 1358 "sock_map_redir", 1471 1359 "sock_apply_bytes", 1472 1360 "sock_cork_bytes", 1473 - "sock_pull_bytes", 1361 + "sock_bytes", 1474 1362 "sock_redir_flags", 1475 1363 "sock_skb_opts", 1476 1364 }; ··· 1577 1465 } 1578 1466 1579 1467 /* Tests basic commands and APIs with range of iov values */ 1580 - txmsg_start = txmsg_end = 0; 1468 + txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; 1581 1469 err = test_txmsg(cg_fd); 1582 1470 if (err) 1583 1471 goto out; ··· 1626 1514 if (argc < 2) 1627 1515 return test_suite(-1); 1628 1516 1629 - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", 1517 + while ((opt = 
getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", 1630 1518 long_options, &longindex)) != -1) { 1631 1519 switch (opt) { 1632 1520 case 's': ··· 1634 1522 break; 1635 1523 case 'e': 1636 1524 txmsg_end = atoi(optarg); 1525 + break; 1526 + case 'p': 1527 + txmsg_start_push = atoi(optarg); 1528 + break; 1529 + case 'q': 1530 + txmsg_end_push = atoi(optarg); 1637 1531 break; 1638 1532 case 'a': 1639 1533 txmsg_apply = atoi(optarg);
+74 -23
tools/testing/selftests/bpf/test_sockmap_kern.h
··· 70 70 .max_entries = 1 71 71 }; 72 72 73 - struct bpf_map_def SEC("maps") sock_pull_bytes = { 73 + struct bpf_map_def SEC("maps") sock_bytes = { 74 74 .type = BPF_MAP_TYPE_ARRAY, 75 75 .key_size = sizeof(int), 76 76 .value_size = sizeof(int), 77 - .max_entries = 2 77 + .max_entries = 4 78 78 }; 79 79 80 80 struct bpf_map_def SEC("maps") sock_redir_flags = { ··· 181 181 SEC("sk_msg1") 182 182 int bpf_prog4(struct sk_msg_md *msg) 183 183 { 184 - int *bytes, zero = 0, one = 1; 185 - int *start, *end; 184 + int *bytes, zero = 0, one = 1, two = 2, three = 3; 185 + int *start, *end, *start_push, *end_push; 186 186 187 187 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 188 188 if (bytes) ··· 190 190 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 191 191 if (bytes) 192 192 bpf_msg_cork_bytes(msg, *bytes); 193 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 194 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 193 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 194 + end = bpf_map_lookup_elem(&sock_bytes, &one); 195 195 if (start && end) 196 196 bpf_msg_pull_data(msg, *start, *end, 0); 197 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 198 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 199 + if (start_push && end_push) 200 + bpf_msg_push_data(msg, *start_push, *end_push, 0); 197 201 return SK_PASS; 198 202 } 199 203 200 204 SEC("sk_msg2") 201 205 int bpf_prog5(struct sk_msg_md *msg) 202 206 { 203 - int err1 = -1, err2 = -1, zero = 0, one = 1; 204 - int *bytes, *start, *end, len1, len2; 207 + int zero = 0, one = 1, two = 2, three = 3; 208 + int *start, *end, *start_push, *end_push; 209 + int *bytes, len1, len2 = 0, len3; 210 + int err1 = -1, err2 = -1; 205 211 206 212 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 207 213 if (bytes) ··· 216 210 if (bytes) 217 211 err2 = bpf_msg_cork_bytes(msg, *bytes); 218 212 len1 = (__u64)msg->data_end - (__u64)msg->data; 219 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 
220 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 213 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 214 + end = bpf_map_lookup_elem(&sock_bytes, &one); 221 215 if (start && end) { 222 216 int err; 223 217 ··· 231 225 bpf_printk("sk_msg2: length update %i->%i\n", 232 226 len1, len2); 233 227 } 228 + 229 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 230 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 231 + if (start_push && end_push) { 232 + int err; 233 + 234 + bpf_printk("sk_msg2: push(%i:%i)\n", 235 + start_push ? *start_push : 0, 236 + end_push ? *end_push : 0); 237 + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); 238 + if (err) 239 + bpf_printk("sk_msg2: push_data err %i\n", err); 240 + len3 = (__u64)msg->data_end - (__u64)msg->data; 241 + bpf_printk("sk_msg2: length push_update %i->%i\n", 242 + len2 ? len2 : len1, len3); 243 + } 244 + 234 245 bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", 235 246 len1, err1, err2); 236 247 return SK_PASS; ··· 256 233 SEC("sk_msg3") 257 234 int bpf_prog6(struct sk_msg_md *msg) 258 235 { 259 - int *bytes, zero = 0, one = 1, key = 0; 260 - int *start, *end, *f; 236 + int *bytes, *start, *end, *start_push, *end_push, *f; 237 + int zero = 0, one = 1, two = 2, three = 3, key = 0; 261 238 __u64 flags = 0; 262 239 263 240 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); ··· 266 243 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 267 244 if (bytes) 268 245 bpf_msg_cork_bytes(msg, *bytes); 269 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 270 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 246 + 247 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 248 + end = bpf_map_lookup_elem(&sock_bytes, &one); 271 249 if (start && end) 272 250 bpf_msg_pull_data(msg, *start, *end, 0); 251 + 252 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 253 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 254 + if (start_push && end_push) 255 + bpf_msg_push_data(msg, 
*start_push, *end_push, 0); 256 + 273 257 f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 274 258 if (f && *f) { 275 259 key = 2; ··· 292 262 SEC("sk_msg4") 293 263 int bpf_prog7(struct sk_msg_md *msg) 294 264 { 295 - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; 296 - int *f, *bytes, *start, *end, len1, len2; 265 + int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; 266 + int *bytes, *start, *end, *start_push, *end_push, *f; 267 + int err1 = 0, err2 = 0, key = 0; 297 268 __u64 flags = 0; 298 269 299 270 int err; ··· 305 274 if (bytes) 306 275 err2 = bpf_msg_cork_bytes(msg, *bytes); 307 276 len1 = (__u64)msg->data_end - (__u64)msg->data; 308 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 309 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 310 - if (start && end) { 311 277 278 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 279 + end = bpf_map_lookup_elem(&sock_bytes, &one); 280 + if (start && end) { 312 281 bpf_printk("sk_msg2: pull(%i:%i)\n", 313 282 start ? *start : 0, end ? *end : 0); 314 283 err = bpf_msg_pull_data(msg, *start, *end, 0); ··· 319 288 bpf_printk("sk_msg2: length update %i->%i\n", 320 289 len1, len2); 321 290 } 291 + 292 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 293 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 294 + if (start_push && end_push) { 295 + bpf_printk("sk_msg4: push(%i:%i)\n", 296 + start_push ? *start_push : 0, 297 + end_push ? *end_push : 0); 298 + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); 299 + if (err) 300 + bpf_printk("sk_msg4: push_data err %i\n", 301 + err); 302 + len3 = (__u64)msg->data_end - (__u64)msg->data; 303 + bpf_printk("sk_msg4: length push_update %i->%i\n", 304 + len2 ? 
len2 : len1, len3); 305 + } 306 + 322 307 f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 323 308 if (f && *f) { 324 309 key = 2; ··· 389 342 SEC("sk_msg7") 390 343 int bpf_prog10(struct sk_msg_md *msg) 391 344 { 392 - int *bytes, zero = 0, one = 1; 393 - int *start, *end; 345 + int *bytes, *start, *end, *start_push, *end_push; 346 + int zero = 0, one = 1, two = 2, three = 3; 394 347 395 348 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 396 349 if (bytes) ··· 398 351 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 399 352 if (bytes) 400 353 bpf_msg_cork_bytes(msg, *bytes); 401 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 402 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 354 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 355 + end = bpf_map_lookup_elem(&sock_bytes, &one); 403 356 if (start && end) 404 357 bpf_msg_pull_data(msg, *start, *end, 0); 358 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 359 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 360 + if (start_push && end_push) 361 + bpf_msg_push_data(msg, *start_push, *end_push, 0); 405 362 406 363 return SK_DROP; 407 364 }
+4
tools/testing/selftests/bpf/test_stack_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #define MAP_TYPE BPF_MAP_TYPE_STACK 4 + #include "test_queue_stack_map.h"
+176 -5
tools/testing/selftests/bpf/test_verifier.c
··· 3430 3430 BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), 3431 3431 BPF_EXIT_INSN(), 3432 3432 }, 3433 - .errstr = "BPF_ST stores into R1 inv is not allowed", 3433 + .errstr = "BPF_ST stores into R1 ctx is not allowed", 3434 3434 .result = REJECT, 3435 3435 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3436 3436 }, ··· 3442 3442 BPF_REG_0, offsetof(struct __sk_buff, mark), 0), 3443 3443 BPF_EXIT_INSN(), 3444 3444 }, 3445 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 3445 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 3446 3446 .result = REJECT, 3447 3447 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3448 3448 }, ··· 4863 4863 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 4864 4864 }, 4865 4865 { 4866 + "direct packet read test#1 for CGROUP_SKB", 4867 + .insns = { 4868 + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4869 + offsetof(struct __sk_buff, data)), 4870 + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 4871 + offsetof(struct __sk_buff, data_end)), 4872 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4873 + offsetof(struct __sk_buff, len)), 4874 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4875 + offsetof(struct __sk_buff, pkt_type)), 4876 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4877 + offsetof(struct __sk_buff, mark)), 4878 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4879 + offsetof(struct __sk_buff, mark)), 4880 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4881 + offsetof(struct __sk_buff, queue_mapping)), 4882 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4883 + offsetof(struct __sk_buff, protocol)), 4884 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4885 + offsetof(struct __sk_buff, vlan_present)), 4886 + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), 4887 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), 4888 + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), 4889 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), 4890 + BPF_MOV64_IMM(BPF_REG_0, 0), 4891 + BPF_EXIT_INSN(), 4892 + }, 4893 + .result = ACCEPT, 4894 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4895 + }, 4896 + { 
4897 + "direct packet read test#2 for CGROUP_SKB", 4898 + .insns = { 4899 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4900 + offsetof(struct __sk_buff, vlan_tci)), 4901 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4902 + offsetof(struct __sk_buff, vlan_proto)), 4903 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4904 + offsetof(struct __sk_buff, priority)), 4905 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4906 + offsetof(struct __sk_buff, priority)), 4907 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4908 + offsetof(struct __sk_buff, 4909 + ingress_ifindex)), 4910 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4911 + offsetof(struct __sk_buff, tc_index)), 4912 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4913 + offsetof(struct __sk_buff, hash)), 4914 + BPF_MOV64_IMM(BPF_REG_0, 0), 4915 + BPF_EXIT_INSN(), 4916 + }, 4917 + .result = ACCEPT, 4918 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4919 + }, 4920 + { 4921 + "direct packet read test#3 for CGROUP_SKB", 4922 + .insns = { 4923 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4924 + offsetof(struct __sk_buff, cb[0])), 4925 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4926 + offsetof(struct __sk_buff, cb[1])), 4927 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4928 + offsetof(struct __sk_buff, cb[2])), 4929 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4930 + offsetof(struct __sk_buff, cb[3])), 4931 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4932 + offsetof(struct __sk_buff, cb[4])), 4933 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4934 + offsetof(struct __sk_buff, napi_id)), 4935 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, 4936 + offsetof(struct __sk_buff, cb[0])), 4937 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, 4938 + offsetof(struct __sk_buff, cb[1])), 4939 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4940 + offsetof(struct __sk_buff, cb[2])), 4941 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 4942 + offsetof(struct __sk_buff, cb[3])), 4943 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, 4944 + offsetof(struct __sk_buff, cb[4])), 4945 + 
BPF_MOV64_IMM(BPF_REG_0, 0), 4946 + BPF_EXIT_INSN(), 4947 + }, 4948 + .result = ACCEPT, 4949 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4950 + }, 4951 + { 4952 + "direct packet read test#4 for CGROUP_SKB", 4953 + .insns = { 4954 + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4955 + offsetof(struct __sk_buff, family)), 4956 + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 4957 + offsetof(struct __sk_buff, remote_ip4)), 4958 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4959 + offsetof(struct __sk_buff, local_ip4)), 4960 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4961 + offsetof(struct __sk_buff, remote_ip6[0])), 4962 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4963 + offsetof(struct __sk_buff, remote_ip6[1])), 4964 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4965 + offsetof(struct __sk_buff, remote_ip6[2])), 4966 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4967 + offsetof(struct __sk_buff, remote_ip6[3])), 4968 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4969 + offsetof(struct __sk_buff, local_ip6[0])), 4970 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4971 + offsetof(struct __sk_buff, local_ip6[1])), 4972 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4973 + offsetof(struct __sk_buff, local_ip6[2])), 4974 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4975 + offsetof(struct __sk_buff, local_ip6[3])), 4976 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4977 + offsetof(struct __sk_buff, remote_port)), 4978 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4979 + offsetof(struct __sk_buff, local_port)), 4980 + BPF_MOV64_IMM(BPF_REG_0, 0), 4981 + BPF_EXIT_INSN(), 4982 + }, 4983 + .result = ACCEPT, 4984 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4985 + }, 4986 + { 4987 + "invalid access of tc_classid for CGROUP_SKB", 4988 + .insns = { 4989 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 4990 + offsetof(struct __sk_buff, tc_classid)), 4991 + BPF_MOV64_IMM(BPF_REG_0, 0), 4992 + BPF_EXIT_INSN(), 4993 + }, 4994 + .result = REJECT, 4995 + .errstr = "invalid bpf_context access", 4996 + .prog_type = 
BPF_PROG_TYPE_CGROUP_SKB, 4997 + }, 4998 + { 4999 + "invalid access of data_meta for CGROUP_SKB", 5000 + .insns = { 5001 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 5002 + offsetof(struct __sk_buff, data_meta)), 5003 + BPF_MOV64_IMM(BPF_REG_0, 0), 5004 + BPF_EXIT_INSN(), 5005 + }, 5006 + .result = REJECT, 5007 + .errstr = "invalid bpf_context access", 5008 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5009 + }, 5010 + { 5011 + "invalid access of flow_keys for CGROUP_SKB", 5012 + .insns = { 5013 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 5014 + offsetof(struct __sk_buff, flow_keys)), 5015 + BPF_MOV64_IMM(BPF_REG_0, 0), 5016 + BPF_EXIT_INSN(), 5017 + }, 5018 + .result = REJECT, 5019 + .errstr = "invalid bpf_context access", 5020 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5021 + }, 5022 + { 5023 + "invalid write access to napi_id for CGROUP_SKB", 5024 + .insns = { 5025 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 5026 + offsetof(struct __sk_buff, napi_id)), 5027 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, 5028 + offsetof(struct __sk_buff, napi_id)), 5029 + BPF_MOV64_IMM(BPF_REG_0, 0), 5030 + BPF_EXIT_INSN(), 5031 + }, 5032 + .result = REJECT, 5033 + .errstr = "invalid bpf_context access", 5034 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5035 + }, 5036 + { 4866 5037 "valid cgroup storage access", 4867 5038 .insns = { 4868 5039 BPF_MOV64_IMM(BPF_REG_2, 0), ··· 5670 5499 .errstr_unpriv = "R2 leaks addr into mem", 5671 5500 .result_unpriv = REJECT, 5672 5501 .result = REJECT, 5673 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 5502 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 5674 5503 }, 5675 5504 { 5676 5505 "leak pointer into ctx 2", ··· 5685 5514 .errstr_unpriv = "R10 leaks addr into mem", 5686 5515 .result_unpriv = REJECT, 5687 5516 .result = REJECT, 5688 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 5517 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 5689 5518 }, 5690 5519 { 5691 5520 "leak pointer into ctx 3", ··· 12634 12463 
BPF_EXIT_INSN(), 12635 12464 }, 12636 12465 .result = REJECT, 12637 - .errstr = "BPF_XADD stores into R2 ctx", 12466 + .errstr = "BPF_XADD stores into R2 pkt is not allowed", 12638 12467 .prog_type = BPF_PROG_TYPE_XDP, 12639 12468 }, 12640 12469 {
+5 -3
tools/testing/selftests/bpf/trace_helpers.c
··· 41 41 syms[i].name = strdup(func); 42 42 i++; 43 43 } 44 + fclose(f); 44 45 sym_cnt = i; 45 46 qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); 46 47 return 0; ··· 125 124 char data[]; 126 125 }; 127 126 128 - static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) 127 + static enum bpf_perf_event_ret 128 + bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) 129 129 { 130 - struct perf_event_sample *e = event; 131 - perf_event_print_fn fn = priv; 130 + struct perf_event_sample *e = (struct perf_event_sample *)hdr; 131 + perf_event_print_fn fn = private_data; 132 132 int ret; 133 133 134 134 if (e->header.type == PERF_RECORD_SAMPLE) {