Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2018-10-21

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Implement two new kinds of BPF maps, that is, queue and stack
map along with new peek, push and pop operations, from Mauricio.

2) Add support for MSG_PEEK flag when redirecting into an ingress
psock sk_msg queue, and add a new helper bpf_msg_push_data() for
inserting data into the message, from John.

3) Allow for BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to use
direct packet access for __sk_buff, from Song.

4) Use more lightweight barriers for walking perf ring buffer for
libbpf and perf tool as well. Also, various fixes and improvements
from verifier side, from Daniel.

5) Add per-symbol visibility for DSO in libbpf and hide by default
global symbols such as netlink related functions, from Andrey.

6) Two improvements to nfp's BPF offload to check vNIC capabilities
in case prog is shared with multiple vNICs and to protect against
mis-initializing atomic counters, from Jakub.

7) Fix for bpftool to use 4 context mode for the nfp disassembler,
also from Jakub.

8) Fix a return value comparison in test_libbpf.sh and add several
bpftool improvements in bash completion, documentation of bpf fs
restrictions and batch mode summary print, from Quentin.

9) Fix a file resource leak in BPF selftest's load_kallsyms()
helper, from Peng.

10) Fix an unused variable warning in map_lookup_and_delete_elem(),
from Alexei.

11) Fix bpf_skb_adjust_room() signature in BPF UAPI helper doc,
from Nicolas.

12) Add missing executables to .gitignore in BPF selftests, from Anders.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2315 -394
+9 -1
drivers/net/ethernet/netronome/nfp/bpf/main.h
··· 189 189 NFP_MAP_USE_ATOMIC_CNT, 190 190 }; 191 191 192 + struct nfp_bpf_map_word { 193 + unsigned char type :4; 194 + unsigned char non_zero_update :1; 195 + }; 196 + 192 197 /** 193 198 * struct nfp_bpf_map - private per-map data attached to BPF maps for offload 194 199 * @offmap: pointer to the offloaded BPF map ··· 207 202 struct nfp_app_bpf *bpf; 208 203 u32 tid; 209 204 struct list_head l; 210 - enum nfp_bpf_map_use use_map[]; 205 + struct nfp_bpf_map_word use_map[]; 211 206 }; 212 207 213 208 struct nfp_bpf_neutral_map { ··· 441 436 * @prog: machine code 442 437 * @prog_len: number of valid instructions in @prog array 443 438 * @__prog_alloc_len: alloc size of @prog array 439 + * @stack_size: total amount of stack used 444 440 * @verifier_meta: temporary storage for verifier's insn meta 445 441 * @type: BPF program type 446 442 * @last_bpf_off: address of the last instruction translated from BPF ··· 465 459 u64 *prog; 466 460 unsigned int prog_len; 467 461 unsigned int __prog_alloc_len; 462 + 463 + unsigned int stack_size; 468 464 469 465 struct nfp_insn_meta *verifier_meta; 470 466
+30 -2
drivers/net/ethernet/netronome/nfp/bpf/offload.c
··· 262 262 unsigned int i; 263 263 264 264 for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) 265 - if (nfp_map->use_map[i] == NFP_MAP_USE_ATOMIC_CNT) 265 + if (nfp_map->use_map[i].type == NFP_MAP_USE_ATOMIC_CNT) 266 266 word[i] = (__force u32)cpu_to_be32(word[i]); 267 + } 268 + 269 + /* Mark value as unsafely initialized in case it becomes atomic later 270 + * and we didn't byte swap something non-byte swap neutral. 271 + */ 272 + static void 273 + nfp_map_bpf_byte_swap_record(struct nfp_bpf_map *nfp_map, void *value) 274 + { 275 + u32 *word = value; 276 + unsigned int i; 277 + 278 + for (i = 0; i < DIV_ROUND_UP(nfp_map->offmap->map.value_size, 4); i++) 279 + if (nfp_map->use_map[i].type == NFP_MAP_UNUSED && 280 + word[i] != (__force u32)cpu_to_be32(word[i])) 281 + nfp_map->use_map[i].non_zero_update = 1; 267 282 } 268 283 269 284 static int ··· 300 285 void *key, void *value, u64 flags) 301 286 { 302 287 nfp_map_bpf_byte_swap(offmap->dev_priv, value); 288 + nfp_map_bpf_byte_swap_record(offmap->dev_priv, value); 303 289 return nfp_bpf_ctrl_update_entry(offmap, key, value, flags); 304 290 } 305 291 ··· 489 473 struct netlink_ext_ack *extack) 490 474 { 491 475 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; 492 - unsigned int max_mtu; 476 + unsigned int max_mtu, max_stack, max_prog_len; 493 477 dma_addr_t dma_addr; 494 478 void *img; 495 479 int err; ··· 497 481 max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; 498 482 if (max_mtu < nn->dp.netdev->mtu) { 499 483 NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary"); 484 + return -EOPNOTSUPP; 485 + } 486 + 487 + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 488 + if (nfp_prog->stack_size > max_stack) { 489 + NL_SET_ERR_MSG_MOD(extack, "stack too large"); 490 + return -EOPNOTSUPP; 491 + } 492 + 493 + max_prog_len = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); 494 + if (nfp_prog->prog_len > max_prog_len) { 495 + 
NL_SET_ERR_MSG_MOD(extack, "program too long"); 500 496 return -EOPNOTSUPP; 501 497 } 502 498
+59 -10
drivers/net/ethernet/netronome/nfp/bpf/verifier.c
··· 80 80 nfp_prog->adjust_head_location = location; 81 81 } 82 82 83 + static bool nfp_bpf_map_update_value_ok(struct bpf_verifier_env *env) 84 + { 85 + const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; 86 + const struct bpf_reg_state *reg3 = cur_regs(env) + BPF_REG_3; 87 + struct bpf_offloaded_map *offmap; 88 + struct bpf_func_state *state; 89 + struct nfp_bpf_map *nfp_map; 90 + int off, i; 91 + 92 + state = env->cur_state->frame[reg3->frameno]; 93 + 94 + /* We need to record each time update happens with non-zero words, 95 + * in case such word is used in atomic operations. 96 + * Implicitly depend on nfp_bpf_stack_arg_ok(reg3) being run before. 97 + */ 98 + 99 + offmap = map_to_offmap(reg1->map_ptr); 100 + nfp_map = offmap->dev_priv; 101 + off = reg3->off + reg3->var_off.value; 102 + 103 + for (i = 0; i < offmap->map.value_size; i++) { 104 + struct bpf_stack_state *stack_entry; 105 + unsigned int soff; 106 + 107 + soff = -(off + i) - 1; 108 + stack_entry = &state->stack[soff / BPF_REG_SIZE]; 109 + if (stack_entry->slot_type[soff % BPF_REG_SIZE] == STACK_ZERO) 110 + continue; 111 + 112 + if (nfp_map->use_map[i / 4].type == NFP_MAP_USE_ATOMIC_CNT) { 113 + pr_vlog(env, "value at offset %d/%d may be non-zero, bpf_map_update_elem() is required to initialize atomic counters to zero to avoid offload endian issues\n", 114 + i, soff); 115 + return false; 116 + } 117 + nfp_map->use_map[i / 4].non_zero_update = 1; 118 + } 119 + 120 + return true; 121 + } 122 + 83 123 static int 84 124 nfp_bpf_stack_arg_ok(const char *fname, struct bpf_verifier_env *env, 85 125 const struct bpf_reg_state *reg, ··· 211 171 bpf->helpers.map_update, reg1) || 212 172 !nfp_bpf_stack_arg_ok("map_update", env, reg2, 213 173 meta->func_id ? 
&meta->arg2 : NULL) || 214 - !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL)) 174 + !nfp_bpf_stack_arg_ok("map_update", env, reg3, NULL) || 175 + !nfp_bpf_map_update_value_ok(env)) 215 176 return -EOPNOTSUPP; 216 177 break; 217 178 ··· 393 352 struct nfp_bpf_map *nfp_map, 394 353 unsigned int off, enum nfp_bpf_map_use use) 395 354 { 396 - if (nfp_map->use_map[off / 4] != NFP_MAP_UNUSED && 397 - nfp_map->use_map[off / 4] != use) { 355 + if (nfp_map->use_map[off / 4].type != NFP_MAP_UNUSED && 356 + nfp_map->use_map[off / 4].type != use) { 398 357 pr_vlog(env, "map value use type conflict %s vs %s off: %u\n", 399 - nfp_bpf_map_use_name(nfp_map->use_map[off / 4]), 358 + nfp_bpf_map_use_name(nfp_map->use_map[off / 4].type), 400 359 nfp_bpf_map_use_name(use), off); 401 360 return -EOPNOTSUPP; 402 361 } 403 362 404 - nfp_map->use_map[off / 4] = use; 363 + if (nfp_map->use_map[off / 4].non_zero_update && 364 + use == NFP_MAP_USE_ATOMIC_CNT) { 365 + pr_vlog(env, "atomic counter in map value may already be initialized to non-zero value off: %u\n", 366 + off); 367 + return -EOPNOTSUPP; 368 + } 369 + 370 + nfp_map->use_map[off / 4].type = use; 405 371 406 372 return 0; 407 373 } ··· 747 699 748 700 static int nfp_bpf_finalize(struct bpf_verifier_env *env) 749 701 { 750 - unsigned int stack_size, stack_needed; 751 702 struct bpf_subprog_info *info; 752 703 struct nfp_prog *nfp_prog; 704 + unsigned int max_stack; 753 705 struct nfp_net *nn; 754 706 int i; 755 707 ··· 777 729 } 778 730 779 731 nn = netdev_priv(env->prog->aux->offload->netdev); 780 - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 781 - stack_needed = nfp_bpf_get_stack_usage(nfp_prog, env->prog->len); 782 - if (stack_needed > stack_size) { 732 + max_stack = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 733 + nfp_prog->stack_size = nfp_bpf_get_stack_usage(nfp_prog, 734 + env->prog->len); 735 + if (nfp_prog->stack_size > max_stack) { 783 736 pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", 
784 - stack_needed, stack_size); 737 + nfp_prog->stack_size, max_stack); 785 738 return -EOPNOTSUPP; 786 739 } 787 740
+7
include/linux/bpf.h
··· 39 39 void *(*map_lookup_elem)(struct bpf_map *map, void *key); 40 40 int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); 41 41 int (*map_delete_elem)(struct bpf_map *map, void *key); 42 + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); 43 + int (*map_pop_elem)(struct bpf_map *map, void *value); 44 + int (*map_peek_elem)(struct bpf_map *map, void *value); 42 45 43 46 /* funcs called by prog_array and perf_event_array map */ 44 47 void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, ··· 141 138 ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ 142 139 ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ 143 140 ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ 141 + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ 144 142 145 143 /* the following constraints used to prototype bpf_memcmp() and other 146 144 * functions that access data on eBPF program stack ··· 814 810 extern const struct bpf_func_proto bpf_map_lookup_elem_proto; 815 811 extern const struct bpf_func_proto bpf_map_update_elem_proto; 816 812 extern const struct bpf_func_proto bpf_map_delete_elem_proto; 813 + extern const struct bpf_func_proto bpf_map_push_elem_proto; 814 + extern const struct bpf_func_proto bpf_map_pop_elem_proto; 815 + extern const struct bpf_func_proto bpf_map_peek_elem_proto; 817 816 818 817 extern const struct bpf_func_proto bpf_get_prandom_u32_proto; 819 818 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+3 -1
include/linux/bpf_types.h
··· 51 51 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) 52 52 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) 53 53 #ifdef CONFIG_PERF_EVENTS 54 - BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) 54 + BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) 55 55 #endif 56 56 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) 57 57 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) ··· 69 69 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) 70 70 #endif 71 71 #endif 72 + BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) 73 + BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
+21
include/linux/filter.h
··· 548 548 cb->data_end = skb->data + skb_headlen(skb); 549 549 } 550 550 551 + /* Similar to bpf_compute_data_pointers(), except that save orginal 552 + * data in cb->data and cb->meta_data for restore. 553 + */ 554 + static inline void bpf_compute_and_save_data_end( 555 + struct sk_buff *skb, void **saved_data_end) 556 + { 557 + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; 558 + 559 + *saved_data_end = cb->data_end; 560 + cb->data_end = skb->data + skb_headlen(skb); 561 + } 562 + 563 + /* Restore data saved by bpf_compute_data_pointers(). */ 564 + static inline void bpf_restore_data_end( 565 + struct sk_buff *skb, void *saved_data_end) 566 + { 567 + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; 568 + 569 + cb->data_end = saved_data_end; 570 + } 571 + 551 572 static inline u8 *bpf_skb_cb(struct sk_buff *skb) 552 573 { 553 574 /* eBPF programs may read/write skb->cb[] area to transfer meta
+36 -12
include/linux/skmsg.h
··· 176 176 { 177 177 dst->sg.data[which] = src->sg.data[which]; 178 178 dst->sg.data[which].length = size; 179 + dst->sg.size += size; 179 180 src->sg.data[which].length -= size; 180 181 src->sg.data[which].offset += size; 181 182 } ··· 187 186 sk_msg_init(src); 188 187 } 189 188 190 - static inline u32 sk_msg_elem_used(const struct sk_msg *msg) 191 - { 192 - return msg->sg.end >= msg->sg.start ? 193 - msg->sg.end - msg->sg.start : 194 - msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); 195 - } 196 - 197 189 static inline bool sk_msg_full(const struct sk_msg *msg) 198 190 { 199 191 return (msg->sg.end == msg->sg.start) && msg->sg.size; 200 192 } 201 193 194 + static inline u32 sk_msg_elem_used(const struct sk_msg *msg) 195 + { 196 + if (sk_msg_full(msg)) 197 + return MAX_MSG_FRAGS; 198 + 199 + return msg->sg.end >= msg->sg.start ? 200 + msg->sg.end - msg->sg.start : 201 + msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start); 202 + } 203 + 202 204 static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which) 203 205 { 204 206 return &msg->sg.data[which]; 207 + } 208 + 209 + static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which) 210 + { 211 + return msg->sg.data[which]; 205 212 } 206 213 207 214 static inline struct page *sk_msg_page(struct sk_msg *msg, int which) ··· 273 264 static inline struct sk_psock *sk_psock(const struct sock *sk) 274 265 { 275 266 return rcu_dereference_sk_user_data(sk); 276 - } 277 - 278 - static inline bool sk_has_psock(struct sock *sk) 279 - { 280 - return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg; 281 267 } 282 268 283 269 static inline void sk_psock_queue_msg(struct sk_psock *psock, ··· 372 368 enum sk_psock_state_bits bit) 373 369 { 374 370 return test_bit(bit, &psock->state); 371 + } 372 + 373 + static inline struct sk_psock *sk_psock_get_checked(struct sock *sk) 374 + { 375 + struct sk_psock *psock; 376 + 377 + rcu_read_lock(); 378 + psock = sk_psock(sk); 379 + if (psock) { 380 + if 
(sk->sk_prot->recvmsg != tcp_bpf_recvmsg) { 381 + psock = ERR_PTR(-EBUSY); 382 + goto out; 383 + } 384 + 385 + if (!refcount_inc_not_zero(&psock->refcnt)) 386 + psock = ERR_PTR(-EBUSY); 387 + } 388 + out: 389 + rcu_read_unlock(); 390 + return psock; 375 391 } 376 392 377 393 static inline struct sk_psock *sk_psock_get(struct sock *sk)
+1 -8
include/net/tcp.h
··· 2051 2051 #define TCP_ULP_MAX 128 2052 2052 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) 2053 2053 2054 - enum { 2055 - TCP_ULP_TLS, 2056 - TCP_ULP_BPF, 2057 - }; 2058 - 2059 2054 struct tcp_ulp_ops { 2060 2055 struct list_head list; 2061 2056 ··· 2059 2064 /* cleanup ulp */ 2060 2065 void (*release)(struct sock *sk); 2061 2066 2062 - int uid; 2063 2067 char name[TCP_ULP_NAME_MAX]; 2064 - bool user_visible; 2065 2068 struct module *owner; 2066 2069 }; 2067 2070 int tcp_register_ulp(struct tcp_ulp_ops *type); ··· 2082 2089 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 2083 2090 int nonblock, int flags, int *addr_len); 2084 2091 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 2085 - struct msghdr *msg, int len); 2092 + struct msghdr *msg, int len, int flags); 2086 2093 2087 2094 /* Call BPF_SOCK_OPS program that returns an int. If the return value 2088 2095 * is < 0, then the BPF op failed (for example if the loaded BPF
+48 -2
include/uapi/linux/bpf.h
··· 103 103 BPF_BTF_LOAD, 104 104 BPF_BTF_GET_FD_BY_ID, 105 105 BPF_TASK_FD_QUERY, 106 + BPF_MAP_LOOKUP_AND_DELETE_ELEM, 106 107 }; 107 108 108 109 enum bpf_map_type { ··· 129 128 BPF_MAP_TYPE_CGROUP_STORAGE, 130 129 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, 131 130 BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, 131 + BPF_MAP_TYPE_QUEUE, 132 + BPF_MAP_TYPE_STACK, 132 133 }; 133 134 134 135 enum bpf_prog_type { ··· 463 460 * Description 464 461 * Delete entry with *key* from *map*. 465 462 * Return 463 + * 0 on success, or a negative error in case of failure. 464 + * 465 + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) 466 + * Description 467 + * Push an element *value* in *map*. *flags* is one of: 468 + * 469 + * **BPF_EXIST** 470 + * If the queue/stack is full, the oldest element is removed to 471 + * make room for this. 472 + * Return 473 + * 0 on success, or a negative error in case of failure. 474 + * 475 + * int bpf_map_pop_elem(struct bpf_map *map, void *value) 476 + * Description 477 + * Pop an element from *map*. 478 + * Return 479 + * 0 on success, or a negative error in case of failure. 480 + * 481 + * int bpf_map_peek_elem(struct bpf_map *map, void *value) 482 + * Description 483 + * Get an element from *map* without removing it. 484 + * Return 466 485 * 0 on success, or a negative error in case of failure. 467 486 * 468 487 * int bpf_probe_read(void *dst, u32 size, const void *src) ··· 1458 1433 * Return 1459 1434 * 0 on success, or a negative error in case of failure. 1460 1435 * 1461 - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) 1436 + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) 1462 1437 * Description 1463 1438 * Grow or shrink the room for data in the packet associated to 1464 1439 * *skb* by *len_diff*, and according to the selected *mode*. ··· 2240 2215 * pointer that was returned from bpf_sk_lookup_xxx\ (). 
2241 2216 * Return 2242 2217 * 0 on success, or a negative error in case of failure. 2218 + * 2219 + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) 2220 + * Description 2221 + * For socket policies, insert *len* bytes into msg at offset 2222 + * *start*. 2223 + * 2224 + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 2225 + * *msg* it may want to insert metadata or options into the msg. 2226 + * This can later be read and used by any of the lower layer BPF 2227 + * hooks. 2228 + * 2229 + * This helper may fail if under memory pressure (a malloc 2230 + * fails) in these cases BPF programs will get an appropriate 2231 + * error and BPF programs will need to handle them. 2232 + * 2233 + * Return 2234 + * 0 on success, or a negative error in case of failure. 2243 2235 */ 2244 2236 #define __BPF_FUNC_MAPPER(FN) \ 2245 2237 FN(unspec), \ ··· 2345 2303 FN(skb_ancestor_cgroup_id), \ 2346 2304 FN(sk_lookup_tcp), \ 2347 2305 FN(sk_lookup_udp), \ 2348 - FN(sk_release), 2306 + FN(sk_release), \ 2307 + FN(map_push_elem), \ 2308 + FN(map_pop_elem), \ 2309 + FN(map_peek_elem), \ 2310 + FN(msg_push_data), 2349 2311 2350 2312 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2351 2313 * function eBPF program intends to call
+1 -1
kernel/bpf/Makefile
··· 3 3 4 4 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 5 5 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o 6 - obj-$(CONFIG_BPF_SYSCALL) += local_storage.o 6 + obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o 7 7 obj-$(CONFIG_BPF_SYSCALL) += disasm.o 8 8 obj-$(CONFIG_BPF_SYSCALL) += btf.o 9 9 ifeq ($(CONFIG_NET),y)
+6
kernel/bpf/cgroup.c
··· 553 553 { 554 554 unsigned int offset = skb->data - skb_network_header(skb); 555 555 struct sock *save_sk; 556 + void *saved_data_end; 556 557 struct cgroup *cgrp; 557 558 int ret; 558 559 ··· 567 566 save_sk = skb->sk; 568 567 skb->sk = sk; 569 568 __skb_push(skb, offset); 569 + 570 + /* compute pointers for the bpf prog */ 571 + bpf_compute_and_save_data_end(skb, &saved_data_end); 572 + 570 573 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, 571 574 bpf_prog_run_save_cb); 575 + bpf_restore_data_end(skb, saved_data_end); 572 576 __skb_pull(skb, offset); 573 577 skb->sk = save_sk; 574 578 return ret == 1 ? 0 : -EPERM;
+3
kernel/bpf/core.c
··· 1783 1783 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; 1784 1784 const struct bpf_func_proto bpf_map_update_elem_proto __weak; 1785 1785 const struct bpf_func_proto bpf_map_delete_elem_proto __weak; 1786 + const struct bpf_func_proto bpf_map_push_elem_proto __weak; 1787 + const struct bpf_func_proto bpf_map_pop_elem_proto __weak; 1788 + const struct bpf_func_proto bpf_map_peek_elem_proto __weak; 1786 1789 1787 1790 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; 1788 1791 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
+43
kernel/bpf/helpers.c
··· 76 76 .arg2_type = ARG_PTR_TO_MAP_KEY, 77 77 }; 78 78 79 + BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags) 80 + { 81 + return map->ops->map_push_elem(map, value, flags); 82 + } 83 + 84 + const struct bpf_func_proto bpf_map_push_elem_proto = { 85 + .func = bpf_map_push_elem, 86 + .gpl_only = false, 87 + .pkt_access = true, 88 + .ret_type = RET_INTEGER, 89 + .arg1_type = ARG_CONST_MAP_PTR, 90 + .arg2_type = ARG_PTR_TO_MAP_VALUE, 91 + .arg3_type = ARG_ANYTHING, 92 + }; 93 + 94 + BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value) 95 + { 96 + return map->ops->map_pop_elem(map, value); 97 + } 98 + 99 + const struct bpf_func_proto bpf_map_pop_elem_proto = { 100 + .func = bpf_map_pop_elem, 101 + .gpl_only = false, 102 + .pkt_access = true, 103 + .ret_type = RET_INTEGER, 104 + .arg1_type = ARG_CONST_MAP_PTR, 105 + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 106 + }; 107 + 108 + BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) 109 + { 110 + return map->ops->map_peek_elem(map, value); 111 + } 112 + 113 + const struct bpf_func_proto bpf_map_peek_elem_proto = { 114 + .func = bpf_map_pop_elem, 115 + .gpl_only = false, 116 + .pkt_access = true, 117 + .ret_type = RET_INTEGER, 118 + .arg1_type = ARG_CONST_MAP_PTR, 119 + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, 120 + }; 121 + 79 122 const struct bpf_func_proto bpf_get_prandom_u32_proto = { 80 123 .func = bpf_user_rnd_u32, 81 124 .gpl_only = false,
+288
kernel/bpf/queue_stack_maps.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * queue_stack_maps.c: BPF queue and stack maps 4 + * 5 + * Copyright (c) 2018 Politecnico di Torino 6 + */ 7 + #include <linux/bpf.h> 8 + #include <linux/list.h> 9 + #include <linux/slab.h> 10 + #include "percpu_freelist.h" 11 + 12 + #define QUEUE_STACK_CREATE_FLAG_MASK \ 13 + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) 14 + 15 + 16 + struct bpf_queue_stack { 17 + struct bpf_map map; 18 + raw_spinlock_t lock; 19 + u32 head, tail; 20 + u32 size; /* max_entries + 1 */ 21 + 22 + char elements[0] __aligned(8); 23 + }; 24 + 25 + static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) 26 + { 27 + return container_of(map, struct bpf_queue_stack, map); 28 + } 29 + 30 + static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs) 31 + { 32 + return qs->head == qs->tail; 33 + } 34 + 35 + static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) 36 + { 37 + u32 head = qs->head + 1; 38 + 39 + if (unlikely(head >= qs->size)) 40 + head = 0; 41 + 42 + return head == qs->tail; 43 + } 44 + 45 + /* Called from syscall */ 46 + static int queue_stack_map_alloc_check(union bpf_attr *attr) 47 + { 48 + /* check sanity of attributes */ 49 + if (attr->max_entries == 0 || attr->key_size != 0 || 50 + attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK) 51 + return -EINVAL; 52 + 53 + if (attr->value_size > KMALLOC_MAX_SIZE) 54 + /* if value_size is bigger, the user space won't be able to 55 + * access the elements. 
56 + */ 57 + return -E2BIG; 58 + 59 + return 0; 60 + } 61 + 62 + static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) 63 + { 64 + int ret, numa_node = bpf_map_attr_numa_node(attr); 65 + struct bpf_queue_stack *qs; 66 + u32 size, value_size; 67 + u64 queue_size, cost; 68 + 69 + size = attr->max_entries + 1; 70 + value_size = attr->value_size; 71 + 72 + queue_size = sizeof(*qs) + (u64) value_size * size; 73 + 74 + cost = queue_size; 75 + if (cost >= U32_MAX - PAGE_SIZE) 76 + return ERR_PTR(-E2BIG); 77 + 78 + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; 79 + 80 + ret = bpf_map_precharge_memlock(cost); 81 + if (ret < 0) 82 + return ERR_PTR(ret); 83 + 84 + qs = bpf_map_area_alloc(queue_size, numa_node); 85 + if (!qs) 86 + return ERR_PTR(-ENOMEM); 87 + 88 + memset(qs, 0, sizeof(*qs)); 89 + 90 + bpf_map_init_from_attr(&qs->map, attr); 91 + 92 + qs->map.pages = cost; 93 + qs->size = size; 94 + 95 + raw_spin_lock_init(&qs->lock); 96 + 97 + return &qs->map; 98 + } 99 + 100 + /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 101 + static void queue_stack_map_free(struct bpf_map *map) 102 + { 103 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 104 + 105 + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 106 + * so the programs (can be more than one that used this map) were 107 + * disconnected from events. 
Wait for outstanding critical sections in 108 + * these programs to complete 109 + */ 110 + synchronize_rcu(); 111 + 112 + bpf_map_area_free(qs); 113 + } 114 + 115 + static int __queue_map_get(struct bpf_map *map, void *value, bool delete) 116 + { 117 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 118 + unsigned long flags; 119 + int err = 0; 120 + void *ptr; 121 + 122 + raw_spin_lock_irqsave(&qs->lock, flags); 123 + 124 + if (queue_stack_map_is_empty(qs)) { 125 + err = -ENOENT; 126 + goto out; 127 + } 128 + 129 + ptr = &qs->elements[qs->tail * qs->map.value_size]; 130 + memcpy(value, ptr, qs->map.value_size); 131 + 132 + if (delete) { 133 + if (unlikely(++qs->tail >= qs->size)) 134 + qs->tail = 0; 135 + } 136 + 137 + out: 138 + raw_spin_unlock_irqrestore(&qs->lock, flags); 139 + return err; 140 + } 141 + 142 + 143 + static int __stack_map_get(struct bpf_map *map, void *value, bool delete) 144 + { 145 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 146 + unsigned long flags; 147 + int err = 0; 148 + void *ptr; 149 + u32 index; 150 + 151 + raw_spin_lock_irqsave(&qs->lock, flags); 152 + 153 + if (queue_stack_map_is_empty(qs)) { 154 + err = -ENOENT; 155 + goto out; 156 + } 157 + 158 + index = qs->head - 1; 159 + if (unlikely(index >= qs->size)) 160 + index = qs->size - 1; 161 + 162 + ptr = &qs->elements[index * qs->map.value_size]; 163 + memcpy(value, ptr, qs->map.value_size); 164 + 165 + if (delete) 166 + qs->head = index; 167 + 168 + out: 169 + raw_spin_unlock_irqrestore(&qs->lock, flags); 170 + return err; 171 + } 172 + 173 + /* Called from syscall or from eBPF program */ 174 + static int queue_map_peek_elem(struct bpf_map *map, void *value) 175 + { 176 + return __queue_map_get(map, value, false); 177 + } 178 + 179 + /* Called from syscall or from eBPF program */ 180 + static int stack_map_peek_elem(struct bpf_map *map, void *value) 181 + { 182 + return __stack_map_get(map, value, false); 183 + } 184 + 185 + /* Called from syscall or from eBPF program */ 
186 + static int queue_map_pop_elem(struct bpf_map *map, void *value) 187 + { 188 + return __queue_map_get(map, value, true); 189 + } 190 + 191 + /* Called from syscall or from eBPF program */ 192 + static int stack_map_pop_elem(struct bpf_map *map, void *value) 193 + { 194 + return __stack_map_get(map, value, true); 195 + } 196 + 197 + /* Called from syscall or from eBPF program */ 198 + static int queue_stack_map_push_elem(struct bpf_map *map, void *value, 199 + u64 flags) 200 + { 201 + struct bpf_queue_stack *qs = bpf_queue_stack(map); 202 + unsigned long irq_flags; 203 + int err = 0; 204 + void *dst; 205 + 206 + /* BPF_EXIST is used to force making room for a new element in case the 207 + * map is full 208 + */ 209 + bool replace = (flags & BPF_EXIST); 210 + 211 + /* Check supported flags for queue and stack maps */ 212 + if (flags & BPF_NOEXIST || flags > BPF_EXIST) 213 + return -EINVAL; 214 + 215 + raw_spin_lock_irqsave(&qs->lock, irq_flags); 216 + 217 + if (queue_stack_map_is_full(qs)) { 218 + if (!replace) { 219 + err = -E2BIG; 220 + goto out; 221 + } 222 + /* advance tail pointer to overwrite oldest element */ 223 + if (unlikely(++qs->tail >= qs->size)) 224 + qs->tail = 0; 225 + } 226 + 227 + dst = &qs->elements[qs->head * qs->map.value_size]; 228 + memcpy(dst, value, qs->map.value_size); 229 + 230 + if (unlikely(++qs->head >= qs->size)) 231 + qs->head = 0; 232 + 233 + out: 234 + raw_spin_unlock_irqrestore(&qs->lock, irq_flags); 235 + return err; 236 + } 237 + 238 + /* Called from syscall or from eBPF program */ 239 + static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key) 240 + { 241 + return NULL; 242 + } 243 + 244 + /* Called from syscall or from eBPF program */ 245 + static int queue_stack_map_update_elem(struct bpf_map *map, void *key, 246 + void *value, u64 flags) 247 + { 248 + return -EINVAL; 249 + } 250 + 251 + /* Called from syscall or from eBPF program */ 252 + static int queue_stack_map_delete_elem(struct bpf_map *map, void 
*key) 253 + { 254 + return -EINVAL; 255 + } 256 + 257 + /* Called from syscall */ 258 + static int queue_stack_map_get_next_key(struct bpf_map *map, void *key, 259 + void *next_key) 260 + { 261 + return -EINVAL; 262 + } 263 + 264 + const struct bpf_map_ops queue_map_ops = { 265 + .map_alloc_check = queue_stack_map_alloc_check, 266 + .map_alloc = queue_stack_map_alloc, 267 + .map_free = queue_stack_map_free, 268 + .map_lookup_elem = queue_stack_map_lookup_elem, 269 + .map_update_elem = queue_stack_map_update_elem, 270 + .map_delete_elem = queue_stack_map_delete_elem, 271 + .map_push_elem = queue_stack_map_push_elem, 272 + .map_pop_elem = queue_map_pop_elem, 273 + .map_peek_elem = queue_map_peek_elem, 274 + .map_get_next_key = queue_stack_map_get_next_key, 275 + }; 276 + 277 + const struct bpf_map_ops stack_map_ops = { 278 + .map_alloc_check = queue_stack_map_alloc_check, 279 + .map_alloc = queue_stack_map_alloc, 280 + .map_free = queue_stack_map_free, 281 + .map_lookup_elem = queue_stack_map_lookup_elem, 282 + .map_update_elem = queue_stack_map_update_elem, 283 + .map_delete_elem = queue_stack_map_delete_elem, 284 + .map_push_elem = queue_stack_map_push_elem, 285 + .map_pop_elem = stack_map_pop_elem, 286 + .map_peek_elem = stack_map_peek_elem, 287 + .map_get_next_key = queue_stack_map_get_next_key, 288 + };
+1 -1
kernel/bpf/stackmap.c
··· 600 600 put_callchain_buffers(); 601 601 } 602 602 603 - const struct bpf_map_ops stack_map_ops = { 603 + const struct bpf_map_ops stack_trace_map_ops = { 604 604 .map_alloc = stack_map_alloc, 605 605 .map_free = stack_map_free, 606 606 .map_get_next_key = stack_map_get_next_key,
+87 -4
kernel/bpf/syscall.c
··· 651 651 return -ENOTSUPP; 652 652 } 653 653 654 + static void *__bpf_copy_key(void __user *ukey, u64 key_size) 655 + { 656 + if (key_size) 657 + return memdup_user(ukey, key_size); 658 + 659 + if (ukey) 660 + return ERR_PTR(-EINVAL); 661 + 662 + return NULL; 663 + } 664 + 654 665 /* last field in 'union bpf_attr' used by this command */ 655 666 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value 656 667 ··· 689 678 goto err_put; 690 679 } 691 680 692 - key = memdup_user(ukey, map->key_size); 681 + key = __bpf_copy_key(ukey, map->key_size); 693 682 if (IS_ERR(key)) { 694 683 err = PTR_ERR(key); 695 684 goto err_put; ··· 727 716 err = bpf_fd_htab_map_lookup_elem(map, key, value); 728 717 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 729 718 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 719 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 720 + map->map_type == BPF_MAP_TYPE_STACK) { 721 + err = map->ops->map_peek_elem(map, value); 730 722 } else { 731 723 rcu_read_lock(); 732 724 ptr = map->ops->map_lookup_elem(map, key); ··· 799 785 goto err_put; 800 786 } 801 787 802 - key = memdup_user(ukey, map->key_size); 788 + key = __bpf_copy_key(ukey, map->key_size); 803 789 if (IS_ERR(key)) { 804 790 err = PTR_ERR(key); 805 791 goto err_put; ··· 860 846 /* rcu_read_lock() is not needed */ 861 847 err = bpf_fd_reuseport_array_update_elem(map, key, value, 862 848 attr->flags); 849 + } else if (map->map_type == BPF_MAP_TYPE_QUEUE || 850 + map->map_type == BPF_MAP_TYPE_STACK) { 851 + err = map->ops->map_push_elem(map, value, attr->flags); 863 852 } else { 864 853 rcu_read_lock(); 865 854 err = map->ops->map_update_elem(map, key, value, attr->flags); ··· 905 888 goto err_put; 906 889 } 907 890 908 - key = memdup_user(ukey, map->key_size); 891 + key = __bpf_copy_key(ukey, map->key_size); 909 892 if (IS_ERR(key)) { 910 893 err = PTR_ERR(key); 911 894 goto err_put; ··· 958 941 } 959 942 960 943 if (ukey) { 961 - key = memdup_user(ukey, map->key_size); 
944 + key = __bpf_copy_key(ukey, map->key_size); 962 945 if (IS_ERR(key)) { 963 946 err = PTR_ERR(key); 964 947 goto err_put; ··· 992 975 993 976 free_next_key: 994 977 kfree(next_key); 978 + free_key: 979 + kfree(key); 980 + err_put: 981 + fdput(f); 982 + return err; 983 + } 984 + 985 + #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value 986 + 987 + static int map_lookup_and_delete_elem(union bpf_attr *attr) 988 + { 989 + void __user *ukey = u64_to_user_ptr(attr->key); 990 + void __user *uvalue = u64_to_user_ptr(attr->value); 991 + int ufd = attr->map_fd; 992 + struct bpf_map *map; 993 + void *key, *value; 994 + u32 value_size; 995 + struct fd f; 996 + int err; 997 + 998 + if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) 999 + return -EINVAL; 1000 + 1001 + f = fdget(ufd); 1002 + map = __bpf_map_get(f); 1003 + if (IS_ERR(map)) 1004 + return PTR_ERR(map); 1005 + 1006 + if (!(f.file->f_mode & FMODE_CAN_WRITE)) { 1007 + err = -EPERM; 1008 + goto err_put; 1009 + } 1010 + 1011 + key = __bpf_copy_key(ukey, map->key_size); 1012 + if (IS_ERR(key)) { 1013 + err = PTR_ERR(key); 1014 + goto err_put; 1015 + } 1016 + 1017 + value_size = map->value_size; 1018 + 1019 + err = -ENOMEM; 1020 + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1021 + if (!value) 1022 + goto free_key; 1023 + 1024 + if (map->map_type == BPF_MAP_TYPE_QUEUE || 1025 + map->map_type == BPF_MAP_TYPE_STACK) { 1026 + err = map->ops->map_pop_elem(map, value); 1027 + } else { 1028 + err = -ENOTSUPP; 1029 + } 1030 + 1031 + if (err) 1032 + goto free_value; 1033 + 1034 + if (copy_to_user(uvalue, value, value_size) != 0) 1035 + goto free_value; 1036 + 1037 + err = 0; 1038 + 1039 + free_value: 1040 + kfree(value); 995 1041 free_key: 996 1042 kfree(key); 997 1043 err_put: ··· 2534 2454 break; 2535 2455 case BPF_TASK_FD_QUERY: 2536 2456 err = bpf_task_fd_query(&attr, uattr); 2457 + break; 2458 + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: 2459 + err = map_lookup_and_delete_elem(&attr); 2537 2460 break; 2538 2461 
default: 2539 2462 err = -EINVAL;
+71 -14
kernel/bpf/verifier.c
··· 1528 1528 return reg->type != SCALAR_VALUE; 1529 1529 } 1530 1530 1531 + static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) 1532 + { 1533 + return cur_regs(env) + regno; 1534 + } 1535 + 1531 1536 static bool is_pointer_value(struct bpf_verifier_env *env, int regno) 1532 1537 { 1533 - return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); 1538 + return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); 1534 1539 } 1535 1540 1536 1541 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) 1537 1542 { 1538 - const struct bpf_reg_state *reg = cur_regs(env) + regno; 1543 + const struct bpf_reg_state *reg = reg_state(env, regno); 1539 1544 1540 1545 return reg->type == PTR_TO_CTX || 1541 1546 reg->type == PTR_TO_SOCKET; ··· 1548 1543 1549 1544 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 1550 1545 { 1551 - const struct bpf_reg_state *reg = cur_regs(env) + regno; 1546 + const struct bpf_reg_state *reg = reg_state(env, regno); 1552 1547 1553 1548 return type_is_pkt_pointer(reg->type); 1549 + } 1550 + 1551 + static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) 1552 + { 1553 + const struct bpf_reg_state *reg = reg_state(env, regno); 1554 + 1555 + /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. 
*/ 1556 + return reg->type == PTR_TO_FLOW_KEYS; 1554 1557 } 1555 1558 1556 1559 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, ··· 1969 1956 } 1970 1957 1971 1958 if (is_ctx_reg(env, insn->dst_reg) || 1972 - is_pkt_reg(env, insn->dst_reg)) { 1959 + is_pkt_reg(env, insn->dst_reg) || 1960 + is_flow_key_reg(env, insn->dst_reg)) { 1973 1961 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 1974 - insn->dst_reg, reg_type_str[insn->dst_reg]); 1962 + insn->dst_reg, 1963 + reg_type_str[reg_state(env, insn->dst_reg)->type]); 1975 1964 return -EACCES; 1976 1965 } 1977 1966 ··· 1998 1983 int access_size, bool zero_size_allowed, 1999 1984 struct bpf_call_arg_meta *meta) 2000 1985 { 2001 - struct bpf_reg_state *reg = cur_regs(env) + regno; 1986 + struct bpf_reg_state *reg = reg_state(env, regno); 2002 1987 struct bpf_func_state *state = func(env, reg); 2003 1988 int off, i, slot, spi; 2004 1989 ··· 2077 2062 case PTR_TO_PACKET_META: 2078 2063 return check_packet_access(env, regno, reg->off, access_size, 2079 2064 zero_size_allowed); 2080 - case PTR_TO_FLOW_KEYS: 2081 - return check_flow_keys_access(env, reg->off, access_size); 2082 2065 case PTR_TO_MAP_VALUE: 2083 2066 return check_map_access(env, regno, reg->off, access_size, 2084 2067 zero_size_allowed); ··· 2130 2117 } 2131 2118 2132 2119 if (arg_type == ARG_PTR_TO_MAP_KEY || 2133 - arg_type == ARG_PTR_TO_MAP_VALUE) { 2120 + arg_type == ARG_PTR_TO_MAP_VALUE || 2121 + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 2134 2122 expected_type = PTR_TO_STACK; 2135 2123 if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && 2136 2124 type != expected_type) ··· 2201 2187 err = check_helper_mem_access(env, regno, 2202 2188 meta->map_ptr->key_size, false, 2203 2189 NULL); 2204 - } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { 2190 + } else if (arg_type == ARG_PTR_TO_MAP_VALUE || 2191 + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { 2205 2192 /* bpf_map_xxx(..., map_ptr, ..., value) call: 2206 2193 * check 
[value, value + map->value_size) validity 2207 2194 */ ··· 2211 2196 verbose(env, "invalid map_ptr to access map->value\n"); 2212 2197 return -EACCES; 2213 2198 } 2199 + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); 2214 2200 err = check_helper_mem_access(env, regno, 2215 2201 meta->map_ptr->value_size, false, 2216 - NULL); 2202 + meta); 2217 2203 } else if (arg_type_is_mem_size(arg_type)) { 2218 2204 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 2219 2205 ··· 2337 2321 if (func_id != BPF_FUNC_sk_select_reuseport) 2338 2322 goto error; 2339 2323 break; 2324 + case BPF_MAP_TYPE_QUEUE: 2325 + case BPF_MAP_TYPE_STACK: 2326 + if (func_id != BPF_FUNC_map_peek_elem && 2327 + func_id != BPF_FUNC_map_pop_elem && 2328 + func_id != BPF_FUNC_map_push_elem) 2329 + goto error; 2330 + break; 2340 2331 default: 2341 2332 break; 2342 2333 } ··· 2398 2375 break; 2399 2376 case BPF_FUNC_sk_select_reuseport: 2400 2377 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) 2378 + goto error; 2379 + break; 2380 + case BPF_FUNC_map_peek_elem: 2381 + case BPF_FUNC_map_pop_elem: 2382 + case BPF_FUNC_map_push_elem: 2383 + if (map->map_type != BPF_MAP_TYPE_QUEUE && 2384 + map->map_type != BPF_MAP_TYPE_STACK) 2401 2385 goto error; 2402 2386 break; 2403 2387 default: ··· 2702 2672 if (func_id != BPF_FUNC_tail_call && 2703 2673 func_id != BPF_FUNC_map_lookup_elem && 2704 2674 func_id != BPF_FUNC_map_update_elem && 2705 - func_id != BPF_FUNC_map_delete_elem) 2675 + func_id != BPF_FUNC_map_delete_elem && 2676 + func_id != BPF_FUNC_map_push_elem && 2677 + func_id != BPF_FUNC_map_pop_elem && 2678 + func_id != BPF_FUNC_map_peek_elem) 2706 2679 return 0; 2707 2680 2708 2681 if (meta->map_ptr == NULL) { ··· 5277 5244 5278 5245 if (is_ctx_reg(env, insn->dst_reg)) { 5279 5246 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", 5280 - insn->dst_reg, reg_type_str[insn->dst_reg]); 5247 + insn->dst_reg, 5248 + reg_type_str[reg_state(env, insn->dst_reg)->type]); 5281 
5249 return -EACCES; 5282 5250 } 5283 5251 ··· 6178 6144 if (prog->jit_requested && BITS_PER_LONG == 64 && 6179 6145 (insn->imm == BPF_FUNC_map_lookup_elem || 6180 6146 insn->imm == BPF_FUNC_map_update_elem || 6181 - insn->imm == BPF_FUNC_map_delete_elem)) { 6147 + insn->imm == BPF_FUNC_map_delete_elem || 6148 + insn->imm == BPF_FUNC_map_push_elem || 6149 + insn->imm == BPF_FUNC_map_pop_elem || 6150 + insn->imm == BPF_FUNC_map_peek_elem)) { 6182 6151 aux = &env->insn_aux_data[i + delta]; 6183 6152 if (bpf_map_ptr_poisoned(aux)) 6184 6153 goto patch_call_imm; ··· 6214 6177 BUILD_BUG_ON(!__same_type(ops->map_update_elem, 6215 6178 (int (*)(struct bpf_map *map, void *key, void *value, 6216 6179 u64 flags))NULL)); 6180 + BUILD_BUG_ON(!__same_type(ops->map_push_elem, 6181 + (int (*)(struct bpf_map *map, void *value, 6182 + u64 flags))NULL)); 6183 + BUILD_BUG_ON(!__same_type(ops->map_pop_elem, 6184 + (int (*)(struct bpf_map *map, void *value))NULL)); 6185 + BUILD_BUG_ON(!__same_type(ops->map_peek_elem, 6186 + (int (*)(struct bpf_map *map, void *value))NULL)); 6187 + 6217 6188 switch (insn->imm) { 6218 6189 case BPF_FUNC_map_lookup_elem: 6219 6190 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - ··· 6233 6188 continue; 6234 6189 case BPF_FUNC_map_delete_elem: 6235 6190 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - 6191 + __bpf_call_base; 6192 + continue; 6193 + case BPF_FUNC_map_push_elem: 6194 + insn->imm = BPF_CAST_CALL(ops->map_push_elem) - 6195 + __bpf_call_base; 6196 + continue; 6197 + case BPF_FUNC_map_pop_elem: 6198 + insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - 6199 + __bpf_call_base; 6200 + continue; 6201 + case BPF_FUNC_map_peek_elem: 6202 + insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - 6236 6203 __bpf_call_base; 6237 6204 continue; 6238 6205 }
+17 -2
net/bpf/test_run.c
··· 10 10 #include <linux/etherdevice.h> 11 11 #include <linux/filter.h> 12 12 #include <linux/sched/signal.h> 13 + #include <net/sock.h> 14 + #include <net/tcp.h> 13 15 14 16 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx, 15 17 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) ··· 117 115 u32 retval, duration; 118 116 int hh_len = ETH_HLEN; 119 117 struct sk_buff *skb; 118 + struct sock *sk; 120 119 void *data; 121 120 int ret; 122 121 ··· 140 137 break; 141 138 } 142 139 143 - skb = build_skb(data, 0); 144 - if (!skb) { 140 + sk = kzalloc(sizeof(struct sock), GFP_USER); 141 + if (!sk) { 145 142 kfree(data); 146 143 return -ENOMEM; 147 144 } 145 + sock_net_set(sk, current->nsproxy->net_ns); 146 + sock_init_data(NULL, sk); 147 + 148 + skb = build_skb(data, 0); 149 + if (!skb) { 150 + kfree(data); 151 + kfree(sk); 152 + return -ENOMEM; 153 + } 154 + skb->sk = sk; 148 155 149 156 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 150 157 __skb_put(skb, size); ··· 172 159 173 160 if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { 174 161 kfree_skb(skb); 162 + kfree(sk); 175 163 return -ENOMEM; 176 164 } 177 165 } ··· 185 171 size = skb_headlen(skb); 186 172 ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); 187 173 kfree_skb(skb); 174 + kfree(sk); 188 175 return ret; 189 176 } 190 177
+175 -1
net/core/filter.c
··· 2297 2297 .arg4_type = ARG_ANYTHING, 2298 2298 }; 2299 2299 2300 + BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, 2301 + u32, len, u64, flags) 2302 + { 2303 + struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; 2304 + u32 new, i = 0, l, space, copy = 0, offset = 0; 2305 + u8 *raw, *to, *from; 2306 + struct page *page; 2307 + 2308 + if (unlikely(flags)) 2309 + return -EINVAL; 2310 + 2311 + /* First find the starting scatterlist element */ 2312 + i = msg->sg.start; 2313 + do { 2314 + l = sk_msg_elem(msg, i)->length; 2315 + 2316 + if (start < offset + l) 2317 + break; 2318 + offset += l; 2319 + sk_msg_iter_var_next(i); 2320 + } while (i != msg->sg.end); 2321 + 2322 + if (start >= offset + l) 2323 + return -EINVAL; 2324 + 2325 + space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); 2326 + 2327 + /* If no space available will fallback to copy, we need at 2328 + * least one scatterlist elem available to push data into 2329 + * when start aligns to the beginning of an element or two 2330 + * when it falls inside an element. We handle the start equals 2331 + * offset case because its the common case for inserting a 2332 + * header. 
2333 + */ 2334 + if (!space || (space == 1 && start != offset)) 2335 + copy = msg->sg.data[i].length; 2336 + 2337 + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, 2338 + get_order(copy + len)); 2339 + if (unlikely(!page)) 2340 + return -ENOMEM; 2341 + 2342 + if (copy) { 2343 + int front, back; 2344 + 2345 + raw = page_address(page); 2346 + 2347 + psge = sk_msg_elem(msg, i); 2348 + front = start - offset; 2349 + back = psge->length - front; 2350 + from = sg_virt(psge); 2351 + 2352 + if (front) 2353 + memcpy(raw, from, front); 2354 + 2355 + if (back) { 2356 + from += front; 2357 + to = raw + front + len; 2358 + 2359 + memcpy(to, from, back); 2360 + } 2361 + 2362 + put_page(sg_page(psge)); 2363 + } else if (start - offset) { 2364 + psge = sk_msg_elem(msg, i); 2365 + rsge = sk_msg_elem_cpy(msg, i); 2366 + 2367 + psge->length = start - offset; 2368 + rsge.length -= psge->length; 2369 + rsge.offset += start; 2370 + 2371 + sk_msg_iter_var_next(i); 2372 + sg_unmark_end(psge); 2373 + sk_msg_iter_next(msg, end); 2374 + } 2375 + 2376 + /* Slot(s) to place newly allocated data */ 2377 + new = i; 2378 + 2379 + /* Shift one or two slots as needed */ 2380 + if (!copy) { 2381 + sge = sk_msg_elem_cpy(msg, i); 2382 + 2383 + sk_msg_iter_var_next(i); 2384 + sg_unmark_end(&sge); 2385 + sk_msg_iter_next(msg, end); 2386 + 2387 + nsge = sk_msg_elem_cpy(msg, i); 2388 + if (rsge.length) { 2389 + sk_msg_iter_var_next(i); 2390 + nnsge = sk_msg_elem_cpy(msg, i); 2391 + } 2392 + 2393 + while (i != msg->sg.end) { 2394 + msg->sg.data[i] = sge; 2395 + sge = nsge; 2396 + sk_msg_iter_var_next(i); 2397 + if (rsge.length) { 2398 + nsge = nnsge; 2399 + nnsge = sk_msg_elem_cpy(msg, i); 2400 + } else { 2401 + nsge = sk_msg_elem_cpy(msg, i); 2402 + } 2403 + } 2404 + } 2405 + 2406 + /* Place newly allocated data buffer */ 2407 + sk_mem_charge(msg->sk, len); 2408 + msg->sg.size += len; 2409 + msg->sg.copy[new] = false; 2410 + sg_set_page(&msg->sg.data[new], page, len + copy, 0); 2411 + if 
(rsge.length) { 2412 + get_page(sg_page(&rsge)); 2413 + sk_msg_iter_var_next(new); 2414 + msg->sg.data[new] = rsge; 2415 + } 2416 + 2417 + sk_msg_compute_data_pointers(msg); 2418 + return 0; 2419 + } 2420 + 2421 + static const struct bpf_func_proto bpf_msg_push_data_proto = { 2422 + .func = bpf_msg_push_data, 2423 + .gpl_only = false, 2424 + .ret_type = RET_INTEGER, 2425 + .arg1_type = ARG_PTR_TO_CTX, 2426 + .arg2_type = ARG_ANYTHING, 2427 + .arg3_type = ARG_ANYTHING, 2428 + .arg4_type = ARG_ANYTHING, 2429 + }; 2430 + 2300 2431 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) 2301 2432 { 2302 2433 return task_get_classid(skb); ··· 4985 4854 func == bpf_xdp_adjust_head || 4986 4855 func == bpf_xdp_adjust_meta || 4987 4856 func == bpf_msg_pull_data || 4857 + func == bpf_msg_push_data || 4988 4858 func == bpf_xdp_adjust_tail || 4989 4859 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) 4990 4860 func == bpf_lwt_seg6_store_bytes || ··· 5008 4876 return &bpf_map_update_elem_proto; 5009 4877 case BPF_FUNC_map_delete_elem: 5010 4878 return &bpf_map_delete_elem_proto; 4879 + case BPF_FUNC_map_push_elem: 4880 + return &bpf_map_push_elem_proto; 4881 + case BPF_FUNC_map_pop_elem: 4882 + return &bpf_map_pop_elem_proto; 4883 + case BPF_FUNC_map_peek_elem: 4884 + return &bpf_map_peek_elem_proto; 5011 4885 case BPF_FUNC_get_prandom_u32: 5012 4886 return &bpf_get_prandom_u32_proto; 5013 4887 case BPF_FUNC_get_smp_processor_id: ··· 5262 5124 return &bpf_msg_cork_bytes_proto; 5263 5125 case BPF_FUNC_msg_pull_data: 5264 5126 return &bpf_msg_pull_data_proto; 5127 + case BPF_FUNC_msg_push_data: 5128 + return &bpf_msg_push_data_proto; 5265 5129 case BPF_FUNC_get_local_storage: 5266 5130 return &bpf_get_local_storage_proto; 5267 5131 default: ··· 5481 5341 default: 5482 5342 return false; 5483 5343 } 5344 + } 5345 + 5346 + return bpf_skb_is_valid_access(off, size, type, prog, info); 5347 + } 5348 + 5349 + static bool cg_skb_is_valid_access(int off, int size, 5350 + enum 
bpf_access_type type, 5351 + const struct bpf_prog *prog, 5352 + struct bpf_insn_access_aux *info) 5353 + { 5354 + switch (off) { 5355 + case bpf_ctx_range(struct __sk_buff, tc_classid): 5356 + case bpf_ctx_range(struct __sk_buff, data_meta): 5357 + case bpf_ctx_range(struct __sk_buff, flow_keys): 5358 + return false; 5359 + } 5360 + if (type == BPF_WRITE) { 5361 + switch (off) { 5362 + case bpf_ctx_range(struct __sk_buff, mark): 5363 + case bpf_ctx_range(struct __sk_buff, priority): 5364 + case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): 5365 + break; 5366 + default: 5367 + return false; 5368 + } 5369 + } 5370 + 5371 + switch (off) { 5372 + case bpf_ctx_range(struct __sk_buff, data): 5373 + info->reg_type = PTR_TO_PACKET; 5374 + break; 5375 + case bpf_ctx_range(struct __sk_buff, data_end): 5376 + info->reg_type = PTR_TO_PACKET_END; 5377 + break; 5484 5378 } 5485 5379 5486 5380 return bpf_skb_is_valid_access(off, size, type, prog, info); ··· 7212 7038 7213 7039 const struct bpf_verifier_ops cg_skb_verifier_ops = { 7214 7040 .get_func_proto = cg_skb_func_proto, 7215 - .is_valid_access = sk_filter_is_valid_access, 7041 + .is_valid_access = cg_skb_is_valid_access, 7216 7042 .convert_ctx_access = bpf_convert_ctx_access, 7217 7043 }; 7218 7044
+6 -5
net/core/sock_map.c
··· 175 175 } 176 176 } 177 177 178 - psock = sk_psock_get(sk); 178 + psock = sk_psock_get_checked(sk); 179 + if (IS_ERR(psock)) { 180 + ret = PTR_ERR(psock); 181 + goto out_progs; 182 + } 183 + 179 184 if (psock) { 180 - if (!sk_has_psock(sk)) { 181 - ret = -EBUSY; 182 - goto out_progs; 183 - } 184 185 if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || 185 186 (skb_progs && READ_ONCE(psock->progs.skb_parser))) { 186 187 sk_psock_put(sk, psock);
+27 -14
net/ipv4/tcp_bpf.c
··· 39 39 } 40 40 41 41 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 42 - struct msghdr *msg, int len) 42 + struct msghdr *msg, int len, int flags) 43 43 { 44 44 struct iov_iter *iter = &msg->msg_iter; 45 + int peek = flags & MSG_PEEK; 45 46 int i, ret, copied = 0; 47 + struct sk_msg *msg_rx; 48 + 49 + msg_rx = list_first_entry_or_null(&psock->ingress_msg, 50 + struct sk_msg, list); 46 51 47 52 while (copied != len) { 48 53 struct scatterlist *sge; 49 - struct sk_msg *msg_rx; 50 54 51 - msg_rx = list_first_entry_or_null(&psock->ingress_msg, 52 - struct sk_msg, list); 53 55 if (unlikely(!msg_rx)) 54 56 break; 55 57 ··· 72 70 } 73 71 74 72 copied += copy; 75 - sge->offset += copy; 76 - sge->length -= copy; 77 - sk_mem_uncharge(sk, copy); 78 - if (!sge->length) { 79 - i++; 80 - if (i == MAX_SKB_FRAGS) 81 - i = 0; 82 - if (!msg_rx->skb) 83 - put_page(page); 73 + if (likely(!peek)) { 74 + sge->offset += copy; 75 + sge->length -= copy; 76 + sk_mem_uncharge(sk, copy); 77 + msg_rx->sg.size -= copy; 78 + 79 + if (!sge->length) { 80 + sk_msg_iter_var_next(i); 81 + if (!msg_rx->skb) 82 + put_page(page); 83 + } 84 + } else { 85 + sk_msg_iter_var_next(i); 84 86 } 85 87 86 88 if (copied == len) 87 89 break; 88 90 } while (i != msg_rx->sg.end); 91 + 92 + if (unlikely(peek)) { 93 + msg_rx = list_next_entry(msg_rx, list); 94 + continue; 95 + } 89 96 90 97 msg_rx->sg.start = i; 91 98 if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { ··· 103 92 consume_skb(msg_rx->skb); 104 93 kfree(msg_rx); 105 94 } 95 + msg_rx = list_first_entry_or_null(&psock->ingress_msg, 96 + struct sk_msg, list); 106 97 } 107 98 108 99 return copied; ··· 127 114 return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 128 115 lock_sock(sk); 129 116 msg_bytes_ready: 130 - copied = __tcp_bpf_recvmsg(sk, psock, msg, len); 117 + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); 131 118 if (!copied) { 132 119 int data, err = 0; 133 120 long timeo;
-2
net/tls/tls_main.c
··· 715 715 716 716 static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { 717 717 .name = "tls", 718 - .uid = TCP_ULP_TLS, 719 - .user_visible = true, 720 718 .owner = THIS_MODULE, 721 719 .init = tls_init, 722 720 };
+2 -1
net/tls/tls_sw.c
··· 1478 1478 skb = tls_wait_data(sk, psock, flags, timeo, &err); 1479 1479 if (!skb) { 1480 1480 if (psock) { 1481 - int ret = __tcp_bpf_recvmsg(sk, psock, msg, len); 1481 + int ret = __tcp_bpf_recvmsg(sk, psock, 1482 + msg, len, flags); 1482 1483 1483 1484 if (ret > 0) { 1484 1485 copied += ret;
+70
tools/arch/arm64/include/asm/barrier.h
··· 14 14 #define wmb() asm volatile("dmb ishst" ::: "memory") 15 15 #define rmb() asm volatile("dmb ishld" ::: "memory") 16 16 17 + #define smp_store_release(p, v) \ 18 + do { \ 19 + union { typeof(*p) __val; char __c[1]; } __u = \ 20 + { .__val = (__force typeof(*p)) (v) }; \ 21 + \ 22 + switch (sizeof(*p)) { \ 23 + case 1: \ 24 + asm volatile ("stlrb %w1, %0" \ 25 + : "=Q" (*p) \ 26 + : "r" (*(__u8 *)__u.__c) \ 27 + : "memory"); \ 28 + break; \ 29 + case 2: \ 30 + asm volatile ("stlrh %w1, %0" \ 31 + : "=Q" (*p) \ 32 + : "r" (*(__u16 *)__u.__c) \ 33 + : "memory"); \ 34 + break; \ 35 + case 4: \ 36 + asm volatile ("stlr %w1, %0" \ 37 + : "=Q" (*p) \ 38 + : "r" (*(__u32 *)__u.__c) \ 39 + : "memory"); \ 40 + break; \ 41 + case 8: \ 42 + asm volatile ("stlr %1, %0" \ 43 + : "=Q" (*p) \ 44 + : "r" (*(__u64 *)__u.__c) \ 45 + : "memory"); \ 46 + break; \ 47 + default: \ 48 + /* Only to shut up gcc ... */ \ 49 + mb(); \ 50 + break; \ 51 + } \ 52 + } while (0) 53 + 54 + #define smp_load_acquire(p) \ 55 + ({ \ 56 + union { typeof(*p) __val; char __c[1]; } __u; \ 57 + \ 58 + switch (sizeof(*p)) { \ 59 + case 1: \ 60 + asm volatile ("ldarb %w0, %1" \ 61 + : "=r" (*(__u8 *)__u.__c) \ 62 + : "Q" (*p) : "memory"); \ 63 + break; \ 64 + case 2: \ 65 + asm volatile ("ldarh %w0, %1" \ 66 + : "=r" (*(__u16 *)__u.__c) \ 67 + : "Q" (*p) : "memory"); \ 68 + break; \ 69 + case 4: \ 70 + asm volatile ("ldar %w0, %1" \ 71 + : "=r" (*(__u32 *)__u.__c) \ 72 + : "Q" (*p) : "memory"); \ 73 + break; \ 74 + case 8: \ 75 + asm volatile ("ldar %0, %1" \ 76 + : "=r" (*(__u64 *)__u.__c) \ 77 + : "Q" (*p) : "memory"); \ 78 + break; \ 79 + default: \ 80 + /* Only to shut up gcc ... */ \ 81 + mb(); \ 82 + break; \ 83 + } \ 84 + __u.__val; \ 85 + }) 86 + 17 87 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
+13
tools/arch/ia64/include/asm/barrier.h
··· 46 46 #define rmb() mb() 47 47 #define wmb() mb() 48 48 49 + #define smp_store_release(p, v) \ 50 + do { \ 51 + barrier(); \ 52 + WRITE_ONCE(*p, v); \ 53 + } while (0) 54 + 55 + #define smp_load_acquire(p) \ 56 + ({ \ 57 + typeof(*p) ___p1 = READ_ONCE(*p); \ 58 + barrier(); \ 59 + ___p1; \ 60 + }) 61 + 49 62 #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
+16
tools/arch/powerpc/include/asm/barrier.h
··· 27 27 #define rmb() __asm__ __volatile__ ("sync" : : : "memory") 28 28 #define wmb() __asm__ __volatile__ ("sync" : : : "memory") 29 29 30 + #if defined(__powerpc64__) 31 + #define smp_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") 32 + 33 + #define smp_store_release(p, v) \ 34 + do { \ 35 + smp_lwsync(); \ 36 + WRITE_ONCE(*p, v); \ 37 + } while (0) 38 + 39 + #define smp_load_acquire(p) \ 40 + ({ \ 41 + typeof(*p) ___p1 = READ_ONCE(*p); \ 42 + smp_lwsync(); \ 43 + ___p1; \ 44 + }) 45 + #endif /* defined(__powerpc64__) */ 30 46 #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
+13
tools/arch/s390/include/asm/barrier.h
··· 28 28 #define rmb() mb() 29 29 #define wmb() mb() 30 30 31 + #define smp_store_release(p, v) \ 32 + do { \ 33 + barrier(); \ 34 + WRITE_ONCE(*p, v); \ 35 + } while (0) 36 + 37 + #define smp_load_acquire(p) \ 38 + ({ \ 39 + typeof(*p) ___p1 = READ_ONCE(*p); \ 40 + barrier(); \ 41 + ___p1; \ 42 + }) 43 + 31 44 #endif /* __TOOLS_LIB_ASM_BARRIER_H */
+13
tools/arch/sparc/include/asm/barrier_64.h
··· 40 40 #define rmb() __asm__ __volatile__("":::"memory") 41 41 #define wmb() __asm__ __volatile__("":::"memory") 42 42 43 + #define smp_store_release(p, v) \ 44 + do { \ 45 + barrier(); \ 46 + WRITE_ONCE(*p, v); \ 47 + } while (0) 48 + 49 + #define smp_load_acquire(p) \ 50 + ({ \ 51 + typeof(*p) ___p1 = READ_ONCE(*p); \ 52 + barrier(); \ 53 + ___p1; \ 54 + }) 55 + 43 56 #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
+14
tools/arch/x86/include/asm/barrier.h
··· 26 26 #define wmb() asm volatile("sfence" ::: "memory") 27 27 #endif 28 28 29 + #if defined(__x86_64__) 30 + #define smp_store_release(p, v) \ 31 + do { \ 32 + barrier(); \ 33 + WRITE_ONCE(*p, v); \ 34 + } while (0) 35 + 36 + #define smp_load_acquire(p) \ 37 + ({ \ 38 + typeof(*p) ___p1 = READ_ONCE(*p); \ 39 + barrier(); \ 40 + ___p1; \ 41 + }) 42 + #endif /* defined(__x86_64__) */ 29 43 #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
+3 -1
tools/bpf/bpftool/Documentation/bpftool-map.rst
··· 86 86 **bpftool map pin** *MAP* *FILE* 87 87 Pin map *MAP* as *FILE*. 88 88 89 - Note: *FILE* must be located in *bpffs* mount. 89 + Note: *FILE* must be located in *bpffs* mount. It must not 90 + contain a dot character ('.'), which is reserved for future 91 + extensions of *bpffs*. 90 92 91 93 **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] 92 94 Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+6 -2
tools/bpf/bpftool/Documentation/bpftool-prog.rst
··· 75 75 **bpftool prog pin** *PROG* *FILE* 76 76 Pin program *PROG* as *FILE*. 77 77 78 - Note: *FILE* must be located in *bpffs* mount. 78 + Note: *FILE* must be located in *bpffs* mount. It must not 79 + contain a dot character ('.'), which is reserved for future 80 + extensions of *bpffs*. 79 81 80 82 **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] 81 83 Load bpf program from binary *OBJ* and pin as *FILE*. ··· 93 91 If **dev** *NAME* is specified program will be loaded onto 94 92 given networking device (offload). 95 93 96 - Note: *FILE* must be located in *bpffs* mount. 94 + Note: *FILE* must be located in *bpffs* mount. It must not 95 + contain a dot character ('.'), which is reserved for future 96 + extensions of *bpffs*. 97 97 98 98 **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* 99 99 Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*)
+1 -1
tools/bpf/bpftool/bash-completion/bpftool
··· 143 143 local type 144 144 type=$(bpftool -jp map show $keyword $ref | \ 145 145 command sed -n 's/.*"type": "\(.*\)",$/\1/p') 146 - printf $type 146 + [[ -n $type ]] && printf $type 147 147 } 148 148 149 149 _bpftool_map_update_get_id()
+4 -1
tools/bpf/bpftool/common.c
··· 554 554 return read_sysfs_hex_int(full_path); 555 555 } 556 556 557 - const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) 557 + const char * 558 + ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, 559 + const char **opt) 558 560 { 559 561 char devname[IF_NAMESIZE]; 560 562 int vendor_id; ··· 581 579 device_id != 0x6000 && 582 580 device_id != 0x6003) 583 581 p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); 582 + *opt = "ctx4"; 584 583 return "NFP-6xxx"; 585 584 default: 586 585 p_err("Can't get bfd arch name for device vendor id 0x%04x",
+3 -1
tools/bpf/bpftool/jit_disasm.c
··· 77 77 } 78 78 79 79 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 80 - const char *arch) 80 + const char *arch, const char *disassembler_options) 81 81 { 82 82 disassembler_ftype disassemble; 83 83 struct disassemble_info info; ··· 116 116 117 117 info.arch = bfd_get_arch(bfdf); 118 118 info.mach = bfd_get_mach(bfdf); 119 + if (disassembler_options) 120 + info.disassembler_options = disassembler_options; 119 121 info.buffer = image; 120 122 info.buffer_length = len; 121 123
+2 -1
tools/bpf/bpftool/main.c
··· 321 321 p_err("reading batch file failed: %s", strerror(errno)); 322 322 err = -1; 323 323 } else { 324 - p_info("processed %d commands", lines); 324 + if (!json_output) 325 + printf("processed %d commands\n", lines); 325 326 err = 0; 326 327 } 327 328 err_close:
+4 -2
tools/bpf/bpftool/main.h
··· 145 145 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); 146 146 147 147 void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 148 - const char *arch); 148 + const char *arch, const char *disassembler_options); 149 149 void print_data_json(uint8_t *data, size_t len); 150 150 void print_hex_data_json(uint8_t *data, size_t len); 151 151 152 152 unsigned int get_page_size(void); 153 153 unsigned int get_possible_cpus(void); 154 - const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 154 + const char * 155 + ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, 156 + const char **opt); 155 157 156 158 struct btf_dumper { 157 159 const struct btf *btf;
+6 -4
tools/bpf/bpftool/map_perf_ring.c
··· 50 50 stop = true; 51 51 } 52 52 53 - static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) 53 + static enum bpf_perf_event_ret 54 + print_bpf_output(struct perf_event_header *event, void *private_data) 54 55 { 55 - struct event_ring_info *ring = priv; 56 - struct perf_event_sample *e = event; 56 + struct perf_event_sample *e = container_of(event, struct perf_event_sample, 57 + header); 58 + struct event_ring_info *ring = private_data; 57 59 struct { 58 60 struct perf_event_header header; 59 61 __u64 id; 60 62 __u64 lost; 61 - } *lost = event; 63 + } *lost = (typeof(lost))event; 62 64 63 65 if (json_output) { 64 66 jsonw_start_object(json_wtr);
+9 -5
tools/bpf/bpftool/prog.c
··· 449 449 unsigned long *func_ksyms = NULL; 450 450 struct bpf_prog_info info = {}; 451 451 unsigned int *func_lens = NULL; 452 + const char *disasm_opt = NULL; 452 453 unsigned int nr_func_ksyms; 453 454 unsigned int nr_func_lens; 454 455 struct dump_data dd = {}; ··· 608 607 const char *name = NULL; 609 608 610 609 if (info.ifindex) { 611 - name = ifindex_to_bfd_name_ns(info.ifindex, 612 - info.netns_dev, 613 - info.netns_ino); 610 + name = ifindex_to_bfd_params(info.ifindex, 611 + info.netns_dev, 612 + info.netns_ino, 613 + &disasm_opt); 614 614 if (!name) 615 615 goto err_free; 616 616 } ··· 653 651 printf("%s:\n", sym_name); 654 652 } 655 653 656 - disasm_print_insn(img, lens[i], opcodes, name); 654 + disasm_print_insn(img, lens[i], opcodes, name, 655 + disasm_opt); 657 656 img += lens[i]; 658 657 659 658 if (json_output) ··· 666 663 if (json_output) 667 664 jsonw_end_array(json_wtr); 668 665 } else { 669 - disasm_print_insn(buf, *member_len, opcodes, name); 666 + disasm_print_insn(buf, *member_len, opcodes, name, 667 + disasm_opt); 670 668 } 671 669 } else if (visual) { 672 670 if (json_output)
+35
tools/include/asm/barrier.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <linux/compiler.h> 2 3 #if defined(__i386__) || defined(__x86_64__) 3 4 #include "../../arch/x86/include/asm/barrier.h" 4 5 #elif defined(__arm__) ··· 26 25 #include "../../arch/xtensa/include/asm/barrier.h" 27 26 #else 28 27 #include <asm-generic/barrier.h> 28 + #endif 29 + 30 + /* 31 + * Generic fallback smp_*() definitions for archs that haven't 32 + * been updated yet. 33 + */ 34 + 35 + #ifndef smp_rmb 36 + # define smp_rmb() rmb() 37 + #endif 38 + 39 + #ifndef smp_wmb 40 + # define smp_wmb() wmb() 41 + #endif 42 + 43 + #ifndef smp_mb 44 + # define smp_mb() mb() 45 + #endif 46 + 47 + #ifndef smp_store_release 48 + # define smp_store_release(p, v) \ 49 + do { \ 50 + smp_mb(); \ 51 + WRITE_ONCE(*p, v); \ 52 + } while (0) 53 + #endif 54 + 55 + #ifndef smp_load_acquire 56 + # define smp_load_acquire(p) \ 57 + ({ \ 58 + typeof(*p) ___p1 = READ_ONCE(*p); \ 59 + smp_mb(); \ 60 + ___p1; \ 61 + }) 29 62 #endif
+73
tools/include/linux/ring_buffer.h
··· 1 + #ifndef _TOOLS_LINUX_RING_BUFFER_H_ 2 + #define _TOOLS_LINUX_RING_BUFFER_H_ 3 + 4 + #include <asm/barrier.h> 5 + 6 + /* 7 + * Contract with kernel for walking the perf ring buffer from 8 + * user space requires the following barrier pairing (quote 9 + * from kernel/events/ring_buffer.c): 10 + * 11 + * Since the mmap() consumer (userspace) can run on a 12 + * different CPU: 13 + * 14 + * kernel user 15 + * 16 + * if (LOAD ->data_tail) { LOAD ->data_head 17 + * (A) smp_rmb() (C) 18 + * STORE $data LOAD $data 19 + * smp_wmb() (B) smp_mb() (D) 20 + * STORE ->data_head STORE ->data_tail 21 + * } 22 + * 23 + * Where A pairs with D, and B pairs with C. 24 + * 25 + * In our case A is a control dependency that separates the 26 + * load of the ->data_tail and the stores of $data. In case 27 + * ->data_tail indicates there is no room in the buffer to 28 + * store $data we do not. 29 + * 30 + * D needs to be a full barrier since it separates the data 31 + * READ from the tail WRITE. 32 + * 33 + * For B a WMB is sufficient since it separates two WRITEs, 34 + * and for C an RMB is sufficient since it separates two READs. 35 + * 36 + * Note, instead of B, C, D we could also use smp_store_release() 37 + * in B and D as well as smp_load_acquire() in C. 38 + * 39 + * However, this optimization does not make sense for all kernel 40 + * supported architectures since for a fair number it would 41 + * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), 42 + * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). 43 + * 44 + * Thus for those smp_wmb() in B and smp_rmb() in C would still 45 + * be less expensive. For the case of D this has either the same 46 + * cost or is less expensive, for example, due to TSO x86 can 47 + * avoid the CPU barrier entirely. 
48 + */ 49 + 50 + static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) 51 + { 52 + /* 53 + * Architectures where smp_load_acquire() does not fallback to 54 + * READ_ONCE() + smp_mb() pair. 55 + */ 56 + #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ 57 + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) 58 + return smp_load_acquire(&base->data_head); 59 + #else 60 + u64 head = READ_ONCE(base->data_head); 61 + 62 + smp_rmb(); 63 + return head; 64 + #endif 65 + } 66 + 67 + static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, 68 + u64 tail) 69 + { 70 + smp_store_release(&base->data_tail, tail); 71 + } 72 + 73 + #endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
+48 -2
tools/include/uapi/linux/bpf.h
··· 103 103 BPF_BTF_LOAD, 104 104 BPF_BTF_GET_FD_BY_ID, 105 105 BPF_TASK_FD_QUERY, 106 + BPF_MAP_LOOKUP_AND_DELETE_ELEM, 106 107 }; 107 108 108 109 enum bpf_map_type { ··· 129 128 BPF_MAP_TYPE_CGROUP_STORAGE, 130 129 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, 131 130 BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, 131 + BPF_MAP_TYPE_QUEUE, 132 + BPF_MAP_TYPE_STACK, 132 133 }; 133 134 134 135 enum bpf_prog_type { ··· 463 460 * Description 464 461 * Delete entry with *key* from *map*. 465 462 * Return 463 + * 0 on success, or a negative error in case of failure. 464 + * 465 + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) 466 + * Description 467 + * Push an element *value* in *map*. *flags* is one of: 468 + * 469 + * **BPF_EXIST** 470 + * If the queue/stack is full, the oldest element is removed to 471 + * make room for this. 472 + * Return 473 + * 0 on success, or a negative error in case of failure. 474 + * 475 + * int bpf_map_pop_elem(struct bpf_map *map, void *value) 476 + * Description 477 + * Pop an element from *map*. 478 + * Return 479 + * 0 on success, or a negative error in case of failure. 480 + * 481 + * int bpf_map_peek_elem(struct bpf_map *map, void *value) 482 + * Description 483 + * Get an element from *map* without removing it. 484 + * Return 466 485 * 0 on success, or a negative error in case of failure. 467 486 * 468 487 * int bpf_probe_read(void *dst, u32 size, const void *src) ··· 1458 1433 * Return 1459 1434 * 0 on success, or a negative error in case of failure. 1460 1435 * 1461 - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) 1436 + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) 1462 1437 * Description 1463 1438 * Grow or shrink the room for data in the packet associated to 1464 1439 * *skb* by *len_diff*, and according to the selected *mode*. ··· 2240 2215 * pointer that was returned from bpf_sk_lookup_xxx\ (). 
2241 2216 * Return 2242 2217 * 0 on success, or a negative error in case of failure. 2218 + * 2219 + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) 2220 + * Description 2221 + * For socket policies, insert *len* bytes into msg at offset 2222 + * *start*. 2223 + * 2224 + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a 2225 + * *msg* it may want to insert metadata or options into the msg. 2226 + * This can later be read and used by any of the lower layer BPF 2227 + * hooks. 2228 + * 2229 + * This helper may fail if under memory pressure (a malloc 2230 + * fails) in these cases BPF programs will get an appropriate 2231 + * error and BPF programs will need to handle them. 2232 + * 2233 + * Return 2234 + * 0 on success, or a negative error in case of failure. 2243 2235 */ 2244 2236 #define __BPF_FUNC_MAPPER(FN) \ 2245 2237 FN(unspec), \ ··· 2345 2303 FN(skb_ancestor_cgroup_id), \ 2346 2304 FN(sk_lookup_tcp), \ 2347 2305 FN(sk_lookup_udp), \ 2348 - FN(sk_release), 2306 + FN(sk_release), \ 2307 + FN(map_push_elem), \ 2308 + FN(map_pop_elem), \ 2309 + FN(map_peek_elem), \ 2310 + FN(msg_push_data), 2349 2311 2350 2312 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 2351 2313 * function eBPF program intends to call
+78
tools/include/uapi/linux/tls.h
··· 1 + /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ 2 + /* 3 + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 
32 + */ 33 + 34 + #ifndef _UAPI_LINUX_TLS_H 35 + #define _UAPI_LINUX_TLS_H 36 + 37 + #include <linux/types.h> 38 + 39 + /* TLS socket options */ 40 + #define TLS_TX 1 /* Set transmit parameters */ 41 + #define TLS_RX 2 /* Set receive parameters */ 42 + 43 + /* Supported versions */ 44 + #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) 45 + #define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) 46 + 47 + #define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ 48 + ((id##_VERSION_MINOR) & 0xFF)) 49 + 50 + #define TLS_1_2_VERSION_MAJOR 0x3 51 + #define TLS_1_2_VERSION_MINOR 0x3 52 + #define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) 53 + 54 + /* Supported ciphers */ 55 + #define TLS_CIPHER_AES_GCM_128 51 56 + #define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 57 + #define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 58 + #define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 59 + #define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 60 + #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 61 + 62 + #define TLS_SET_RECORD_TYPE 1 63 + #define TLS_GET_RECORD_TYPE 2 64 + 65 + struct tls_crypto_info { 66 + __u16 version; 67 + __u16 cipher_type; 68 + }; 69 + 70 + struct tls12_crypto_info_aes_gcm_128 { 71 + struct tls_crypto_info info; 72 + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; 73 + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; 74 + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; 75 + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; 76 + }; 77 + 78 + #endif /* _UAPI_LINUX_TLS_H */
+1
tools/lib/bpf/Makefile
··· 125 125 override CFLAGS += -Werror -Wall 126 126 override CFLAGS += -fPIC 127 127 override CFLAGS += $(INCLUDES) 128 + override CFLAGS += -fvisibility=hidden 128 129 129 130 ifeq ($(VERBOSE),1) 130 131 Q =
+12
tools/lib/bpf/bpf.c
··· 278 278 return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); 279 279 } 280 280 281 + int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) 282 + { 283 + union bpf_attr attr; 284 + 285 + bzero(&attr, sizeof(attr)); 286 + attr.map_fd = fd; 287 + attr.key = ptr_to_u64(key); 288 + attr.value = ptr_to_u64(value); 289 + 290 + return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); 291 + } 292 + 281 293 int bpf_map_delete_elem(int fd, const void *key) 282 294 { 283 295 union bpf_attr attr;
+67 -53
tools/lib/bpf/bpf.h
··· 27 27 #include <stdbool.h> 28 28 #include <stddef.h> 29 29 30 + #ifndef LIBBPF_API 31 + #define LIBBPF_API __attribute__((visibility("default"))) 32 + #endif 33 + 30 34 struct bpf_create_map_attr { 31 35 const char *name; 32 36 enum bpf_map_type map_type; ··· 46 42 __u32 inner_map_fd; 47 43 }; 48 44 49 - int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 50 - int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 51 - int key_size, int value_size, int max_entries, 52 - __u32 map_flags, int node); 53 - int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 54 - int key_size, int value_size, int max_entries, 55 - __u32 map_flags); 56 - int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, 57 - int max_entries, __u32 map_flags); 58 - int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, 59 - int key_size, int inner_map_fd, int max_entries, 60 - __u32 map_flags, int node); 61 - int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, 62 - int key_size, int inner_map_fd, int max_entries, 63 - __u32 map_flags); 45 + LIBBPF_API int 46 + bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 47 + LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, 48 + int key_size, int value_size, 49 + int max_entries, __u32 map_flags, int node); 50 + LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, 51 + int key_size, int value_size, 52 + int max_entries, __u32 map_flags); 53 + LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, 54 + int value_size, int max_entries, __u32 map_flags); 55 + LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, 56 + const char *name, int key_size, 57 + int inner_map_fd, int max_entries, 58 + __u32 map_flags, int node); 59 + LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, 60 + const char *name, int key_size, 61 + int 
inner_map_fd, int max_entries, 62 + __u32 map_flags); 64 63 65 64 struct bpf_load_program_attr { 66 65 enum bpf_prog_type prog_type; ··· 81 74 82 75 /* Recommend log buffer size */ 83 76 #define BPF_LOG_BUF_SIZE (256 * 1024) 84 - int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, 85 - char *log_buf, size_t log_buf_sz); 86 - int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, 87 - size_t insns_cnt, const char *license, 88 - __u32 kern_version, char *log_buf, 89 - size_t log_buf_sz); 90 - int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, 91 - size_t insns_cnt, int strict_alignment, 92 - const char *license, __u32 kern_version, 93 - char *log_buf, size_t log_buf_sz, int log_level); 77 + LIBBPF_API int 78 + bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, 79 + char *log_buf, size_t log_buf_sz); 80 + LIBBPF_API int bpf_load_program(enum bpf_prog_type type, 81 + const struct bpf_insn *insns, size_t insns_cnt, 82 + const char *license, __u32 kern_version, 83 + char *log_buf, size_t log_buf_sz); 84 + LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, 85 + const struct bpf_insn *insns, 86 + size_t insns_cnt, int strict_alignment, 87 + const char *license, __u32 kern_version, 88 + char *log_buf, size_t log_buf_sz, 89 + int log_level); 94 90 95 - int bpf_map_update_elem(int fd, const void *key, const void *value, 96 - __u64 flags); 91 + LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, 92 + __u64 flags); 97 93 98 - int bpf_map_lookup_elem(int fd, const void *key, void *value); 99 - int bpf_map_delete_elem(int fd, const void *key); 100 - int bpf_map_get_next_key(int fd, const void *key, void *next_key); 101 - int bpf_obj_pin(int fd, const char *pathname); 102 - int bpf_obj_get(const char *pathname); 103 - int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, 104 - unsigned int flags); 105 - int bpf_prog_detach(int 
attachable_fd, enum bpf_attach_type type); 106 - int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); 107 - int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, 108 - void *data_out, __u32 *size_out, __u32 *retval, 109 - __u32 *duration); 110 - int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); 111 - int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 112 - int bpf_prog_get_fd_by_id(__u32 id); 113 - int bpf_map_get_fd_by_id(__u32 id); 114 - int bpf_btf_get_fd_by_id(__u32 id); 115 - int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 116 - int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, 117 - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); 118 - int bpf_raw_tracepoint_open(const char *name, int prog_fd); 119 - int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, 120 - bool do_log); 121 - int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, 122 - __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, 123 - __u64 *probe_addr); 94 + LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); 95 + LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, 96 + void *value); 97 + LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); 98 + LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); 99 + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 100 + LIBBPF_API int bpf_obj_get(const char *pathname); 101 + LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, 102 + enum bpf_attach_type type, unsigned int flags); 103 + LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); 104 + LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, 105 + enum bpf_attach_type type); 106 + LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, 107 + __u32 size, void *data_out, __u32 *size_out, 108 + __u32 
*retval, __u32 *duration); 109 + LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); 110 + LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); 111 + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); 112 + LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); 113 + LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); 114 + LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); 115 + LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, 116 + __u32 query_flags, __u32 *attach_flags, 117 + __u32 *prog_ids, __u32 *prog_cnt); 118 + LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); 119 + LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, 120 + __u32 log_buf_size, bool do_log); 121 + LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, 122 + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, 123 + __u64 *probe_offset, __u64 *probe_addr); 124 124 #endif /* __LIBBPF_BPF_H */
+14 -8
tools/lib/bpf/btf.h
··· 6 6 7 7 #include <linux/types.h> 8 8 9 + #ifndef LIBBPF_API 10 + #define LIBBPF_API __attribute__((visibility("default"))) 11 + #endif 12 + 9 13 #define BTF_ELF_SEC ".BTF" 10 14 11 15 struct btf; ··· 18 14 typedef int (*btf_print_fn_t)(const char *, ...) 19 15 __attribute__((format(printf, 1, 2))); 20 16 21 - void btf__free(struct btf *btf); 22 - struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); 23 - __s32 btf__find_by_name(const struct btf *btf, const char *type_name); 24 - const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); 25 - __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); 26 - int btf__resolve_type(const struct btf *btf, __u32 type_id); 27 - int btf__fd(const struct btf *btf); 28 - const char *btf__name_by_offset(const struct btf *btf, __u32 offset); 17 + LIBBPF_API void btf__free(struct btf *btf); 18 + LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); 19 + LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, 20 + const char *type_name); 21 + LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, 22 + __u32 id); 23 + LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); 24 + LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); 25 + LIBBPF_API int btf__fd(const struct btf *btf); 26 + LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); 29 27 30 28 #endif /* __LIBBPF_BTF_H */
+29 -40
tools/lib/bpf/libbpf.c
··· 27 27 #include <linux/list.h> 28 28 #include <linux/limits.h> 29 29 #include <linux/perf_event.h> 30 + #include <linux/ring_buffer.h> 30 31 #include <sys/stat.h> 31 32 #include <sys/types.h> 32 33 #include <sys/vfs.h> ··· 2415 2414 } 2416 2415 2417 2416 enum bpf_perf_event_ret 2418 - bpf_perf_event_read_simple(void *mem, unsigned long size, 2419 - unsigned long page_size, void **buf, size_t *buf_len, 2420 - bpf_perf_event_print_t fn, void *priv) 2417 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 2418 + void **copy_mem, size_t *copy_size, 2419 + bpf_perf_event_print_t fn, void *private_data) 2421 2420 { 2422 - volatile struct perf_event_mmap_page *header = mem; 2421 + struct perf_event_mmap_page *header = mmap_mem; 2422 + __u64 data_head = ring_buffer_read_head(header); 2423 2423 __u64 data_tail = header->data_tail; 2424 - __u64 data_head = header->data_head; 2425 - int ret = LIBBPF_PERF_EVENT_ERROR; 2426 - void *base, *begin, *end; 2424 + void *base = ((__u8 *)header) + page_size; 2425 + int ret = LIBBPF_PERF_EVENT_CONT; 2426 + struct perf_event_header *ehdr; 2427 + size_t ehdr_size; 2427 2428 2428 - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ 2429 - if (data_head == data_tail) 2430 - return LIBBPF_PERF_EVENT_CONT; 2429 + while (data_head != data_tail) { 2430 + ehdr = base + (data_tail & (mmap_size - 1)); 2431 + ehdr_size = ehdr->size; 2431 2432 2432 - base = ((char *)header) + page_size; 2433 + if (((void *)ehdr) + ehdr_size > base + mmap_size) { 2434 + void *copy_start = ehdr; 2435 + size_t len_first = base + mmap_size - copy_start; 2436 + size_t len_secnd = ehdr_size - len_first; 2433 2437 2434 - begin = base + data_tail % size; 2435 - end = base + data_head % size; 2436 - 2437 - while (begin != end) { 2438 - struct perf_event_header *ehdr; 2439 - 2440 - ehdr = begin; 2441 - if (begin + ehdr->size > base + size) { 2442 - long len = base + size - begin; 2443 - 2444 - if (*buf_len < ehdr->size) { 
2445 - free(*buf); 2446 - *buf = malloc(ehdr->size); 2447 - if (!*buf) { 2438 + if (*copy_size < ehdr_size) { 2439 + free(*copy_mem); 2440 + *copy_mem = malloc(ehdr_size); 2441 + if (!*copy_mem) { 2442 + *copy_size = 0; 2448 2443 ret = LIBBPF_PERF_EVENT_ERROR; 2449 2444 break; 2450 2445 } 2451 - *buf_len = ehdr->size; 2446 + *copy_size = ehdr_size; 2452 2447 } 2453 2448 2454 - memcpy(*buf, begin, len); 2455 - memcpy(*buf + len, base, ehdr->size - len); 2456 - ehdr = (void *)*buf; 2457 - begin = base + ehdr->size - len; 2458 - } else if (begin + ehdr->size == base + size) { 2459 - begin = base; 2460 - } else { 2461 - begin += ehdr->size; 2449 + memcpy(*copy_mem, copy_start, len_first); 2450 + memcpy(*copy_mem + len_first, base, len_secnd); 2451 + ehdr = *copy_mem; 2462 2452 } 2463 2453 2464 - ret = fn(ehdr, priv); 2454 + ret = fn(ehdr, private_data); 2455 + data_tail += ehdr_size; 2465 2456 if (ret != LIBBPF_PERF_EVENT_CONT) 2466 2457 break; 2467 - 2468 - data_tail += ehdr->size; 2469 2458 } 2470 2459 2471 - __sync_synchronize(); /* smp_mb() */ 2472 - header->data_tail = data_tail; 2473 - 2460 + ring_buffer_write_tail(header, data_tail); 2474 2461 return ret; 2475 2462 }
+101 -88
tools/lib/bpf/libbpf.h
··· 16 16 #include <sys/types.h> // for size_t 17 17 #include <linux/bpf.h> 18 18 19 + #ifndef LIBBPF_API 20 + #define LIBBPF_API __attribute__((visibility("default"))) 21 + #endif 22 + 19 23 enum libbpf_errno { 20 24 __LIBBPF_ERRNO__START = 4000, 21 25 ··· 41 37 __LIBBPF_ERRNO__END, 42 38 }; 43 39 44 - int libbpf_strerror(int err, char *buf, size_t size); 40 + LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); 45 41 46 42 /* 47 43 * __printf is defined in include/linux/compiler-gcc.h. However, ··· 51 47 typedef int (*libbpf_print_fn_t)(const char *, ...) 52 48 __attribute__((format(printf, 1, 2))); 53 49 54 - void libbpf_set_print(libbpf_print_fn_t warn, 55 - libbpf_print_fn_t info, 56 - libbpf_print_fn_t debug); 50 + LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, 51 + libbpf_print_fn_t info, 52 + libbpf_print_fn_t debug); 57 53 58 54 /* Hide internal to user */ 59 55 struct bpf_object; ··· 63 59 enum bpf_prog_type prog_type; 64 60 }; 65 61 66 - struct bpf_object *bpf_object__open(const char *path); 67 - struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); 62 + LIBBPF_API struct bpf_object *bpf_object__open(const char *path); 63 + LIBBPF_API struct bpf_object * 64 + bpf_object__open_xattr(struct bpf_object_open_attr *attr); 68 65 struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, 69 66 int flags); 70 - struct bpf_object *bpf_object__open_buffer(void *obj_buf, 71 - size_t obj_buf_sz, 72 - const char *name); 73 - int bpf_object__pin(struct bpf_object *object, const char *path); 74 - void bpf_object__close(struct bpf_object *object); 67 + LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, 68 + size_t obj_buf_sz, 69 + const char *name); 70 + LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); 71 + LIBBPF_API void bpf_object__close(struct bpf_object *object); 75 72 76 73 /* Load/unload object into/from kernel */ 77 - int bpf_object__load(struct 
bpf_object *obj); 78 - int bpf_object__unload(struct bpf_object *obj); 79 - const char *bpf_object__name(struct bpf_object *obj); 80 - unsigned int bpf_object__kversion(struct bpf_object *obj); 81 - int bpf_object__btf_fd(const struct bpf_object *obj); 74 + LIBBPF_API int bpf_object__load(struct bpf_object *obj); 75 + LIBBPF_API int bpf_object__unload(struct bpf_object *obj); 76 + LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); 77 + LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); 78 + LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); 82 79 83 - struct bpf_program * 80 + LIBBPF_API struct bpf_program * 84 81 bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); 85 82 86 - struct bpf_object *bpf_object__next(struct bpf_object *prev); 83 + LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); 87 84 #define bpf_object__for_each_safe(pos, tmp) \ 88 85 for ((pos) = bpf_object__next(NULL), \ 89 86 (tmp) = bpf_object__next(pos); \ ··· 92 87 (pos) = (tmp), (tmp) = bpf_object__next(tmp)) 93 88 94 89 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); 95 - int bpf_object__set_priv(struct bpf_object *obj, void *priv, 96 - bpf_object_clear_priv_t clear_priv); 97 - void *bpf_object__priv(struct bpf_object *prog); 90 + LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, 91 + bpf_object_clear_priv_t clear_priv); 92 + LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); 98 93 99 - int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 100 - enum bpf_attach_type *expected_attach_type); 101 - int libbpf_attach_type_by_name(const char *name, 102 - enum bpf_attach_type *attach_type); 94 + LIBBPF_API int 95 + libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 96 + enum bpf_attach_type *expected_attach_type); 97 + LIBBPF_API int libbpf_attach_type_by_name(const char *name, 98 + enum bpf_attach_type 
*attach_type); 103 99 104 100 /* Accessors of bpf_program */ 105 101 struct bpf_program; 106 - struct bpf_program *bpf_program__next(struct bpf_program *prog, 107 - struct bpf_object *obj); 102 + LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, 103 + struct bpf_object *obj); 108 104 109 105 #define bpf_object__for_each_program(pos, obj) \ 110 106 for ((pos) = bpf_program__next(NULL, (obj)); \ ··· 115 109 typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, 116 110 void *); 117 111 118 - int bpf_program__set_priv(struct bpf_program *prog, void *priv, 119 - bpf_program_clear_priv_t clear_priv); 112 + LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, 113 + bpf_program_clear_priv_t clear_priv); 120 114 121 - void *bpf_program__priv(struct bpf_program *prog); 122 - void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); 115 + LIBBPF_API void *bpf_program__priv(struct bpf_program *prog); 116 + LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, 117 + __u32 ifindex); 123 118 124 - const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); 119 + LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, 120 + bool needs_copy); 125 121 126 - int bpf_program__load(struct bpf_program *prog, char *license, 127 - __u32 kern_version); 128 - int bpf_program__fd(struct bpf_program *prog); 129 - int bpf_program__pin_instance(struct bpf_program *prog, const char *path, 130 - int instance); 131 - int bpf_program__pin(struct bpf_program *prog, const char *path); 132 - void bpf_program__unload(struct bpf_program *prog); 122 + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, 123 + __u32 kern_version); 124 + LIBBPF_API int bpf_program__fd(struct bpf_program *prog); 125 + LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, 126 + const char *path, 127 + int instance); 128 + LIBBPF_API int bpf_program__pin(struct bpf_program *prog, 
const char *path); 129 + LIBBPF_API void bpf_program__unload(struct bpf_program *prog); 133 130 134 131 struct bpf_insn; 135 132 ··· 193 184 struct bpf_insn *insns, int insns_cnt, 194 185 struct bpf_prog_prep_result *res); 195 186 196 - int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 197 - bpf_program_prep_t prep); 187 + LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, 188 + bpf_program_prep_t prep); 198 189 199 - int bpf_program__nth_fd(struct bpf_program *prog, int n); 190 + LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); 200 191 201 192 /* 202 193 * Adjust type of BPF program. Default is kprobe. 203 194 */ 204 - int bpf_program__set_socket_filter(struct bpf_program *prog); 205 - int bpf_program__set_tracepoint(struct bpf_program *prog); 206 - int bpf_program__set_raw_tracepoint(struct bpf_program *prog); 207 - int bpf_program__set_kprobe(struct bpf_program *prog); 208 - int bpf_program__set_sched_cls(struct bpf_program *prog); 209 - int bpf_program__set_sched_act(struct bpf_program *prog); 210 - int bpf_program__set_xdp(struct bpf_program *prog); 211 - int bpf_program__set_perf_event(struct bpf_program *prog); 212 - void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); 213 - void bpf_program__set_expected_attach_type(struct bpf_program *prog, 214 - enum bpf_attach_type type); 195 + LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); 196 + LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); 197 + LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); 198 + LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); 199 + LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); 200 + LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); 201 + LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); 202 + LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); 
203 + LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, 204 + enum bpf_prog_type type); 205 + LIBBPF_API void 206 + bpf_program__set_expected_attach_type(struct bpf_program *prog, 207 + enum bpf_attach_type type); 215 208 216 - bool bpf_program__is_socket_filter(struct bpf_program *prog); 217 - bool bpf_program__is_tracepoint(struct bpf_program *prog); 218 - bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); 219 - bool bpf_program__is_kprobe(struct bpf_program *prog); 220 - bool bpf_program__is_sched_cls(struct bpf_program *prog); 221 - bool bpf_program__is_sched_act(struct bpf_program *prog); 222 - bool bpf_program__is_xdp(struct bpf_program *prog); 223 - bool bpf_program__is_perf_event(struct bpf_program *prog); 209 + LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); 210 + LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); 211 + LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); 212 + LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); 213 + LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); 214 + LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); 215 + LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); 216 + LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); 224 217 225 218 /* 226 219 * No need for __attribute__((packed)), all members of 'bpf_map_def' ··· 243 232 * so no need to worry about a name clash. 244 233 */ 245 234 struct bpf_map; 246 - struct bpf_map * 235 + LIBBPF_API struct bpf_map * 247 236 bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); 248 237 249 238 /* 250 239 * Get bpf_map through the offset of corresponding struct bpf_map_def 251 240 * in the BPF object file. 
252 241 */ 253 - struct bpf_map * 242 + LIBBPF_API struct bpf_map * 254 243 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); 255 244 256 - struct bpf_map * 245 + LIBBPF_API struct bpf_map * 257 246 bpf_map__next(struct bpf_map *map, struct bpf_object *obj); 258 247 #define bpf_map__for_each(pos, obj) \ 259 248 for ((pos) = bpf_map__next(NULL, (obj)); \ 260 249 (pos) != NULL; \ 261 250 (pos) = bpf_map__next((pos), (obj))) 262 251 263 - int bpf_map__fd(struct bpf_map *map); 264 - const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 265 - const char *bpf_map__name(struct bpf_map *map); 266 - __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); 267 - __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); 252 + LIBBPF_API int bpf_map__fd(struct bpf_map *map); 253 + LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); 254 + LIBBPF_API const char *bpf_map__name(struct bpf_map *map); 255 + LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); 256 + LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); 268 257 269 258 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); 270 - int bpf_map__set_priv(struct bpf_map *map, void *priv, 271 - bpf_map_clear_priv_t clear_priv); 272 - void *bpf_map__priv(struct bpf_map *map); 273 - int bpf_map__reuse_fd(struct bpf_map *map, int fd); 274 - bool bpf_map__is_offload_neutral(struct bpf_map *map); 275 - void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); 276 - int bpf_map__pin(struct bpf_map *map, const char *path); 259 + LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, 260 + bpf_map_clear_priv_t clear_priv); 261 + LIBBPF_API void *bpf_map__priv(struct bpf_map *map); 262 + LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); 263 + LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); 264 + LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); 265 + LIBBPF_API int 
bpf_map__pin(struct bpf_map *map, const char *path); 277 266 278 - long libbpf_get_error(const void *ptr); 267 + LIBBPF_API long libbpf_get_error(const void *ptr); 279 268 280 269 struct bpf_prog_load_attr { 281 270 const char *file; ··· 284 273 int ifindex; 285 274 }; 286 275 287 - int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 288 - struct bpf_object **pobj, int *prog_fd); 289 - int bpf_prog_load(const char *file, enum bpf_prog_type type, 290 - struct bpf_object **pobj, int *prog_fd); 276 + LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 277 + struct bpf_object **pobj, int *prog_fd); 278 + LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, 279 + struct bpf_object **pobj, int *prog_fd); 291 280 292 - int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 281 + LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 293 282 294 283 enum bpf_perf_event_ret { 295 284 LIBBPF_PERF_EVENT_DONE = 0, ··· 297 286 LIBBPF_PERF_EVENT_CONT = -2, 298 287 }; 299 288 300 - typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, 301 - void *priv); 302 - int bpf_perf_event_read_simple(void *mem, unsigned long size, 303 - unsigned long page_size, 304 - void **buf, size_t *buf_len, 305 - bpf_perf_event_print_t fn, void *priv); 289 + struct perf_event_header; 290 + typedef enum bpf_perf_event_ret 291 + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 292 + void *private_data); 293 + LIBBPF_API enum bpf_perf_event_ret 294 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 295 + void **copy_mem, size_t *copy_size, 296 + bpf_perf_event_print_t fn, void *private_data); 306 297 307 298 struct nlattr; 308 299 typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+3 -12
tools/perf/util/mmap.h
··· 4 4 #include <linux/compiler.h> 5 5 #include <linux/refcount.h> 6 6 #include <linux/types.h> 7 - #include <asm/barrier.h> 7 + #include <linux/ring_buffer.h> 8 8 #include <stdbool.h> 9 9 #include "auxtrace.h" 10 10 #include "event.h" ··· 71 71 72 72 static inline u64 perf_mmap__read_head(struct perf_mmap *mm) 73 73 { 74 - struct perf_event_mmap_page *pc = mm->base; 75 - u64 head = READ_ONCE(pc->data_head); 76 - rmb(); 77 - return head; 74 + return ring_buffer_read_head(mm->base); 78 75 } 79 76 80 77 static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) 81 78 { 82 - struct perf_event_mmap_page *pc = md->base; 83 - 84 - /* 85 - * ensure all reads are done before we write the tail out. 86 - */ 87 - mb(); 88 - pc->data_tail = tail; 79 + ring_buffer_write_tail(md->base, tail); 89 80 } 90 81 91 82 union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+2
tools/testing/selftests/bpf/.gitignore
··· 25 25 test_select_reuseport 26 26 test_flow_dissector 27 27 flow_dissector_load 28 + test_netcnt 29 + test_section_names
+4 -1
tools/testing/selftests/bpf/Makefile
··· 37 37 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ 38 38 get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ 39 39 test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ 40 - test_sk_lookup_kern.o test_xdp_vlan.o 40 + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o 41 41 42 42 # Order correspond to 'make run_tests' order 43 43 TEST_PROGS := test_kmod.sh \ ··· 117 117 118 118 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline 119 119 $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline 120 + 121 + $(OUTPUT)/test_queue_map.o: test_queue_stack_map.h 122 + $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h 120 123 121 124 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) 122 125 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+9
tools/testing/selftests/bpf/bpf_helpers.h
··· 16 16 (void *) BPF_FUNC_map_update_elem; 17 17 static int (*bpf_map_delete_elem)(void *map, void *key) = 18 18 (void *) BPF_FUNC_map_delete_elem; 19 + static int (*bpf_map_push_elem)(void *map, void *value, 20 + unsigned long long flags) = 21 + (void *) BPF_FUNC_map_push_elem; 22 + static int (*bpf_map_pop_elem)(void *map, void *value) = 23 + (void *) BPF_FUNC_map_pop_elem; 24 + static int (*bpf_map_peek_elem)(void *map, void *value) = 25 + (void *) BPF_FUNC_map_peek_elem; 19 26 static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = 20 27 (void *) BPF_FUNC_probe_read; 21 28 static unsigned long long (*bpf_ktime_get_ns)(void) = ··· 111 104 (void *) BPF_FUNC_msg_cork_bytes; 112 105 static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = 113 106 (void *) BPF_FUNC_msg_pull_data; 107 + static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = 108 + (void *) BPF_FUNC_msg_push_data; 114 109 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = 115 110 (void *) BPF_FUNC_bind; 116 111 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+1 -1
tools/testing/selftests/bpf/test_libbpf.sh
··· 6 6 # Determine selftest success via shell exit code 7 7 exit_handler() 8 8 { 9 - if (( $? == 0 )); then 9 + if [ $? -eq 0 ]; then 10 10 echo "selftests: $TESTNAME [PASS]"; 11 11 else 12 12 echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+122
tools/testing/selftests/bpf/test_maps.c
··· 15 15 #include <string.h> 16 16 #include <assert.h> 17 17 #include <stdlib.h> 18 + #include <time.h> 18 19 19 20 #include <sys/wait.h> 20 21 #include <sys/socket.h> ··· 468 467 printf("Failed to create arraymap '%s'!\n", strerror(errno)); 469 468 exit(1); 470 469 } 470 + 471 + close(fd); 472 + } 473 + 474 + static void test_queuemap(int task, void *data) 475 + { 476 + const int MAP_SIZE = 32; 477 + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; 478 + int fd, i; 479 + 480 + /* Fill test values to be used */ 481 + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) 482 + vals[i] = rand(); 483 + 484 + /* Invalid key size */ 485 + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, 486 + map_flags); 487 + assert(fd < 0 && errno == EINVAL); 488 + 489 + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, 490 + map_flags); 491 + /* Queue map does not support BPF_F_NO_PREALLOC */ 492 + if (map_flags & BPF_F_NO_PREALLOC) { 493 + assert(fd < 0 && errno == EINVAL); 494 + return; 495 + } 496 + if (fd < 0) { 497 + printf("Failed to create queuemap '%s'!\n", strerror(errno)); 498 + exit(1); 499 + } 500 + 501 + /* Push MAP_SIZE elements */ 502 + for (i = 0; i < MAP_SIZE; i++) 503 + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); 504 + 505 + /* Check that element cannot be pushed due to max_entries limit */ 506 + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && 507 + errno == E2BIG); 508 + 509 + /* Peek element */ 510 + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]); 511 + 512 + /* Replace half elements */ 513 + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) 514 + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); 515 + 516 + /* Pop all elements */ 517 + for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++) 518 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && 519 + val == vals[i]); 520 + 521 + /* Check that there are not elements left */ 522 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) 
== -1 && 523 + errno == ENOENT); 524 + 525 + /* Check that non supported functions set errno to EINVAL */ 526 + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); 527 + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); 528 + 529 + close(fd); 530 + } 531 + 532 + static void test_stackmap(int task, void *data) 533 + { 534 + const int MAP_SIZE = 32; 535 + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; 536 + int fd, i; 537 + 538 + /* Fill test values to be used */ 539 + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) 540 + vals[i] = rand(); 541 + 542 + /* Invalid key size */ 543 + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, 544 + map_flags); 545 + assert(fd < 0 && errno == EINVAL); 546 + 547 + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, 548 + map_flags); 549 + /* Stack map does not support BPF_F_NO_PREALLOC */ 550 + if (map_flags & BPF_F_NO_PREALLOC) { 551 + assert(fd < 0 && errno == EINVAL); 552 + return; 553 + } 554 + if (fd < 0) { 555 + printf("Failed to create stackmap '%s'!\n", strerror(errno)); 556 + exit(1); 557 + } 558 + 559 + /* Push MAP_SIZE elements */ 560 + for (i = 0; i < MAP_SIZE; i++) 561 + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); 562 + 563 + /* Check that element cannot be pushed due to max_entries limit */ 564 + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && 565 + errno == E2BIG); 566 + 567 + /* Peek element */ 568 + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]); 569 + 570 + /* Replace half elements */ 571 + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) 572 + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); 573 + 574 + /* Pop all elements */ 575 + for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--) 576 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && 577 + val == vals[i]); 578 + 579 + /* Check that there are not elements left */ 580 + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && 
581 + errno == ENOENT); 582 + 583 + /* Check that non supported functions set errno to EINVAL */ 584 + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); 585 + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); 471 586 472 587 close(fd); 473 588 } ··· 1551 1434 test_map_wronly(); 1552 1435 1553 1436 test_reuseport_array(); 1437 + 1438 + test_queuemap(0, NULL); 1439 + test_stackmap(0, NULL); 1554 1440 } 1555 1441 1556 1442 int main(void) 1557 1443 { 1444 + srand(time(NULL)); 1445 + 1558 1446 map_flags = 0; 1559 1447 run_all_tests(); 1560 1448
+99
tools/testing/selftests/bpf/test_progs.c
··· 1735 1735 bpf_object__close(obj); 1736 1736 } 1737 1737 1738 + enum { 1739 + QUEUE, 1740 + STACK, 1741 + }; 1742 + 1743 + static void test_queue_stack_map(int type) 1744 + { 1745 + const int MAP_SIZE = 32; 1746 + __u32 vals[MAP_SIZE], duration, retval, size, val; 1747 + int i, err, prog_fd, map_in_fd, map_out_fd; 1748 + char file[32], buf[128]; 1749 + struct bpf_object *obj; 1750 + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); 1751 + 1752 + /* Fill test values to be used */ 1753 + for (i = 0; i < MAP_SIZE; i++) 1754 + vals[i] = rand(); 1755 + 1756 + if (type == QUEUE) 1757 + strncpy(file, "./test_queue_map.o", sizeof(file)); 1758 + else if (type == STACK) 1759 + strncpy(file, "./test_stack_map.o", sizeof(file)); 1760 + else 1761 + return; 1762 + 1763 + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); 1764 + if (err) { 1765 + error_cnt++; 1766 + return; 1767 + } 1768 + 1769 + map_in_fd = bpf_find_map(__func__, obj, "map_in"); 1770 + if (map_in_fd < 0) 1771 + goto out; 1772 + 1773 + map_out_fd = bpf_find_map(__func__, obj, "map_out"); 1774 + if (map_out_fd < 0) 1775 + goto out; 1776 + 1777 + /* Push 32 elements to the input map */ 1778 + for (i = 0; i < MAP_SIZE; i++) { 1779 + err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); 1780 + if (err) { 1781 + error_cnt++; 1782 + goto out; 1783 + } 1784 + } 1785 + 1786 + /* The eBPF program pushes iph.saddr in the output map, 1787 + * pops the input map and saves this value in iph.daddr 1788 + */ 1789 + for (i = 0; i < MAP_SIZE; i++) { 1790 + if (type == QUEUE) { 1791 + val = vals[i]; 1792 + pkt_v4.iph.saddr = vals[i] * 5; 1793 + } else if (type == STACK) { 1794 + val = vals[MAP_SIZE - 1 - i]; 1795 + pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; 1796 + } 1797 + 1798 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 1799 + buf, &size, &retval, &duration); 1800 + if (err || retval || size != sizeof(pkt_v4) || 1801 + iph->daddr != val) 1802 + break; 1803 + } 1804 + 1805 + 
CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, 1806 + "bpf_map_pop_elem", 1807 + "err %d errno %d retval %d size %d iph->daddr %u\n", 1808 + err, errno, retval, size, iph->daddr); 1809 + 1810 + /* Queue is empty, program should return TC_ACT_SHOT */ 1811 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 1812 + buf, &size, &retval, &duration); 1813 + CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), 1814 + "check-queue-stack-map-empty", 1815 + "err %d errno %d retval %d size %d\n", 1816 + err, errno, retval, size); 1817 + 1818 + /* Check that the program pushed elements correctly */ 1819 + for (i = 0; i < MAP_SIZE; i++) { 1820 + err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); 1821 + if (err || val != vals[i] * 5) 1822 + break; 1823 + } 1824 + 1825 + CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), 1826 + "bpf_map_push_elem", "err %d value %u\n", err, val); 1827 + 1828 + out: 1829 + pkt_v4.iph.saddr = 0; 1830 + bpf_object__close(obj); 1831 + } 1832 + 1738 1833 int main(void) 1739 1834 { 1835 + srand(time(NULL)); 1836 + 1740 1837 jit_enabled = is_jit_enabled(); 1741 1838 1742 1839 test_pkt_access(); ··· 1854 1757 test_task_fd_query_rawtp(); 1855 1758 test_task_fd_query_tp(); 1856 1759 test_reference_tracking(); 1760 + test_queue_stack_map(QUEUE); 1761 + test_queue_stack_map(STACK); 1857 1762 1858 1763 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); 1859 1764 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+4
tools/testing/selftests/bpf/test_queue_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #define MAP_TYPE BPF_MAP_TYPE_QUEUE 4 + #include "test_queue_stack_map.h"
+59
tools/testing/selftests/bpf/test_queue_stack_map.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #include <stddef.h> 4 + #include <string.h> 5 + #include <linux/bpf.h> 6 + #include <linux/if_ether.h> 7 + #include <linux/ip.h> 8 + #include <linux/pkt_cls.h> 9 + #include "bpf_helpers.h" 10 + 11 + int _version SEC("version") = 1; 12 + 13 + struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { 14 + .type = MAP_TYPE, 15 + .key_size = 0, 16 + .value_size = sizeof(__u32), 17 + .max_entries = 32, 18 + .map_flags = 0, 19 + }; 20 + 21 + struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { 22 + .type = MAP_TYPE, 23 + .key_size = 0, 24 + .value_size = sizeof(__u32), 25 + .max_entries = 32, 26 + .map_flags = 0, 27 + }; 28 + 29 + SEC("test") 30 + int _test(struct __sk_buff *skb) 31 + { 32 + void *data_end = (void *)(long)skb->data_end; 33 + void *data = (void *)(long)skb->data; 34 + struct ethhdr *eth = (struct ethhdr *)(data); 35 + __u32 value; 36 + int err; 37 + 38 + if (eth + 1 > data_end) 39 + return TC_ACT_SHOT; 40 + 41 + struct iphdr *iph = (struct iphdr *)(eth + 1); 42 + 43 + if (iph + 1 > data_end) 44 + return TC_ACT_SHOT; 45 + 46 + err = bpf_map_pop_elem(&map_in, &value); 47 + if (err) 48 + return TC_ACT_SHOT; 49 + 50 + iph->daddr = value; 51 + 52 + err = bpf_map_push_elem(&map_out, &iph->saddr, 0); 53 + if (err) 54 + return TC_ACT_SHOT; 55 + 56 + return TC_ACT_OK; 57 + } 58 + 59 + char _license[] SEC("license") = "GPL";
+177 -59
tools/testing/selftests/bpf/test_sockmap.c
··· 28 28 #include <linux/sock_diag.h> 29 29 #include <linux/bpf.h> 30 30 #include <linux/if_link.h> 31 + #include <linux/tls.h> 31 32 #include <assert.h> 32 33 #include <libgen.h> 33 34 ··· 43 42 44 43 int running; 45 44 static void running_handler(int a); 45 + 46 + #ifndef TCP_ULP 47 + # define TCP_ULP 31 48 + #endif 49 + #ifndef SOL_TLS 50 + # define SOL_TLS 282 51 + #endif 46 52 47 53 /* randomly selected ports for testing on lo */ 48 54 #define S1_PORT 10000 ··· 77 69 int txmsg_cork; 78 70 int txmsg_start; 79 71 int txmsg_end; 72 + int txmsg_start_push; 73 + int txmsg_end_push; 80 74 int txmsg_ingress; 81 75 int txmsg_skb; 82 76 int ktls; 77 + int peek_flag; 83 78 84 79 static const struct option long_options[] = { 85 80 {"help", no_argument, NULL, 'h' }, ··· 102 91 {"txmsg_cork", required_argument, NULL, 'k'}, 103 92 {"txmsg_start", required_argument, NULL, 's'}, 104 93 {"txmsg_end", required_argument, NULL, 'e'}, 94 + {"txmsg_start_push", required_argument, NULL, 'p'}, 95 + {"txmsg_end_push", required_argument, NULL, 'q'}, 105 96 {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, 106 97 {"txmsg_skb", no_argument, &txmsg_skb, 1 }, 107 98 {"ktls", no_argument, &ktls, 1 }, 99 + {"peek", no_argument, &peek_flag, 1 }, 108 100 {0, 0, NULL, 0 } 109 101 }; 110 102 ··· 127 113 } 128 114 printf("\n"); 129 115 } 130 - 131 - #define TCP_ULP 31 132 - #define TLS_TX 1 133 - #define TLS_RX 2 134 - #include <linux/tls.h> 135 116 136 117 char *sock_to_string(int s) 137 118 { ··· 358 349 return 0; 359 350 } 360 351 361 - static int msg_loop(int fd, int iov_count, int iov_length, int cnt, 362 - struct msg_stats *s, bool tx, 363 - struct sockmap_options *opt) 352 + static void msg_free_iov(struct msghdr *msg) 364 353 { 365 - struct msghdr msg = {0}; 366 - int err, i, flags = MSG_NOSIGNAL; 354 + int i; 355 + 356 + for (i = 0; i < msg->msg_iovlen; i++) 357 + free(msg->msg_iov[i].iov_base); 358 + free(msg->msg_iov); 359 + msg->msg_iov = NULL; 360 + msg->msg_iovlen = 0; 361 + } 
362 + 363 + static int msg_alloc_iov(struct msghdr *msg, 364 + int iov_count, int iov_length, 365 + bool data, bool xmit) 366 + { 367 + unsigned char k = 0; 367 368 struct iovec *iov; 368 - unsigned char k; 369 - bool data_test = opt->data_test; 370 - bool drop = opt->drop_expected; 369 + int i; 371 370 372 371 iov = calloc(iov_count, sizeof(struct iovec)); 373 372 if (!iov) 374 373 return errno; 375 374 376 - k = 0; 377 375 for (i = 0; i < iov_count; i++) { 378 376 unsigned char *d = calloc(iov_length, sizeof(char)); 379 377 380 378 if (!d) { 381 379 fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); 382 - goto out_errno; 380 + goto unwind_iov; 383 381 } 384 382 iov[i].iov_base = d; 385 383 iov[i].iov_len = iov_length; 386 384 387 - if (data_test && tx) { 385 + if (data && xmit) { 388 386 int j; 389 387 390 388 for (j = 0; j < iov_length; j++) ··· 399 383 } 400 384 } 401 385 402 - msg.msg_iov = iov; 403 - msg.msg_iovlen = iov_count; 404 - k = 0; 386 + msg->msg_iov = iov; 387 + msg->msg_iovlen = iov_count; 388 + 389 + return 0; 390 + unwind_iov: 391 + for (i--; i >= 0 ; i--) 392 + free(msg->msg_iov[i].iov_base); 393 + return -ENOMEM; 394 + } 395 + 396 + static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) 397 + { 398 + int i, j, bytes_cnt = 0; 399 + unsigned char k = 0; 400 + 401 + for (i = 0; i < msg->msg_iovlen; i++) { 402 + unsigned char *d = msg->msg_iov[i].iov_base; 403 + 404 + for (j = 0; 405 + j < msg->msg_iov[i].iov_len && size; j++) { 406 + if (d[j] != k++) { 407 + fprintf(stderr, 408 + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", 409 + i, j, d[j], k - 1, d[j+1], k); 410 + return -EIO; 411 + } 412 + bytes_cnt++; 413 + if (bytes_cnt == chunk_sz) { 414 + k = 0; 415 + bytes_cnt = 0; 416 + } 417 + size--; 418 + } 419 + } 420 + return 0; 421 + } 422 + 423 + static int msg_loop(int fd, int iov_count, int iov_length, int cnt, 424 + struct msg_stats *s, bool tx, 425 + struct sockmap_options *opt) 426 + { 427 + struct 
msghdr msg = {0}, msg_peek = {0}; 428 + int err, i, flags = MSG_NOSIGNAL; 429 + bool drop = opt->drop_expected; 430 + bool data = opt->data_test; 431 + 432 + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); 433 + if (err) 434 + goto out_errno; 435 + if (peek_flag) { 436 + err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx); 437 + if (err) 438 + goto out_errno; 439 + } 405 440 406 441 if (tx) { 407 442 clock_gettime(CLOCK_MONOTONIC, &s->start); ··· 472 405 } 473 406 clock_gettime(CLOCK_MONOTONIC, &s->end); 474 407 } else { 475 - int slct, recv, max_fd = fd; 408 + int slct, recvp = 0, recv, max_fd = fd; 476 409 int fd_flags = O_NONBLOCK; 477 410 struct timeval timeout; 478 411 float total_bytes; 479 - int bytes_cnt = 0; 480 - int chunk_sz; 481 412 fd_set w; 482 - 483 - if (opt->sendpage) 484 - chunk_sz = iov_length * cnt; 485 - else 486 - chunk_sz = iov_length * iov_count; 487 413 488 414 fcntl(fd, fd_flags); 489 415 total_bytes = (float)iov_count * (float)iov_length * (float)cnt; ··· 509 449 goto out_errno; 510 450 } 511 451 452 + errno = 0; 453 + if (peek_flag) { 454 + flags |= MSG_PEEK; 455 + recvp = recvmsg(fd, &msg_peek, flags); 456 + if (recvp < 0) { 457 + if (errno != EWOULDBLOCK) { 458 + clock_gettime(CLOCK_MONOTONIC, &s->end); 459 + goto out_errno; 460 + } 461 + } 462 + flags = 0; 463 + } 464 + 512 465 recv = recvmsg(fd, &msg, flags); 513 466 if (recv < 0) { 514 467 if (errno != EWOULDBLOCK) { ··· 533 460 534 461 s->bytes_recvd += recv; 535 462 536 - if (data_test) { 537 - int j; 463 + if (data) { 464 + int chunk_sz = opt->sendpage ? 
465 + iov_length * cnt : 466 + iov_length * iov_count; 538 467 539 - for (i = 0; i < msg.msg_iovlen; i++) { 540 - unsigned char *d = iov[i].iov_base; 541 - 542 - for (j = 0; 543 - j < iov[i].iov_len && recv; j++) { 544 - if (d[j] != k++) { 545 - errno = -EIO; 546 - fprintf(stderr, 547 - "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", 548 - i, j, d[j], k - 1, d[j+1], k); 549 - goto out_errno; 550 - } 551 - bytes_cnt++; 552 - if (bytes_cnt == chunk_sz) { 553 - k = 0; 554 - bytes_cnt = 0; 555 - } 556 - recv--; 468 + errno = msg_verify_data(&msg, recv, chunk_sz); 469 + if (errno) { 470 + perror("data verify msg failed\n"); 471 + goto out_errno; 472 + } 473 + if (recvp) { 474 + errno = msg_verify_data(&msg_peek, 475 + recvp, 476 + chunk_sz); 477 + if (errno) { 478 + perror("data verify msg_peek failed\n"); 479 + goto out_errno; 557 480 } 558 481 } 559 482 } ··· 557 488 clock_gettime(CLOCK_MONOTONIC, &s->end); 558 489 } 559 490 560 - for (i = 0; i < iov_count; i++) 561 - free(iov[i].iov_base); 562 - free(iov); 563 - return 0; 491 + msg_free_iov(&msg); 492 + msg_free_iov(&msg_peek); 493 + return err; 564 494 out_errno: 565 - for (i = 0; i < iov_count; i++) 566 - free(iov[i].iov_base); 567 - free(iov); 495 + msg_free_iov(&msg); 496 + msg_free_iov(&msg_peek); 568 497 return errno; 569 498 } 570 499 ··· 629 562 } 630 563 if (opt->verbose) 631 564 fprintf(stdout, 632 - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", 565 + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", 633 566 s.bytes_sent, sent_Bps, sent_Bps/giga, 634 - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); 567 + s.bytes_recvd, recvd_Bps, recvd_Bps/giga, 568 + peek_flag ? "(peek_msg)" : ""); 635 569 if (err && txmsg_cork) 636 570 err = 0; 637 571 exit(err ? 
1 : 0); ··· 907 839 } 908 840 } 909 841 842 + if (txmsg_start_push) { 843 + i = 2; 844 + err = bpf_map_update_elem(map_fd[5], 845 + &i, &txmsg_start_push, BPF_ANY); 846 + if (err) { 847 + fprintf(stderr, 848 + "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n", 849 + err, strerror(errno)); 850 + goto out; 851 + } 852 + } 853 + 854 + if (txmsg_end_push) { 855 + i = 3; 856 + err = bpf_map_update_elem(map_fd[5], 857 + &i, &txmsg_end_push, BPF_ANY); 858 + if (err) { 859 + fprintf(stderr, 860 + "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n", 861 + txmsg_end_push, i, err, strerror(errno)); 862 + goto out; 863 + } 864 + } 865 + 910 866 if (txmsg_ingress) { 911 867 int in = BPF_F_INGRESS; 912 868 ··· 1088 996 strncat(options, "skb,", OPTSTRING); 1089 997 if (ktls) 1090 998 strncat(options, "ktls,", OPTSTRING); 999 + if (peek_flag) 1000 + strncat(options, "peek,", OPTSTRING); 1091 1001 } 1092 1002 1093 1003 static int __test_exec(int cgrp, int test, struct sockmap_options *opt) ··· 1263 1169 txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; 1264 1170 txmsg_apply = txmsg_cork = 0; 1265 1171 txmsg_start = txmsg_end = 0; 1172 + txmsg_start_push = txmsg_end_push = 0; 1173 + 1266 1174 /* Test small and large iov_count values with pass/redir/apply/cork */ 1267 1175 txmsg_pass = 1; 1268 1176 txmsg_redir = 0; ··· 1381 1285 /* Test basic start/end with lots of iov_count and iov_lengths */ 1382 1286 txmsg_start = 1; 1383 1287 txmsg_end = 2; 1288 + txmsg_start_push = 1; 1289 + txmsg_end_push = 2; 1384 1290 err = test_txmsg(cgrp); 1385 1291 if (err) 1386 1292 goto out; ··· 1396 1298 for (i = 99; i <= 1600; i += 500) { 1397 1299 txmsg_start = 0; 1398 1300 txmsg_end = i; 1301 + txmsg_start_push = 0; 1302 + txmsg_end_push = i; 1399 1303 err = test_exec(cgrp, &opt); 1400 1304 if (err) 1401 1305 goto out; ··· 1407 1307 for (i = 199; i <= 1600; i += 500) { 1408 1308 txmsg_start = 100; 1409 1309 txmsg_end = i; 1310 + txmsg_start_push = 100; 1311 + 
txmsg_end_push = i; 1410 1312 err = test_exec(cgrp, &opt); 1411 1313 if (err) 1412 1314 goto out; ··· 1417 1315 /* Test start/end with cork pulling last sg entry */ 1418 1316 txmsg_start = 1500; 1419 1317 txmsg_end = 1600; 1318 + txmsg_start_push = 1500; 1319 + txmsg_end_push = 1600; 1420 1320 err = test_exec(cgrp, &opt); 1421 1321 if (err) 1422 1322 goto out; ··· 1426 1322 /* Test start/end pull of single byte in last page */ 1427 1323 txmsg_start = 1111; 1428 1324 txmsg_end = 1112; 1325 + txmsg_start_push = 1111; 1326 + txmsg_end_push = 1112; 1429 1327 err = test_exec(cgrp, &opt); 1430 1328 if (err) 1431 1329 goto out; ··· 1435 1329 /* Test start/end with end < start */ 1436 1330 txmsg_start = 1111; 1437 1331 txmsg_end = 0; 1332 + txmsg_start_push = 1111; 1333 + txmsg_end_push = 0; 1438 1334 err = test_exec(cgrp, &opt); 1439 1335 if (err) 1440 1336 goto out; ··· 1444 1336 /* Test start/end with end > data */ 1445 1337 txmsg_start = 0; 1446 1338 txmsg_end = 1601; 1339 + txmsg_start_push = 0; 1340 + txmsg_end_push = 1601; 1447 1341 err = test_exec(cgrp, &opt); 1448 1342 if (err) 1449 1343 goto out; ··· 1453 1343 /* Test start/end with start > data */ 1454 1344 txmsg_start = 1601; 1455 1345 txmsg_end = 1600; 1346 + txmsg_start_push = 1601; 1347 + txmsg_end_push = 1600; 1456 1348 err = test_exec(cgrp, &opt); 1457 1349 1458 1350 out: ··· 1470 1358 "sock_map_redir", 1471 1359 "sock_apply_bytes", 1472 1360 "sock_cork_bytes", 1473 - "sock_pull_bytes", 1361 + "sock_bytes", 1474 1362 "sock_redir_flags", 1475 1363 "sock_skb_opts", 1476 1364 }; ··· 1577 1465 } 1578 1466 1579 1467 /* Tests basic commands and APIs with range of iov values */ 1580 - txmsg_start = txmsg_end = 0; 1468 + txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; 1581 1469 err = test_txmsg(cg_fd); 1582 1470 if (err) 1583 1471 goto out; ··· 1626 1514 if (argc < 2) 1627 1515 return test_suite(-1); 1628 1516 1629 - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", 1517 + while ((opt = 
getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", 1630 1518 long_options, &longindex)) != -1) { 1631 1519 switch (opt) { 1632 1520 case 's': ··· 1634 1522 break; 1635 1523 case 'e': 1636 1524 txmsg_end = atoi(optarg); 1525 + break; 1526 + case 'p': 1527 + txmsg_start_push = atoi(optarg); 1528 + break; 1529 + case 'q': 1530 + txmsg_end_push = atoi(optarg); 1637 1531 break; 1638 1532 case 'a': 1639 1533 txmsg_apply = atoi(optarg);
+74 -23
tools/testing/selftests/bpf/test_sockmap_kern.h
··· 70 70 .max_entries = 1 71 71 }; 72 72 73 - struct bpf_map_def SEC("maps") sock_pull_bytes = { 73 + struct bpf_map_def SEC("maps") sock_bytes = { 74 74 .type = BPF_MAP_TYPE_ARRAY, 75 75 .key_size = sizeof(int), 76 76 .value_size = sizeof(int), 77 - .max_entries = 2 77 + .max_entries = 4 78 78 }; 79 79 80 80 struct bpf_map_def SEC("maps") sock_redir_flags = { ··· 181 181 SEC("sk_msg1") 182 182 int bpf_prog4(struct sk_msg_md *msg) 183 183 { 184 - int *bytes, zero = 0, one = 1; 185 - int *start, *end; 184 + int *bytes, zero = 0, one = 1, two = 2, three = 3; 185 + int *start, *end, *start_push, *end_push; 186 186 187 187 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 188 188 if (bytes) ··· 190 190 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 191 191 if (bytes) 192 192 bpf_msg_cork_bytes(msg, *bytes); 193 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 194 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 193 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 194 + end = bpf_map_lookup_elem(&sock_bytes, &one); 195 195 if (start && end) 196 196 bpf_msg_pull_data(msg, *start, *end, 0); 197 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 198 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 199 + if (start_push && end_push) 200 + bpf_msg_push_data(msg, *start_push, *end_push, 0); 197 201 return SK_PASS; 198 202 } 199 203 200 204 SEC("sk_msg2") 201 205 int bpf_prog5(struct sk_msg_md *msg) 202 206 { 203 - int err1 = -1, err2 = -1, zero = 0, one = 1; 204 - int *bytes, *start, *end, len1, len2; 207 + int zero = 0, one = 1, two = 2, three = 3; 208 + int *start, *end, *start_push, *end_push; 209 + int *bytes, len1, len2 = 0, len3; 210 + int err1 = -1, err2 = -1; 205 211 206 212 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 207 213 if (bytes) ··· 216 210 if (bytes) 217 211 err2 = bpf_msg_cork_bytes(msg, *bytes); 218 212 len1 = (__u64)msg->data_end - (__u64)msg->data; 219 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 
220 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 213 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 214 + end = bpf_map_lookup_elem(&sock_bytes, &one); 221 215 if (start && end) { 222 216 int err; 223 217 ··· 231 225 bpf_printk("sk_msg2: length update %i->%i\n", 232 226 len1, len2); 233 227 } 228 + 229 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 230 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 231 + if (start_push && end_push) { 232 + int err; 233 + 234 + bpf_printk("sk_msg2: push(%i:%i)\n", 235 + start_push ? *start_push : 0, 236 + end_push ? *end_push : 0); 237 + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); 238 + if (err) 239 + bpf_printk("sk_msg2: push_data err %i\n", err); 240 + len3 = (__u64)msg->data_end - (__u64)msg->data; 241 + bpf_printk("sk_msg2: length push_update %i->%i\n", 242 + len2 ? len2 : len1, len3); 243 + } 244 + 234 245 bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", 235 246 len1, err1, err2); 236 247 return SK_PASS; ··· 256 233 SEC("sk_msg3") 257 234 int bpf_prog6(struct sk_msg_md *msg) 258 235 { 259 - int *bytes, zero = 0, one = 1, key = 0; 260 - int *start, *end, *f; 236 + int *bytes, *start, *end, *start_push, *end_push, *f; 237 + int zero = 0, one = 1, two = 2, three = 3, key = 0; 261 238 __u64 flags = 0; 262 239 263 240 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); ··· 266 243 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 267 244 if (bytes) 268 245 bpf_msg_cork_bytes(msg, *bytes); 269 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 270 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 246 + 247 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 248 + end = bpf_map_lookup_elem(&sock_bytes, &one); 271 249 if (start && end) 272 250 bpf_msg_pull_data(msg, *start, *end, 0); 251 + 252 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 253 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 254 + if (start_push && end_push) 255 + bpf_msg_push_data(msg, 
*start_push, *end_push, 0); 256 + 273 257 f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 274 258 if (f && *f) { 275 259 key = 2; ··· 292 262 SEC("sk_msg4") 293 263 int bpf_prog7(struct sk_msg_md *msg) 294 264 { 295 - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; 296 - int *f, *bytes, *start, *end, len1, len2; 265 + int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; 266 + int *bytes, *start, *end, *start_push, *end_push, *f; 267 + int err1 = 0, err2 = 0, key = 0; 297 268 __u64 flags = 0; 298 269 299 270 int err; ··· 305 274 if (bytes) 306 275 err2 = bpf_msg_cork_bytes(msg, *bytes); 307 276 len1 = (__u64)msg->data_end - (__u64)msg->data; 308 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 309 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 310 - if (start && end) { 311 277 278 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 279 + end = bpf_map_lookup_elem(&sock_bytes, &one); 280 + if (start && end) { 312 281 bpf_printk("sk_msg2: pull(%i:%i)\n", 313 282 start ? *start : 0, end ? *end : 0); 314 283 err = bpf_msg_pull_data(msg, *start, *end, 0); ··· 319 288 bpf_printk("sk_msg2: length update %i->%i\n", 320 289 len1, len2); 321 290 } 291 + 292 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 293 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 294 + if (start_push && end_push) { 295 + bpf_printk("sk_msg4: push(%i:%i)\n", 296 + start_push ? *start_push : 0, 297 + end_push ? *end_push : 0); 298 + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); 299 + if (err) 300 + bpf_printk("sk_msg4: push_data err %i\n", 301 + err); 302 + len3 = (__u64)msg->data_end - (__u64)msg->data; 303 + bpf_printk("sk_msg4: length push_update %i->%i\n", 304 + len2 ? 
len2 : len1, len3); 305 + } 306 + 322 307 f = bpf_map_lookup_elem(&sock_redir_flags, &zero); 323 308 if (f && *f) { 324 309 key = 2; ··· 389 342 SEC("sk_msg7") 390 343 int bpf_prog10(struct sk_msg_md *msg) 391 344 { 392 - int *bytes, zero = 0, one = 1; 393 - int *start, *end; 345 + int *bytes, *start, *end, *start_push, *end_push; 346 + int zero = 0, one = 1, two = 2, three = 3; 394 347 395 348 bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); 396 349 if (bytes) ··· 398 351 bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); 399 352 if (bytes) 400 353 bpf_msg_cork_bytes(msg, *bytes); 401 - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); 402 - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); 354 + start = bpf_map_lookup_elem(&sock_bytes, &zero); 355 + end = bpf_map_lookup_elem(&sock_bytes, &one); 403 356 if (start && end) 404 357 bpf_msg_pull_data(msg, *start, *end, 0); 358 + start_push = bpf_map_lookup_elem(&sock_bytes, &two); 359 + end_push = bpf_map_lookup_elem(&sock_bytes, &three); 360 + if (start_push && end_push) 361 + bpf_msg_push_data(msg, *start_push, *end_push, 0); 405 362 406 363 return SK_DROP; 407 364 }
+4
tools/testing/selftests/bpf/test_stack_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2018 Politecnico di Torino 3 + #define MAP_TYPE BPF_MAP_TYPE_STACK 4 + #include "test_queue_stack_map.h"
+176 -5
tools/testing/selftests/bpf/test_verifier.c
··· 3430 3430 BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), 3431 3431 BPF_EXIT_INSN(), 3432 3432 }, 3433 - .errstr = "BPF_ST stores into R1 inv is not allowed", 3433 + .errstr = "BPF_ST stores into R1 ctx is not allowed", 3434 3434 .result = REJECT, 3435 3435 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3436 3436 }, ··· 3442 3442 BPF_REG_0, offsetof(struct __sk_buff, mark), 0), 3443 3443 BPF_EXIT_INSN(), 3444 3444 }, 3445 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 3445 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 3446 3446 .result = REJECT, 3447 3447 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3448 3448 }, ··· 4863 4863 .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, 4864 4864 }, 4865 4865 { 4866 + "direct packet read test#1 for CGROUP_SKB", 4867 + .insns = { 4868 + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4869 + offsetof(struct __sk_buff, data)), 4870 + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 4871 + offsetof(struct __sk_buff, data_end)), 4872 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4873 + offsetof(struct __sk_buff, len)), 4874 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4875 + offsetof(struct __sk_buff, pkt_type)), 4876 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4877 + offsetof(struct __sk_buff, mark)), 4878 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4879 + offsetof(struct __sk_buff, mark)), 4880 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4881 + offsetof(struct __sk_buff, queue_mapping)), 4882 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4883 + offsetof(struct __sk_buff, protocol)), 4884 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4885 + offsetof(struct __sk_buff, vlan_present)), 4886 + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), 4887 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), 4888 + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), 4889 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), 4890 + BPF_MOV64_IMM(BPF_REG_0, 0), 4891 + BPF_EXIT_INSN(), 4892 + }, 4893 + .result = ACCEPT, 4894 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4895 + }, 4896 + { 
4897 + "direct packet read test#2 for CGROUP_SKB", 4898 + .insns = { 4899 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4900 + offsetof(struct __sk_buff, vlan_tci)), 4901 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4902 + offsetof(struct __sk_buff, vlan_proto)), 4903 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4904 + offsetof(struct __sk_buff, priority)), 4905 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4906 + offsetof(struct __sk_buff, priority)), 4907 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4908 + offsetof(struct __sk_buff, 4909 + ingress_ifindex)), 4910 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4911 + offsetof(struct __sk_buff, tc_index)), 4912 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4913 + offsetof(struct __sk_buff, hash)), 4914 + BPF_MOV64_IMM(BPF_REG_0, 0), 4915 + BPF_EXIT_INSN(), 4916 + }, 4917 + .result = ACCEPT, 4918 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4919 + }, 4920 + { 4921 + "direct packet read test#3 for CGROUP_SKB", 4922 + .insns = { 4923 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4924 + offsetof(struct __sk_buff, cb[0])), 4925 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4926 + offsetof(struct __sk_buff, cb[1])), 4927 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4928 + offsetof(struct __sk_buff, cb[2])), 4929 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4930 + offsetof(struct __sk_buff, cb[3])), 4931 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4932 + offsetof(struct __sk_buff, cb[4])), 4933 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 4934 + offsetof(struct __sk_buff, napi_id)), 4935 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, 4936 + offsetof(struct __sk_buff, cb[0])), 4937 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, 4938 + offsetof(struct __sk_buff, cb[1])), 4939 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, 4940 + offsetof(struct __sk_buff, cb[2])), 4941 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 4942 + offsetof(struct __sk_buff, cb[3])), 4943 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, 4944 + offsetof(struct __sk_buff, cb[4])), 4945 + 
BPF_MOV64_IMM(BPF_REG_0, 0), 4946 + BPF_EXIT_INSN(), 4947 + }, 4948 + .result = ACCEPT, 4949 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4950 + }, 4951 + { 4952 + "direct packet read test#4 for CGROUP_SKB", 4953 + .insns = { 4954 + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4955 + offsetof(struct __sk_buff, family)), 4956 + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 4957 + offsetof(struct __sk_buff, remote_ip4)), 4958 + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 4959 + offsetof(struct __sk_buff, local_ip4)), 4960 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4961 + offsetof(struct __sk_buff, remote_ip6[0])), 4962 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4963 + offsetof(struct __sk_buff, remote_ip6[1])), 4964 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4965 + offsetof(struct __sk_buff, remote_ip6[2])), 4966 + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, 4967 + offsetof(struct __sk_buff, remote_ip6[3])), 4968 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4969 + offsetof(struct __sk_buff, local_ip6[0])), 4970 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4971 + offsetof(struct __sk_buff, local_ip6[1])), 4972 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4973 + offsetof(struct __sk_buff, local_ip6[2])), 4974 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 4975 + offsetof(struct __sk_buff, local_ip6[3])), 4976 + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, 4977 + offsetof(struct __sk_buff, remote_port)), 4978 + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, 4979 + offsetof(struct __sk_buff, local_port)), 4980 + BPF_MOV64_IMM(BPF_REG_0, 0), 4981 + BPF_EXIT_INSN(), 4982 + }, 4983 + .result = ACCEPT, 4984 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 4985 + }, 4986 + { 4987 + "invalid access of tc_classid for CGROUP_SKB", 4988 + .insns = { 4989 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 4990 + offsetof(struct __sk_buff, tc_classid)), 4991 + BPF_MOV64_IMM(BPF_REG_0, 0), 4992 + BPF_EXIT_INSN(), 4993 + }, 4994 + .result = REJECT, 4995 + .errstr = "invalid bpf_context access", 4996 + .prog_type = 
BPF_PROG_TYPE_CGROUP_SKB, 4997 + }, 4998 + { 4999 + "invalid access of data_meta for CGROUP_SKB", 5000 + .insns = { 5001 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 5002 + offsetof(struct __sk_buff, data_meta)), 5003 + BPF_MOV64_IMM(BPF_REG_0, 0), 5004 + BPF_EXIT_INSN(), 5005 + }, 5006 + .result = REJECT, 5007 + .errstr = "invalid bpf_context access", 5008 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5009 + }, 5010 + { 5011 + "invalid access of flow_keys for CGROUP_SKB", 5012 + .insns = { 5013 + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 5014 + offsetof(struct __sk_buff, flow_keys)), 5015 + BPF_MOV64_IMM(BPF_REG_0, 0), 5016 + BPF_EXIT_INSN(), 5017 + }, 5018 + .result = REJECT, 5019 + .errstr = "invalid bpf_context access", 5020 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5021 + }, 5022 + { 5023 + "invalid write access to napi_id for CGROUP_SKB", 5024 + .insns = { 5025 + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, 5026 + offsetof(struct __sk_buff, napi_id)), 5027 + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, 5028 + offsetof(struct __sk_buff, napi_id)), 5029 + BPF_MOV64_IMM(BPF_REG_0, 0), 5030 + BPF_EXIT_INSN(), 5031 + }, 5032 + .result = REJECT, 5033 + .errstr = "invalid bpf_context access", 5034 + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, 5035 + }, 5036 + { 4866 5037 "valid cgroup storage access", 4867 5038 .insns = { 4868 5039 BPF_MOV64_IMM(BPF_REG_2, 0), ··· 5670 5499 .errstr_unpriv = "R2 leaks addr into mem", 5671 5500 .result_unpriv = REJECT, 5672 5501 .result = REJECT, 5673 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 5502 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 5674 5503 }, 5675 5504 { 5676 5505 "leak pointer into ctx 2", ··· 5685 5514 .errstr_unpriv = "R10 leaks addr into mem", 5686 5515 .result_unpriv = REJECT, 5687 5516 .result = REJECT, 5688 - .errstr = "BPF_XADD stores into R1 inv is not allowed", 5517 + .errstr = "BPF_XADD stores into R1 ctx is not allowed", 5689 5518 }, 5690 5519 { 5691 5520 "leak pointer into ctx 3", ··· 12634 12463 
BPF_EXIT_INSN(), 12635 12464 }, 12636 12465 .result = REJECT, 12637 - .errstr = "BPF_XADD stores into R2 ctx", 12466 + .errstr = "BPF_XADD stores into R2 pkt is not allowed", 12638 12467 .prog_type = BPF_PROG_TYPE_XDP, 12639 12468 }, 12640 12469 {
+5 -3
tools/testing/selftests/bpf/trace_helpers.c
··· 41 41 syms[i].name = strdup(func); 42 42 i++; 43 43 } 44 + fclose(f); 44 45 sym_cnt = i; 45 46 qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); 46 47 return 0; ··· 125 124 char data[]; 126 125 }; 127 126 128 - static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) 127 + static enum bpf_perf_event_ret 128 + bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) 129 129 { 130 - struct perf_event_sample *e = event; 131 - perf_event_print_fn fn = priv; 130 + struct perf_event_sample *e = (struct perf_event_sample *)hdr; 131 + perf_event_print_fn fn = private_data; 132 132 int ret; 133 133 134 134 if (e->header.type == PERF_RECORD_SAMPLE) {