Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-01-19

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) bpf array map HW offload, from Jakub.

2) support for bpf_get_next_key() for LPM map, from Yonghong.

3) test_verifier now runs loaded programs, from Alexei.

4) xdp cpumap monitoring, from Jesper.

5) variety of tests, cleanups and small x64 JIT optimization, from Daniel.

6) user space can now retrieve HW JITed program, from Jiong.

Note there is a minor conflict between Russell's arm32 JIT fixes
and removal of bpf_jit_enable variable by Daniel which should
be resolved by keeping Russell's comment and removing that variable.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+1835 -229
-2
arch/arm/net/bpf_jit_32.c
··· 25 25 26 26 #include "bpf_jit_32.h" 27 27 28 - int bpf_jit_enable __read_mostly; 29 - 30 28 /* 31 29 * eBPF prog stack layout: 32 30 *
-2
arch/arm64/net/bpf_jit_comp.c
··· 31 31 32 32 #include "bpf_jit.h" 33 33 34 - int bpf_jit_enable __read_mostly; 35 - 36 34 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 37 35 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 38 36 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
-2
arch/mips/net/bpf_jit.c
··· 1207 1207 return 0; 1208 1208 } 1209 1209 1210 - int bpf_jit_enable __read_mostly; 1211 - 1212 1210 void bpf_jit_compile(struct bpf_prog *fp) 1213 1211 { 1214 1212 struct jit_ctx ctx;
-2
arch/mips/net/ebpf_jit.c
··· 177 177 (ctx->idx * 4) - 4; 178 178 } 179 179 180 - int bpf_jit_enable __read_mostly; 181 - 182 180 enum which_ebpf_reg { 183 181 src_reg, 184 182 src_reg_no_fp,
-2
arch/powerpc/net/bpf_jit_comp.c
··· 18 18 19 19 #include "bpf_jit32.h" 20 20 21 - int bpf_jit_enable __read_mostly; 22 - 23 21 static inline void bpf_flush_icache(void *start, void *end) 24 22 { 25 23 smp_wmb();
-2
arch/powerpc/net/bpf_jit_comp64.c
··· 21 21 22 22 #include "bpf_jit64.h" 23 23 24 - int bpf_jit_enable __read_mostly; 25 - 26 24 static void bpf_jit_fill_ill_insns(void *area, unsigned int size) 27 25 { 28 26 memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-2
arch/s390/net/bpf_jit_comp.c
··· 28 28 #include <asm/set_memory.h> 29 29 #include "bpf_jit.h" 30 30 31 - int bpf_jit_enable __read_mostly; 32 - 33 31 struct bpf_jit { 34 32 u32 seen; /* Flags to remember seen eBPF instructions */ 35 33 u32 seen_reg[16]; /* Array to remember which registers are used */
-2
arch/sparc/net/bpf_jit_comp_32.c
··· 11 11 12 12 #include "bpf_jit_32.h" 13 13 14 - int bpf_jit_enable __read_mostly; 15 - 16 14 static inline bool is_simm13(unsigned int value) 17 15 { 18 16 return value + 0x1000 < 0x2000;
-2
arch/sparc/net/bpf_jit_comp_64.c
··· 12 12 13 13 #include "bpf_jit_64.h" 14 14 15 - int bpf_jit_enable __read_mostly; 16 - 17 15 static inline bool is_simm13(unsigned int value) 18 16 { 19 17 return value + 0x1000 < 0x2000;
+30 -7
arch/x86/net/bpf_jit_comp.c
··· 15 15 #include <asm/set_memory.h> 16 16 #include <linux/bpf.h> 17 17 18 - int bpf_jit_enable __read_mostly; 19 - 20 18 /* 21 19 * assembly code in arch/x86/net/bpf_jit.S 22 20 */ ··· 150 152 BIT(BPF_REG_8) | 151 153 BIT(BPF_REG_9) | 152 154 BIT(BPF_REG_AX)); 155 + } 156 + 157 + static bool is_axreg(u32 reg) 158 + { 159 + return reg == BPF_REG_0; 153 160 } 154 161 155 162 /* add modifiers if 'reg' maps to x64 registers r8..r15 */ ··· 450 447 else if (is_ereg(dst_reg)) 451 448 EMIT1(add_1mod(0x40, dst_reg)); 452 449 450 + /* b3 holds 'normal' opcode, b2 short form only valid 451 + * in case dst is eax/rax. 452 + */ 453 453 switch (BPF_OP(insn->code)) { 454 - case BPF_ADD: b3 = 0xC0; break; 455 - case BPF_SUB: b3 = 0xE8; break; 456 - case BPF_AND: b3 = 0xE0; break; 457 - case BPF_OR: b3 = 0xC8; break; 458 - case BPF_XOR: b3 = 0xF0; break; 454 + case BPF_ADD: 455 + b3 = 0xC0; 456 + b2 = 0x05; 457 + break; 458 + case BPF_SUB: 459 + b3 = 0xE8; 460 + b2 = 0x2D; 461 + break; 462 + case BPF_AND: 463 + b3 = 0xE0; 464 + b2 = 0x25; 465 + break; 466 + case BPF_OR: 467 + b3 = 0xC8; 468 + b2 = 0x0D; 469 + break; 470 + case BPF_XOR: 471 + b3 = 0xF0; 472 + b2 = 0x35; 473 + break; 459 474 } 460 475 461 476 if (is_imm8(imm32)) 462 477 EMIT3(0x83, add_1reg(b3, dst_reg), imm32); 478 + else if (is_axreg(dst_reg)) 479 + EMIT1_off32(b2, imm32); 463 480 else 464 481 EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); 465 482 break;
+8 -1
drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
··· 157 157 int tag) 158 158 { 159 159 struct sk_buff *skb; 160 - int err; 160 + int i, err; 161 + 162 + for (i = 0; i < 50; i++) { 163 + udelay(4); 164 + skb = nfp_bpf_reply(bpf, tag); 165 + if (skb) 166 + return skb; 167 + } 161 168 162 169 err = wait_event_interruptible_timeout(bpf->cmsg_wq, 163 170 skb = nfp_bpf_reply(bpf, tag),
+11 -1
drivers/net/ethernet/netronome/nfp/bpf/offload.c
··· 127 127 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; 128 128 unsigned int stack_size; 129 129 unsigned int max_instr; 130 + int err; 130 131 131 132 stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 132 133 if (prog->aux->stack_depth > stack_size) { ··· 144 143 if (!nfp_prog->prog) 145 144 return -ENOMEM; 146 145 147 - return nfp_bpf_jit(nfp_prog); 146 + err = nfp_bpf_jit(nfp_prog); 147 + if (err) 148 + return err; 149 + 150 + prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); 151 + prog->aux->offload->jited_image = nfp_prog->prog; 152 + 153 + return 0; 148 154 } 149 155 150 156 static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) ··· 176 168 static int 177 169 nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) 178 170 { 171 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) 172 + return -EINVAL; 179 173 return nfp_bpf_ctrl_del_entry(offmap, key); 180 174 } 181 175
+246
drivers/net/netdevsim/bpf.c
··· 17 17 #include <linux/bpf_verifier.h> 18 18 #include <linux/debugfs.h> 19 19 #include <linux/kernel.h> 20 + #include <linux/mutex.h> 20 21 #include <linux/rtnetlink.h> 21 22 #include <net/pkt_cls.h> 22 23 ··· 29 28 struct dentry *ddir; 30 29 const char *state; 31 30 bool is_loaded; 31 + struct list_head l; 32 + }; 33 + 34 + #define NSIM_BPF_MAX_KEYS 2 35 + 36 + struct nsim_bpf_bound_map { 37 + struct netdevsim *ns; 38 + struct bpf_offloaded_map *map; 39 + struct mutex mutex; 40 + struct nsim_map_entry { 41 + void *key; 42 + void *value; 43 + } entry[NSIM_BPF_MAX_KEYS]; 32 44 struct list_head l; 33 45 }; 34 46 ··· 298 284 return 0; 299 285 } 300 286 287 + static bool 288 + nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) 289 + { 290 + return e->key && !memcmp(key, e->key, map->key_size); 291 + } 292 + 293 + static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) 294 + { 295 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 296 + unsigned int i; 297 + 298 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) 299 + if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) 300 + return i; 301 + 302 + return -ENOENT; 303 + } 304 + 305 + static int 306 + nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) 307 + { 308 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 309 + 310 + nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); 311 + if (!nmap->entry[idx].key) 312 + return -ENOMEM; 313 + nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); 314 + if (!nmap->entry[idx].value) { 315 + kfree(nmap->entry[idx].key); 316 + nmap->entry[idx].key = NULL; 317 + return -ENOMEM; 318 + } 319 + 320 + return 0; 321 + } 322 + 323 + static int 324 + nsim_map_get_next_key(struct bpf_offloaded_map *offmap, 325 + void *key, void *next_key) 326 + { 327 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 328 + int idx = -ENOENT; 329 + 330 + mutex_lock(&nmap->mutex); 331 + 332 + if (key) 333 
+ idx = nsim_map_key_find(offmap, key); 334 + if (idx == -ENOENT) 335 + idx = 0; 336 + else 337 + idx++; 338 + 339 + for (; idx < ARRAY_SIZE(nmap->entry); idx++) { 340 + if (nmap->entry[idx].key) { 341 + memcpy(next_key, nmap->entry[idx].key, 342 + offmap->map.key_size); 343 + break; 344 + } 345 + } 346 + 347 + mutex_unlock(&nmap->mutex); 348 + 349 + if (idx == ARRAY_SIZE(nmap->entry)) 350 + return -ENOENT; 351 + return 0; 352 + } 353 + 354 + static int 355 + nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) 356 + { 357 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 358 + int idx; 359 + 360 + mutex_lock(&nmap->mutex); 361 + 362 + idx = nsim_map_key_find(offmap, key); 363 + if (idx >= 0) 364 + memcpy(value, nmap->entry[idx].value, offmap->map.value_size); 365 + 366 + mutex_unlock(&nmap->mutex); 367 + 368 + return idx < 0 ? idx : 0; 369 + } 370 + 371 + static int 372 + nsim_map_update_elem(struct bpf_offloaded_map *offmap, 373 + void *key, void *value, u64 flags) 374 + { 375 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 376 + int idx, err = 0; 377 + 378 + mutex_lock(&nmap->mutex); 379 + 380 + idx = nsim_map_key_find(offmap, key); 381 + if (idx < 0 && flags == BPF_EXIST) { 382 + err = idx; 383 + goto exit_unlock; 384 + } 385 + if (idx >= 0 && flags == BPF_NOEXIST) { 386 + err = -EEXIST; 387 + goto exit_unlock; 388 + } 389 + 390 + if (idx < 0) { 391 + for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) 392 + if (!nmap->entry[idx].key) 393 + break; 394 + if (idx == ARRAY_SIZE(nmap->entry)) { 395 + err = -E2BIG; 396 + goto exit_unlock; 397 + } 398 + 399 + err = nsim_map_alloc_elem(offmap, idx); 400 + if (err) 401 + goto exit_unlock; 402 + } 403 + 404 + memcpy(nmap->entry[idx].key, key, offmap->map.key_size); 405 + memcpy(nmap->entry[idx].value, value, offmap->map.value_size); 406 + exit_unlock: 407 + mutex_unlock(&nmap->mutex); 408 + 409 + return err; 410 + } 411 + 412 + static int nsim_map_delete_elem(struct bpf_offloaded_map 
*offmap, void *key) 413 + { 414 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 415 + int idx; 416 + 417 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) 418 + return -EINVAL; 419 + 420 + mutex_lock(&nmap->mutex); 421 + 422 + idx = nsim_map_key_find(offmap, key); 423 + if (idx >= 0) { 424 + kfree(nmap->entry[idx].key); 425 + kfree(nmap->entry[idx].value); 426 + memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); 427 + } 428 + 429 + mutex_unlock(&nmap->mutex); 430 + 431 + return idx < 0 ? idx : 0; 432 + } 433 + 434 + static const struct bpf_map_dev_ops nsim_bpf_map_ops = { 435 + .map_get_next_key = nsim_map_get_next_key, 436 + .map_lookup_elem = nsim_map_lookup_elem, 437 + .map_update_elem = nsim_map_update_elem, 438 + .map_delete_elem = nsim_map_delete_elem, 439 + }; 440 + 441 + static int 442 + nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) 443 + { 444 + struct nsim_bpf_bound_map *nmap; 445 + unsigned int i; 446 + int err; 447 + 448 + if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY && 449 + offmap->map.map_type != BPF_MAP_TYPE_HASH)) 450 + return -EINVAL; 451 + if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS) 452 + return -ENOMEM; 453 + if (offmap->map.map_flags) 454 + return -EINVAL; 455 + 456 + nmap = kzalloc(sizeof(*nmap), GFP_USER); 457 + if (!nmap) 458 + return -ENOMEM; 459 + 460 + offmap->dev_priv = nmap; 461 + nmap->ns = ns; 462 + nmap->map = offmap; 463 + mutex_init(&nmap->mutex); 464 + 465 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) { 466 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { 467 + u32 *key; 468 + 469 + err = nsim_map_alloc_elem(offmap, i); 470 + if (err) 471 + goto err_free; 472 + key = nmap->entry[i].key; 473 + *key = i; 474 + } 475 + } 476 + 477 + offmap->dev_ops = &nsim_bpf_map_ops; 478 + list_add_tail(&nmap->l, &ns->bpf_bound_maps); 479 + 480 + return 0; 481 + 482 + err_free: 483 + while (i--) { 484 + kfree(nmap->entry[i].key); 485 + kfree(nmap->entry[i].value); 486 + 
kfree(nmap); 488 + return err; 489 + } 490 + 491 + static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap) 492 + { 493 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 494 + unsigned int i; 495 + 496 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { 497 + kfree(nmap->entry[i].key); 498 + kfree(nmap->entry[i].value); 499 + } 500 + list_del_init(&nmap->l); 501 + mutex_destroy(&nmap->mutex); 502 + kfree(nmap); 503 + } 504 + 301 505 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) 302 506 { 303 507 struct netdevsim *ns = netdev_priv(dev); ··· 560 328 return err; 561 329 562 330 return nsim_xdp_set_prog(ns, bpf); 331 + case BPF_OFFLOAD_MAP_ALLOC: 332 + if (!ns->bpf_map_accept) 333 + return -EOPNOTSUPP; 334 + 335 + return nsim_bpf_map_alloc(ns, bpf->offmap); 336 + case BPF_OFFLOAD_MAP_FREE: 337 + nsim_bpf_map_free(bpf->offmap); 338 + return 0; 563 339 default: 564 340 return -EINVAL; 565 341 } ··· 576 336 int nsim_bpf_init(struct netdevsim *ns) 577 337 { 578 338 INIT_LIST_HEAD(&ns->bpf_bound_progs); 339 + INIT_LIST_HEAD(&ns->bpf_bound_maps); 579 340 580 341 debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir, 581 342 &ns->bpf_offloaded_id); ··· 603 362 debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir, 604 363 &ns->bpf_xdpoffload_accept); 605 364 365 + ns->bpf_map_accept = true; 366 + debugfs_create_bool("bpf_map_accept", 0600, ns->ddir, 367 + &ns->bpf_map_accept); 368 + 606 369 return 0; 607 370 } 608 371 609 372 void nsim_bpf_uninit(struct netdevsim *ns) 610 373 { 611 374 WARN_ON(!list_empty(&ns->bpf_bound_progs)); 375 + WARN_ON(!list_empty(&ns->bpf_bound_maps)); 612 376 WARN_ON(ns->xdp_prog); 613 377 WARN_ON(ns->bpf_offloaded); 614 378 }
+3
drivers/net/netdevsim/netdevsim.h
··· 61 61 bool bpf_tc_non_bound_accept; 62 62 bool bpf_xdpdrv_accept; 63 63 bool bpf_xdpoffload_accept; 64 + 65 + bool bpf_map_accept; 66 + struct list_head bpf_bound_maps; 64 67 }; 65 68 66 69 extern struct dentry *nsim_ddir;
+4
include/linux/bpf.h
··· 234 234 struct list_head offloads; 235 235 bool dev_state; 236 236 const struct bpf_prog_offload_ops *dev_ops; 237 + void *jited_image; 238 + u32 jited_len; 237 239 }; 238 240 239 241 struct bpf_prog_aux { ··· 585 583 void bpf_prog_offload_destroy(struct bpf_prog *prog); 586 584 int bpf_prog_offload_info_fill(struct bpf_prog_info *info, 587 585 struct bpf_prog *prog); 586 + 587 + int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map); 588 588 589 589 int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value); 590 590 int bpf_map_offload_update_elem(struct bpf_map *map,
+4 -1
include/uapi/linux/bpf.h
··· 17 17 #define BPF_ALU64 0x07 /* alu mode in double word width */ 18 18 19 19 /* ld/ldx fields */ 20 - #define BPF_DW 0x18 /* double word */ 20 + #define BPF_DW 0x18 /* double word (64-bit) */ 21 21 #define BPF_XADD 0xc0 /* exclusive add */ 22 22 23 23 /* alu/jmp fields */ ··· 938 938 __u32 max_entries; 939 939 __u32 map_flags; 940 940 char name[BPF_OBJ_NAME_LEN]; 941 + __u32 ifindex; 942 + __u64 netns_dev; 943 + __u64 netns_ino; 941 944 } __attribute__((aligned(8))); 942 945 943 946 /* User bpf_sock_ops struct to access socket values and specify request ops
+4 -3
include/uapi/linux/bpf_common.h
··· 15 15 16 16 /* ld/ldx fields */ 17 17 #define BPF_SIZE(code) ((code) & 0x18) 18 - #define BPF_W 0x00 19 - #define BPF_H 0x08 20 - #define BPF_B 0x10 18 + #define BPF_W 0x00 /* 32-bit */ 19 + #define BPF_H 0x08 /* 16-bit */ 20 + #define BPF_B 0x10 /* 8-bit */ 21 + /* eBPF BPF_DW 0x18 64-bit */ 21 22 #define BPF_MODE(code) ((code) & 0xe0) 22 23 #define BPF_IMM 0x00 23 24 #define BPF_ABS 0x20
+35 -26
kernel/bpf/arraymap.c
··· 49 49 } 50 50 51 51 /* Called from syscall */ 52 + static int array_map_alloc_check(union bpf_attr *attr) 53 + { 54 + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 55 + int numa_node = bpf_map_attr_numa_node(attr); 56 + 57 + /* check sanity of attributes */ 58 + if (attr->max_entries == 0 || attr->key_size != 4 || 59 + attr->value_size == 0 || 60 + attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 61 + (percpu && numa_node != NUMA_NO_NODE)) 62 + return -EINVAL; 63 + 64 + if (attr->value_size > KMALLOC_MAX_SIZE) 65 + /* if value_size is bigger, the user space won't be able to 66 + * access the elements. 67 + */ 68 + return -E2BIG; 69 + 70 + return 0; 71 + } 72 + 52 73 static struct bpf_map *array_map_alloc(union bpf_attr *attr) 53 74 { 54 75 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; ··· 78 57 bool unpriv = !capable(CAP_SYS_ADMIN); 79 58 struct bpf_array *array; 80 59 u64 array_size, mask64; 81 - 82 - /* check sanity of attributes */ 83 - if (attr->max_entries == 0 || attr->key_size != 4 || 84 - attr->value_size == 0 || 85 - attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 86 - (percpu && numa_node != NUMA_NO_NODE)) 87 - return ERR_PTR(-EINVAL); 88 - 89 - if (attr->value_size > KMALLOC_MAX_SIZE) 90 - /* if value_size is bigger, the user space won't be able to 91 - * access the elements. 
92 - */ 93 - return ERR_PTR(-E2BIG); 94 60 95 61 elem_size = round_up(attr->value_size, 8); 96 62 ··· 120 112 array->map.unpriv_array = unpriv; 121 113 122 114 /* copy mandatory map attributes */ 123 - array->map.map_type = attr->map_type; 124 - array->map.key_size = attr->key_size; 125 - array->map.value_size = attr->value_size; 126 - array->map.max_entries = attr->max_entries; 127 - array->map.map_flags = attr->map_flags; 128 - array->map.numa_node = numa_node; 115 + bpf_map_init_from_attr(&array->map, attr); 129 116 array->elem_size = elem_size; 130 117 131 118 if (!percpu) ··· 330 327 } 331 328 332 329 const struct bpf_map_ops array_map_ops = { 330 + .map_alloc_check = array_map_alloc_check, 333 331 .map_alloc = array_map_alloc, 334 332 .map_free = array_map_free, 335 333 .map_get_next_key = array_map_get_next_key, ··· 341 337 }; 342 338 343 339 const struct bpf_map_ops percpu_array_map_ops = { 340 + .map_alloc_check = array_map_alloc_check, 344 341 .map_alloc = array_map_alloc, 345 342 .map_free = array_map_free, 346 343 .map_get_next_key = array_map_get_next_key, ··· 350 345 .map_delete_elem = array_map_delete_elem, 351 346 }; 352 347 353 - static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) 348 + static int fd_array_map_alloc_check(union bpf_attr *attr) 354 349 { 355 350 /* only file descriptors can be stored in this type of map */ 356 351 if (attr->value_size != sizeof(u32)) 357 - return ERR_PTR(-EINVAL); 358 - return array_map_alloc(attr); 352 + return -EINVAL; 353 + return array_map_alloc_check(attr); 359 354 } 360 355 361 356 static void fd_array_map_free(struct bpf_map *map) ··· 479 474 } 480 475 481 476 const struct bpf_map_ops prog_array_map_ops = { 482 - .map_alloc = fd_array_map_alloc, 477 + .map_alloc_check = fd_array_map_alloc_check, 478 + .map_alloc = array_map_alloc, 483 479 .map_free = fd_array_map_free, 484 480 .map_get_next_key = array_map_get_next_key, 485 481 .map_lookup_elem = fd_array_map_lookup_elem, ··· 567 561 } 568 562 569 
563 const struct bpf_map_ops perf_event_array_map_ops = { 570 - .map_alloc = fd_array_map_alloc, 564 + .map_alloc_check = fd_array_map_alloc_check, 565 + .map_alloc = array_map_alloc, 571 566 .map_free = fd_array_map_free, 572 567 .map_get_next_key = array_map_get_next_key, 573 568 .map_lookup_elem = fd_array_map_lookup_elem, ··· 599 592 } 600 593 601 594 const struct bpf_map_ops cgroup_array_map_ops = { 602 - .map_alloc = fd_array_map_alloc, 595 + .map_alloc_check = fd_array_map_alloc_check, 596 + .map_alloc = array_map_alloc, 603 597 .map_free = cgroup_fd_array_free, 604 598 .map_get_next_key = array_map_get_next_key, 605 599 .map_lookup_elem = fd_array_map_lookup_elem, ··· 618 610 if (IS_ERR(inner_map_meta)) 619 611 return inner_map_meta; 620 612 621 - map = fd_array_map_alloc(attr); 613 + map = array_map_alloc(attr); 622 614 if (IS_ERR(map)) { 623 615 bpf_map_meta_free(inner_map_meta); 624 616 return map; ··· 681 673 } 682 674 683 675 const struct bpf_map_ops array_of_maps_map_ops = { 676 + .map_alloc_check = fd_array_map_alloc_check, 684 677 .map_alloc = array_of_map_alloc, 685 678 .map_free = array_of_map_free, 686 679 .map_get_next_key = array_map_get_next_key,
+12 -7
kernel/bpf/core.c
··· 300 300 } 301 301 302 302 #ifdef CONFIG_BPF_JIT 303 + /* All BPF JIT sysctl knobs here. */ 304 + int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); 305 + int bpf_jit_harden __read_mostly; 306 + int bpf_jit_kallsyms __read_mostly; 307 + 303 308 static __always_inline void 304 309 bpf_get_prog_addr_region(const struct bpf_prog *prog, 305 310 unsigned long *symbol_start, ··· 385 380 static DEFINE_SPINLOCK(bpf_lock); 386 381 static LIST_HEAD(bpf_kallsyms); 387 382 static struct latch_tree_root bpf_tree __cacheline_aligned; 388 - 389 - int bpf_jit_kallsyms __read_mostly; 390 383 391 384 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) 392 385 { ··· 565 562 566 563 bpf_prog_unlock_free(fp); 567 564 } 568 - 569 - int bpf_jit_harden __read_mostly; 570 565 571 566 static int bpf_jit_blind_insn(const struct bpf_insn *from, 572 567 const struct bpf_insn *aux, ··· 1380 1379 } 1381 1380 1382 1381 #else 1383 - static unsigned int __bpf_prog_ret0(const void *ctx, 1384 - const struct bpf_insn *insn) 1382 + static unsigned int __bpf_prog_ret0_warn(const void *ctx, 1383 + const struct bpf_insn *insn) 1385 1384 { 1385 + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON 1386 + * is not working properly, so warn about it! 1387 + */ 1388 + WARN_ON_ONCE(1); 1386 1389 return 0; 1387 1390 } 1388 1391 #endif ··· 1446 1441 1447 1442 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; 1448 1443 #else 1449 - fp->bpf_func = __bpf_prog_ret0; 1444 + fp->bpf_func = __bpf_prog_ret0_warn; 1450 1445 #endif 1451 1446 1452 1447 /* eBPF JITs can rewrite the program in case constant
+93 -2
kernel/bpf/lpm_trie.c
··· 591 591 raw_spin_unlock(&trie->lock); 592 592 } 593 593 594 - static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) 594 + static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) 595 595 { 596 - return -ENOTSUPP; 596 + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); 597 + struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; 598 + struct lpm_trie_node *node, *next_node = NULL, *parent; 599 + struct lpm_trie_node **node_stack = NULL; 600 + struct lpm_trie_node __rcu **root; 601 + int err = 0, stack_ptr = -1; 602 + unsigned int next_bit; 603 + size_t matchlen; 604 + 605 + /* The get_next_key follows postorder. For the 4 node example in 606 + * the top of this file, the trie_get_next_key() returns the following 607 + * one after another: 608 + * 192.168.0.0/24 609 + * 192.168.1.0/24 610 + * 192.168.128.0/24 611 + * 192.168.0.0/16 612 + * 613 + * The idea is to return more specific keys before less specific ones. 614 + */ 615 + 616 + /* Empty trie */ 617 + if (!rcu_dereference(trie->root)) 618 + return -ENOENT; 619 + 620 + /* For invalid key, find the leftmost node in the trie */ 621 + if (!key || key->prefixlen > trie->max_prefixlen) { 622 + root = &trie->root; 623 + goto find_leftmost; 624 + } 625 + 626 + node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *), 627 + GFP_USER | __GFP_NOWARN); 628 + if (!node_stack) 629 + return -ENOMEM; 630 + 631 + /* Try to find the exact node for the given key */ 632 + for (node = rcu_dereference(trie->root); node;) { 633 + node_stack[++stack_ptr] = node; 634 + matchlen = longest_prefix_match(trie, node, key); 635 + if (node->prefixlen != matchlen || 636 + node->prefixlen == key->prefixlen) 637 + break; 638 + 639 + next_bit = extract_bit(key->data, node->prefixlen); 640 + node = rcu_dereference(node->child[next_bit]); 641 + } 642 + if (!node || node->prefixlen != key->prefixlen || 643 + (node->flags & LPM_TREE_NODE_FLAG_IM)) { 644 + root = 
&trie->root; 645 + goto find_leftmost; 646 + } 647 + 648 + /* The node with the exactly-matching key has been found, 649 + * find the first node in postorder after the matched node. 650 + */ 651 + node = node_stack[stack_ptr]; 652 + while (stack_ptr > 0) { 653 + parent = node_stack[stack_ptr - 1]; 654 + if (rcu_dereference(parent->child[0]) == node && 655 + rcu_dereference(parent->child[1])) { 656 + root = &parent->child[1]; 657 + goto find_leftmost; 658 + } 659 + if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { 660 + next_node = parent; 661 + goto do_copy; 662 + } 663 + 664 + node = parent; 665 + stack_ptr--; 666 + } 667 + 668 + /* did not find anything */ 669 + err = -ENOENT; 670 + goto free_stack; 671 + 672 + find_leftmost: 673 + /* Find the leftmost non-intermediate node, all intermediate nodes 674 + * have exact two children, so this function will never return NULL. 675 + */ 676 + for (node = rcu_dereference(*root); node;) { 677 + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) 678 + next_node = node; 679 + node = rcu_dereference(node->child[0]); 680 + } 681 + do_copy: 682 + next_key->prefixlen = next_node->prefixlen; 683 + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), 684 + next_node->data, trie->data_size); 685 + free_stack: 686 + kfree(node_stack); 687 + return err; 597 688 } 598 689 599 690 const struct bpf_map_ops trie_map_ops = {
+80 -1
kernel/bpf/offload.c
··· 230 230 .prog = prog, 231 231 .info = info, 232 232 }; 233 + struct bpf_prog_aux *aux = prog->aux; 233 234 struct inode *ns_inode; 234 235 struct path ns_path; 236 + char __user *uinsns; 235 237 void *res; 238 + u32 ulen; 236 239 237 240 res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); 238 241 if (IS_ERR(res)) { ··· 243 240 return -ENODEV; 244 241 return PTR_ERR(res); 245 242 } 243 + 244 + down_read(&bpf_devs_lock); 245 + 246 + if (!aux->offload) { 247 + up_read(&bpf_devs_lock); 248 + return -ENODEV; 249 + } 250 + 251 + ulen = info->jited_prog_len; 252 + info->jited_prog_len = aux->offload->jited_len; 253 + if (info->jited_prog_len && ulen) { 254 + uinsns = u64_to_user_ptr(info->jited_prog_insns); 255 + ulen = min_t(u32, info->jited_prog_len, ulen); 256 + if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { 257 + up_read(&bpf_devs_lock); 258 + return -EFAULT; 259 + } 260 + } 261 + 262 + up_read(&bpf_devs_lock); 246 263 247 264 ns_inode = ns_path.dentry->d_inode; 248 265 info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); ··· 299 276 300 277 if (!capable(CAP_SYS_ADMIN)) 301 278 return ERR_PTR(-EPERM); 302 - if (attr->map_type != BPF_MAP_TYPE_HASH) 279 + if (attr->map_type != BPF_MAP_TYPE_ARRAY && 280 + attr->map_type != BPF_MAP_TYPE_HASH) 303 281 return ERR_PTR(-EINVAL); 304 282 305 283 offmap = kzalloc(sizeof(*offmap), GFP_USER); ··· 411 387 up_read(&bpf_devs_lock); 412 388 413 389 return ret; 390 + } 391 + 392 + struct ns_get_path_bpf_map_args { 393 + struct bpf_offloaded_map *offmap; 394 + struct bpf_map_info *info; 395 + }; 396 + 397 + static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) 398 + { 399 + struct ns_get_path_bpf_map_args *args = private_data; 400 + struct ns_common *ns; 401 + struct net *net; 402 + 403 + rtnl_lock(); 404 + down_read(&bpf_devs_lock); 405 + 406 + if (args->offmap->netdev) { 407 + args->info->ifindex = args->offmap->netdev->ifindex; 408 + net = dev_net(args->offmap->netdev); 409 
+ get_net(net); 410 + ns = &net->ns; 411 + } else { 412 + args->info->ifindex = 0; 413 + ns = NULL; 414 + } 415 + 416 + up_read(&bpf_devs_lock); 417 + rtnl_unlock(); 418 + 419 + return ns; 420 + } 421 + 422 + int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) 423 + { 424 + struct ns_get_path_bpf_map_args args = { 425 + .offmap = map_to_offmap(map), 426 + .info = info, 427 + }; 428 + struct inode *ns_inode; 429 + struct path ns_path; 430 + void *res; 431 + 432 + res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); 433 + if (IS_ERR(res)) { 434 + if (!info->ifindex) 435 + return -ENODEV; 436 + return PTR_ERR(res); 437 + } 438 + 439 + ns_inode = ns_path.dentry->d_inode; 440 + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); 441 + info->netns_ino = ns_inode->i_ino; 442 + path_put(&ns_path); 443 + 444 + return 0; 414 445 } 415 446 416 447 bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+26 -13
kernel/bpf/syscall.c
··· 1504 1504 struct bpf_prog *prog; 1505 1505 int ret = -ENOTSUPP; 1506 1506 1507 + if (!capable(CAP_SYS_ADMIN)) 1508 + return -EPERM; 1507 1509 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 1508 1510 return -EINVAL; 1509 1511 ··· 1726 1724 goto done; 1727 1725 } 1728 1726 1729 - ulen = info.jited_prog_len; 1730 - info.jited_prog_len = prog->jited_len; 1731 - if (info.jited_prog_len && ulen) { 1732 - if (bpf_dump_raw_ok()) { 1733 - uinsns = u64_to_user_ptr(info.jited_prog_insns); 1734 - ulen = min_t(u32, info.jited_prog_len, ulen); 1735 - if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1736 - return -EFAULT; 1737 - } else { 1738 - info.jited_prog_insns = 0; 1739 - } 1740 - } 1741 - 1742 1727 ulen = info.xlated_prog_len; 1743 1728 info.xlated_prog_len = bpf_prog_insn_size(prog); 1744 1729 if (info.xlated_prog_len && ulen) { ··· 1751 1762 err = bpf_prog_offload_info_fill(&info, prog); 1752 1763 if (err) 1753 1764 return err; 1765 + goto done; 1766 + } 1767 + 1768 + /* NOTE: the following code is supposed to be skipped for offload. 1769 + * bpf_prog_offload_info_fill() is the place to fill similar fields 1770 + * for offload. 1771 + */ 1772 + ulen = info.jited_prog_len; 1773 + info.jited_prog_len = prog->jited_len; 1774 + if (info.jited_prog_len && ulen) { 1775 + if (bpf_dump_raw_ok()) { 1776 + uinsns = u64_to_user_ptr(info.jited_prog_insns); 1777 + ulen = min_t(u32, info.jited_prog_len, ulen); 1778 + if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1779 + return -EFAULT; 1780 + } else { 1781 + info.jited_prog_insns = 0; 1782 + } 1754 1783 } 1755 1784 1756 1785 done: ··· 1800 1793 info.max_entries = map->max_entries; 1801 1794 info.map_flags = map->map_flags; 1802 1795 memcpy(info.name, map->name, sizeof(map->name)); 1796 + 1797 + if (bpf_map_is_dev_bound(map)) { 1798 + err = bpf_map_offload_info_fill(&info, map); 1799 + if (err) 1800 + return err; 1801 + } 1803 1802 1804 1803 if (copy_to_user(uinfo, &info, info_len) || 1805 1804 put_user(info_len, &uattr->info.info_len))
+57 -25
kernel/bpf/verifier.c
··· 1850 1850 } 1851 1851 } 1852 1852 1853 + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 1854 + { 1855 + return type == ARG_PTR_TO_MEM || 1856 + type == ARG_PTR_TO_MEM_OR_NULL || 1857 + type == ARG_PTR_TO_UNINIT_MEM; 1858 + } 1859 + 1860 + static bool arg_type_is_mem_size(enum bpf_arg_type type) 1861 + { 1862 + return type == ARG_CONST_SIZE || 1863 + type == ARG_CONST_SIZE_OR_ZERO; 1864 + } 1865 + 1853 1866 static int check_func_arg(struct bpf_verifier_env *env, u32 regno, 1854 1867 enum bpf_arg_type arg_type, 1855 1868 struct bpf_call_arg_meta *meta) ··· 1912 1899 expected_type = PTR_TO_CTX; 1913 1900 if (type != expected_type) 1914 1901 goto err_type; 1915 - } else if (arg_type == ARG_PTR_TO_MEM || 1916 - arg_type == ARG_PTR_TO_MEM_OR_NULL || 1917 - arg_type == ARG_PTR_TO_UNINIT_MEM) { 1902 + } else if (arg_type_is_mem_ptr(arg_type)) { 1918 1903 expected_type = PTR_TO_STACK; 1919 1904 /* One exception here. In case function allows for NULL to be 1920 1905 * passed in as argument, it's a SCALAR_VALUE type. Final test ··· 1973 1962 err = check_stack_boundary(env, regno, 1974 1963 meta->map_ptr->value_size, 1975 1964 false, NULL); 1976 - } else if (arg_type == ARG_CONST_SIZE || 1977 - arg_type == ARG_CONST_SIZE_OR_ZERO) { 1965 + } else if (arg_type_is_mem_size(arg_type)) { 1978 1966 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 1979 - 1980 - /* bpf_xxx(..., buf, len) call will access 'len' bytes 1981 - * from stack pointer 'buf'. Check it 1982 - * note: regno == len, regno - 1 == buf 1983 - */ 1984 - if (regno == 0) { 1985 - /* kernel subsystem misconfigured verifier */ 1986 - verbose(env, 1987 - "ARG_CONST_SIZE cannot be first argument\n"); 1988 - return -EACCES; 1989 - } 1990 1967 1991 1968 /* The register is SCALAR_VALUE; the access check 1992 1969 * happens using its boundaries. 
1993 1970 */ 1994 - 1995 1971 if (!tnum_is_const(reg->var_off)) 1996 1972 /* For unprivileged variable accesses, disable raw 1997 1973 * mode so that the program is required to ··· 2122 2124 return -EINVAL; 2123 2125 } 2124 2126 2125 - static int check_raw_mode(const struct bpf_func_proto *fn) 2127 + static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 2126 2128 { 2127 2129 int count = 0; 2128 2130 ··· 2137 2139 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) 2138 2140 count++; 2139 2141 2140 - return count > 1 ? -EINVAL : 0; 2142 + /* We only support one arg being in raw mode at the moment, 2143 + * which is sufficient for the helper functions we have 2144 + * right now. 2145 + */ 2146 + return count <= 1; 2147 + } 2148 + 2149 + static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, 2150 + enum bpf_arg_type arg_next) 2151 + { 2152 + return (arg_type_is_mem_ptr(arg_curr) && 2153 + !arg_type_is_mem_size(arg_next)) || 2154 + (!arg_type_is_mem_ptr(arg_curr) && 2155 + arg_type_is_mem_size(arg_next)); 2156 + } 2157 + 2158 + static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 2159 + { 2160 + /* bpf_xxx(..., buf, len) call will access 'len' 2161 + * bytes from memory 'buf'. Both arg types need 2162 + * to be paired, so make sure there's no buggy 2163 + * helper function specification. 2164 + */ 2165 + if (arg_type_is_mem_size(fn->arg1_type) || 2166 + arg_type_is_mem_ptr(fn->arg5_type) || 2167 + check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || 2168 + check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || 2169 + check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || 2170 + check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) 2171 + return false; 2172 + 2173 + return true; 2174 + } 2175 + 2176 + static int check_func_proto(const struct bpf_func_proto *fn) 2177 + { 2178 + return check_raw_mode_ok(fn) && 2179 + check_arg_pair_ok(fn) ? 
0 : -EINVAL; 2141 2180 } 2142 2181 2143 2182 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] ··· 2330 2295 2331 2296 if (env->ops->get_func_proto) 2332 2297 fn = env->ops->get_func_proto(func_id); 2333 - 2334 2298 if (!fn) { 2335 2299 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 2336 2300 func_id); ··· 2353 2319 memset(&meta, 0, sizeof(meta)); 2354 2320 meta.pkt_access = fn->pkt_access; 2355 2321 2356 - /* We only support one arg being in raw mode at the moment, which 2357 - * is sufficient for the helper functions we have right now. 2358 - */ 2359 - err = check_raw_mode(fn); 2322 + err = check_func_proto(fn); 2360 2323 if (err) { 2361 2324 verbose(env, "kernel subsystem misconfigured func %s#%d\n", 2362 2325 func_id_name(func_id), func_id); ··· 4834 4803 insn_idx++; 4835 4804 } 4836 4805 4837 - verbose(env, "processed %d insns, stack depth ", insn_processed); 4806 + verbose(env, "processed %d insns (limit %d), stack depth ", 4807 + insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); 4838 4808 for (i = 0; i < env->subprog_cnt + 1; i++) { 4839 4809 u32 depth = env->subprog_stack_depth[i]; 4840 4810
+1 -1
kernel/trace/bpf_trace.c
··· 245 245 */ 246 246 #define __BPF_TP_EMIT() __BPF_ARG3_TP() 247 247 #define __BPF_TP(...) \ 248 - __trace_printk(1 /* Fake ip will not be printed. */, \ 248 + __trace_printk(0 /* Fake ip */, \ 249 249 fmt, ##__VA_ARGS__) 250 250 251 251 #define __BPF_ARG1_TP(...) \
+104
lib/test_bpf.c
··· 6109 6109 { { ETH_HLEN, 42 } }, 6110 6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, 6111 6111 }, 6112 + /* Checking interpreter vs JIT wrt signed extended imms. */ 6113 + { 6114 + "JNE signed compare, test 1", 6115 + .u.insns_int = { 6116 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6117 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6118 + BPF_MOV64_REG(R2, R1), 6119 + BPF_ALU64_REG(BPF_AND, R2, R3), 6120 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6121 + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), 6122 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6123 + BPF_EXIT_INSN(), 6124 + }, 6125 + INTERNAL, 6126 + { }, 6127 + { { 0, 1 } }, 6128 + }, 6129 + { 6130 + "JNE signed compare, test 2", 6131 + .u.insns_int = { 6132 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6133 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6134 + BPF_MOV64_REG(R2, R1), 6135 + BPF_ALU64_REG(BPF_AND, R2, R3), 6136 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6137 + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), 6138 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6139 + BPF_EXIT_INSN(), 6140 + }, 6141 + INTERNAL, 6142 + { }, 6143 + { { 0, 1 } }, 6144 + }, 6145 + { 6146 + "JNE signed compare, test 3", 6147 + .u.insns_int = { 6148 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6149 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6150 + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), 6151 + BPF_MOV64_REG(R2, R1), 6152 + BPF_ALU64_REG(BPF_AND, R2, R3), 6153 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6154 + BPF_JMP_REG(BPF_JNE, R2, R4, 1), 6155 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6156 + BPF_EXIT_INSN(), 6157 + }, 6158 + INTERNAL, 6159 + { }, 6160 + { { 0, 2 } }, 6161 + }, 6162 + { 6163 + "JNE signed compare, test 4", 6164 + .u.insns_int = { 6165 + BPF_LD_IMM64(R1, -17104896), 6166 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6167 + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), 6168 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6169 + BPF_EXIT_INSN(), 6170 + }, 6171 + INTERNAL, 6172 + { }, 6173 + { { 0, 2 } }, 6174 + }, 6175 + { 6176 + "JNE signed compare, test 5", 6177 + .u.insns_int = { 6178 + BPF_LD_IMM64(R1, 0xfefb0000), 6179 
+ BPF_ALU32_IMM(BPF_MOV, R0, 1), 6180 + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), 6181 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6182 + BPF_EXIT_INSN(), 6183 + }, 6184 + INTERNAL, 6185 + { }, 6186 + { { 0, 1 } }, 6187 + }, 6188 + { 6189 + "JNE signed compare, test 6", 6190 + .u.insns_int = { 6191 + BPF_LD_IMM64(R1, 0x7efb0000), 6192 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6193 + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), 6194 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6195 + BPF_EXIT_INSN(), 6196 + }, 6197 + INTERNAL, 6198 + { }, 6199 + { { 0, 2 } }, 6200 + }, 6201 + { 6202 + "JNE signed compare, test 7", 6203 + .u.insns = { 6204 + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), 6205 + BPF_STMT(BPF_MISC | BPF_TAX, 0), 6206 + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), 6207 + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), 6208 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), 6209 + BPF_STMT(BPF_RET | BPF_K, 1), 6210 + BPF_STMT(BPF_RET | BPF_K, 2), 6211 + }, 6212 + CLASSIC | FLAG_NO_DATA, 6213 + {}, 6214 + { { 0, 2 } }, 6215 + }, 6112 6216 }; 6113 6217 6114 6218 static struct net_device dev;
+5 -2
net/core/filter.c
··· 2865 2865 .arg2_type = ARG_CONST_MAP_PTR, 2866 2866 .arg3_type = ARG_ANYTHING, 2867 2867 .arg4_type = ARG_PTR_TO_MEM, 2868 - .arg5_type = ARG_CONST_SIZE, 2868 + .arg5_type = ARG_CONST_SIZE_OR_ZERO, 2869 2869 }; 2870 2870 2871 2871 static unsigned short bpf_tunnel_key_af(u64 flags) ··· 3154 3154 .arg2_type = ARG_CONST_MAP_PTR, 3155 3155 .arg3_type = ARG_ANYTHING, 3156 3156 .arg4_type = ARG_PTR_TO_MEM, 3157 - .arg5_type = ARG_CONST_SIZE, 3157 + .arg5_type = ARG_CONST_SIZE_OR_ZERO, 3158 3158 }; 3159 3159 3160 3160 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) ··· 3460 3460 return &bpf_xdp_event_output_proto; 3461 3461 case BPF_FUNC_get_smp_processor_id: 3462 3462 return &bpf_get_smp_processor_id_proto; 3463 + case BPF_FUNC_csum_diff: 3464 + return &bpf_csum_diff_proto; 3463 3465 case BPF_FUNC_xdp_adjust_head: 3464 3466 return &bpf_xdp_adjust_head_proto; 3465 3467 case BPF_FUNC_xdp_adjust_meta: ··· 4532 4530 }; 4533 4531 4534 4532 const struct bpf_prog_ops sk_filter_prog_ops = { 4533 + .test_run = bpf_prog_test_run_skb, 4535 4534 }; 4536 4535 4537 4536 const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
+53 -7
net/core/sysctl_net_core.c
··· 25 25 26 26 static int zero = 0; 27 27 static int one = 1; 28 + static int two __maybe_unused = 2; 28 29 static int min_sndbuf = SOCK_MIN_SNDBUF; 29 30 static int min_rcvbuf = SOCK_MIN_RCVBUF; 30 31 static int max_skb_frags = MAX_SKB_FRAGS; ··· 251 250 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 252 251 } 253 252 253 + #ifdef CONFIG_BPF_JIT 254 + static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, 255 + void __user *buffer, size_t *lenp, 256 + loff_t *ppos) 257 + { 258 + int ret, jit_enable = *(int *)table->data; 259 + struct ctl_table tmp = *table; 260 + 261 + if (write && !capable(CAP_SYS_ADMIN)) 262 + return -EPERM; 263 + 264 + tmp.data = &jit_enable; 265 + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 266 + if (write && !ret) { 267 + if (jit_enable < 2 || 268 + (jit_enable == 2 && bpf_dump_raw_ok())) { 269 + *(int *)table->data = jit_enable; 270 + if (jit_enable == 2) 271 + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 272 + } else { 273 + ret = -EPERM; 274 + } 275 + } 276 + return ret; 277 + } 278 + 279 + # ifdef CONFIG_HAVE_EBPF_JIT 280 + static int 281 + proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, 282 + void __user *buffer, size_t *lenp, 283 + loff_t *ppos) 284 + { 285 + if (!capable(CAP_SYS_ADMIN)) 286 + return -EPERM; 287 + 288 + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 289 + } 290 + # endif 291 + #endif 292 + 254 293 static struct ctl_table net_core_table[] = { 255 294 #ifdef CONFIG_NET 256 295 { ··· 366 325 .data = &bpf_jit_enable, 367 326 .maxlen = sizeof(int), 368 327 .mode = 0644, 369 - #ifndef CONFIG_BPF_JIT_ALWAYS_ON 370 - .proc_handler = proc_dointvec 371 - #else 372 - .proc_handler = proc_dointvec_minmax, 328 + .proc_handler = proc_dointvec_minmax_bpf_enable, 329 + # ifdef CONFIG_BPF_JIT_ALWAYS_ON 373 330 .extra1 = &one, 374 331 .extra2 = &one, 375 - #endif 332 + # else 333 + .extra1 = 
&zero, 334 + .extra2 = &two, 335 + # endif 376 336 }, 377 337 # ifdef CONFIG_HAVE_EBPF_JIT 378 338 { ··· 381 339 .data = &bpf_jit_harden, 382 340 .maxlen = sizeof(int), 383 341 .mode = 0600, 384 - .proc_handler = proc_dointvec, 342 + .proc_handler = proc_dointvec_minmax_bpf_restricted, 343 + .extra1 = &zero, 344 + .extra2 = &two, 385 345 }, 386 346 { 387 347 .procname = "bpf_jit_kallsyms", 388 348 .data = &bpf_jit_kallsyms, 389 349 .maxlen = sizeof(int), 390 350 .mode = 0600, 391 - .proc_handler = proc_dointvec, 351 + .proc_handler = proc_dointvec_minmax_bpf_restricted, 352 + .extra1 = &zero, 353 + .extra2 = &one, 392 354 }, 393 355 # endif 394 356 #endif
-9
net/socket.c
··· 2613 2613 2614 2614 core_initcall(sock_init); /* early initcall */ 2615 2615 2616 - static int __init jit_init(void) 2617 - { 2618 - #ifdef CONFIG_BPF_JIT_ALWAYS_ON 2619 - bpf_jit_enable = 1; 2620 - #endif 2621 - return 0; 2622 - } 2623 - pure_initcall(jit_init); 2624 - 2625 2616 #ifdef CONFIG_PROC_FS 2626 2617 void socket_seq_show(struct seq_file *seq) 2627 2618 {
+5 -3
samples/bpf/xdp2skb_meta_kern.c
··· 35 35 void *data, *data_end; 36 36 int ret; 37 37 38 - /* Reserve space in-front data pointer for our meta info. 38 + /* Reserve space in-front of data pointer for our meta info. 39 39 * (Notice drivers not supporting data_meta will fail here!) 40 40 */ 41 41 ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); 42 42 if (ret < 0) 43 43 return XDP_ABORTED; 44 44 45 - /* For some unknown reason, these ctx pointers must be read 46 - * after bpf_xdp_adjust_meta, else verifier will reject prog. 45 + /* Notice: Kernel-side verifier requires that loading of 46 + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), 47 + * as pkt-data pointers are invalidated. Helpers that require 48 + * this are determined/marked by bpf_helper_changes_pkt_data() 47 49 */ 48 50 data = (void *)(unsigned long)ctx->data; 49 51
+92 -2
samples/bpf/xdp_monitor_kern.c
··· 1 - /* XDP monitor tool, based on tracepoints 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. 2 3 * 3 - * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 4 + * XDP monitor tool, based on tracepoints 4 5 */ 5 6 #include <uapi/linux/bpf.h> 6 7 #include "bpf_helpers.h" ··· 116 115 if (!cnt) 117 116 return 1; 118 117 *cnt += 1; 118 + 119 + return 0; 120 + } 121 + 122 + /* Common stats data record shared with _user.c */ 123 + struct datarec { 124 + u64 processed; 125 + u64 dropped; 126 + u64 info; 127 + }; 128 + #define MAX_CPUS 64 129 + 130 + struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { 131 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 132 + .key_size = sizeof(u32), 133 + .value_size = sizeof(struct datarec), 134 + .max_entries = MAX_CPUS, 135 + }; 136 + 137 + struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { 138 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 139 + .key_size = sizeof(u32), 140 + .value_size = sizeof(struct datarec), 141 + .max_entries = 1, 142 + }; 143 + 144 + /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format 145 + * Code in: kernel/include/trace/events/xdp.h 146 + */ 147 + struct cpumap_enqueue_ctx { 148 + u64 __pad; // First 8 bytes are not accessible by bpf code 149 + int map_id; // offset:8; size:4; signed:1; 150 + u32 act; // offset:12; size:4; signed:0; 151 + int cpu; // offset:16; size:4; signed:1; 152 + unsigned int drops; // offset:20; size:4; signed:0; 153 + unsigned int processed; // offset:24; size:4; signed:0; 154 + int to_cpu; // offset:28; size:4; signed:1; 155 + }; 156 + 157 + SEC("tracepoint/xdp/xdp_cpumap_enqueue") 158 + int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) 159 + { 160 + u32 to_cpu = ctx->to_cpu; 161 + struct datarec *rec; 162 + 163 + if (to_cpu >= MAX_CPUS) 164 + return 1; 165 + 166 + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); 167 + if (!rec) 168 + return 0; 169 + rec->processed += ctx->processed; 170 + 
rec->dropped += ctx->drops; 171 + 172 + /* Record bulk events, then userspace can calc average bulk size */ 173 + if (ctx->processed > 0) 174 + rec->info += 1; 175 + 176 + return 0; 177 + } 178 + 179 + /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format 180 + * Code in: kernel/include/trace/events/xdp.h 181 + */ 182 + struct cpumap_kthread_ctx { 183 + u64 __pad; // First 8 bytes are not accessible by bpf code 184 + int map_id; // offset:8; size:4; signed:1; 185 + u32 act; // offset:12; size:4; signed:0; 186 + int cpu; // offset:16; size:4; signed:1; 187 + unsigned int drops; // offset:20; size:4; signed:0; 188 + unsigned int processed; // offset:24; size:4; signed:0; 189 + int sched; // offset:28; size:4; signed:1; 190 + }; 191 + 192 + SEC("tracepoint/xdp/xdp_cpumap_kthread") 193 + int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) 194 + { 195 + struct datarec *rec; 196 + u32 key = 0; 197 + 198 + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); 199 + if (!rec) 200 + return 0; 201 + rec->processed += ctx->processed; 202 + rec->dropped += ctx->drops; 203 + 204 + /* Count times kthread yielded CPU via schedule call */ 205 + if (ctx->sched) 206 + rec->info++; 119 207 120 208 return 0; 121 209 }
+352 -66
samples/bpf/xdp_monitor_user.c
··· 1 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 2 3 */ 3 4 static const char *__doc__= 4 5 "XDP monitor tool, based on tracepoints\n" ··· 40 39 {"sec", required_argument, NULL, 's' }, 41 40 {0, 0, NULL, 0 } 42 41 }; 42 + 43 + /* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ 44 + #define EXIT_FAIL_MEM 5 43 45 44 46 static void usage(char *argv[]) 45 47 { ··· 112 108 return NULL; 113 109 } 114 110 111 + /* Common stats data record shared with _kern.c */ 112 + struct datarec { 113 + __u64 processed; 114 + __u64 dropped; 115 + __u64 info; 116 + }; 117 + #define MAX_CPUS 64 118 + 119 + /* Userspace structs for collection of stats from maps */ 115 120 struct record { 116 - __u64 counter; 117 121 __u64 timestamp; 122 + struct datarec total; 123 + struct datarec *cpu; 124 + }; 125 + struct u64rec { 126 + __u64 processed; 127 + }; 128 + struct record_u64 { 129 + /* record for _kern side __u64 values */ 130 + __u64 timestamp; 131 + struct u64rec total; 132 + struct u64rec *cpu; 118 133 }; 119 134 120 135 struct stats_record { 121 - struct record xdp_redir[REDIR_RES_MAX]; 122 - struct record xdp_exception[XDP_ACTION_MAX]; 136 + struct record_u64 xdp_redirect[REDIR_RES_MAX]; 137 + struct record_u64 xdp_exception[XDP_ACTION_MAX]; 138 + struct record xdp_cpumap_kthread; 139 + struct record xdp_cpumap_enqueue[MAX_CPUS]; 123 140 }; 124 141 125 - static void stats_print_headers(bool err_only) 142 + static bool map_collect_record(int fd, __u32 key, struct record *rec) 126 143 { 127 - if (err_only) 128 - printf("\n%s\n", __doc_err_only__); 144 + /* For percpu maps, userspace gets a value per possible CPU */ 145 + unsigned int nr_cpus = bpf_num_possible_cpus(); 146 + struct datarec values[nr_cpus]; 147 + __u64 sum_processed = 0; 148 + __u64 sum_dropped = 0; 149 + __u64 sum_info = 0; 150 + int i; 129 151 130 - printf("%-14s %-11s %-10s 
%-18s %-9s\n", 131 - "ACTION", "result", "pps ", "pps-human-readable", "measure-period"); 152 + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 153 + fprintf(stderr, 154 + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 155 + return false; 156 + } 157 + /* Get time as close as possible to reading map contents */ 158 + rec->timestamp = gettime(); 159 + 160 + /* Record and sum values from each CPU */ 161 + for (i = 0; i < nr_cpus; i++) { 162 + rec->cpu[i].processed = values[i].processed; 163 + sum_processed += values[i].processed; 164 + rec->cpu[i].dropped = values[i].dropped; 165 + sum_dropped += values[i].dropped; 166 + rec->cpu[i].info = values[i].info; 167 + sum_info += values[i].info; 168 + } 169 + rec->total.processed = sum_processed; 170 + rec->total.dropped = sum_dropped; 171 + rec->total.info = sum_info; 172 + return true; 173 + } 174 + 175 + static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) 176 + { 177 + /* For percpu maps, userspace gets a value per possible CPU */ 178 + unsigned int nr_cpus = bpf_num_possible_cpus(); 179 + struct u64rec values[nr_cpus]; 180 + __u64 sum_total = 0; 181 + int i; 182 + 183 + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 184 + fprintf(stderr, 185 + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 186 + return false; 187 + } 188 + /* Get time as close as possible to reading map contents */ 189 + rec->timestamp = gettime(); 190 + 191 + /* Record and sum values from each CPU */ 192 + for (i = 0; i < nr_cpus; i++) { 193 + rec->cpu[i].processed = values[i].processed; 194 + sum_total += values[i].processed; 195 + } 196 + rec->total.processed = sum_total; 197 + return true; 132 198 } 133 199 134 200 static double calc_period(struct record *r, struct record *p) ··· 213 139 return period_; 214 140 } 215 141 216 - static double calc_pps(struct record *r, struct record *p, double period) 142 + static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) 143 + { 144 + double 
period_ = 0; 145 + __u64 period = 0; 146 + 147 + period = r->timestamp - p->timestamp; 148 + if (period > 0) 149 + period_ = ((double) period / NANOSEC_PER_SEC); 150 + 151 + return period_; 152 + } 153 + 154 + static double calc_pps(struct datarec *r, struct datarec *p, double period) 217 155 { 218 156 __u64 packets = 0; 219 157 double pps = 0; 220 158 221 159 if (period > 0) { 222 - packets = r->counter - p->counter; 160 + packets = r->processed - p->processed; 223 161 pps = packets / period; 224 162 } 225 163 return pps; 226 164 } 227 165 228 - static void stats_print(struct stats_record *rec, 229 - struct stats_record *prev, 166 + static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) 167 + { 168 + __u64 packets = 0; 169 + double pps = 0; 170 + 171 + if (period > 0) { 172 + packets = r->processed - p->processed; 173 + pps = packets / period; 174 + } 175 + return pps; 176 + } 177 + 178 + static double calc_drop(struct datarec *r, struct datarec *p, double period) 179 + { 180 + __u64 packets = 0; 181 + double pps = 0; 182 + 183 + if (period > 0) { 184 + packets = r->dropped - p->dropped; 185 + pps = packets / period; 186 + } 187 + return pps; 188 + } 189 + 190 + static double calc_info(struct datarec *r, struct datarec *p, double period) 191 + { 192 + __u64 packets = 0; 193 + double pps = 0; 194 + 195 + if (period > 0) { 196 + packets = r->info - p->info; 197 + pps = packets / period; 198 + } 199 + return pps; 200 + } 201 + 202 + static void stats_print(struct stats_record *stats_rec, 203 + struct stats_record *stats_prev, 230 204 bool err_only) 231 205 { 232 - double period = 0, pps = 0; 233 - struct record *r, *p; 234 - int i = 0; 206 + unsigned int nr_cpus = bpf_num_possible_cpus(); 207 + int rec_i = 0, i, to_cpu; 208 + double t = 0, pps = 0; 235 209 236 - char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n"; 210 + /* Header */ 211 + printf("%-15s %-7s %-12s %-12s %-9s\n", 212 + "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); 237 213 
238 214 /* tracepoint: xdp:xdp_redirect_* */ 239 215 if (err_only) 240 - i = REDIR_ERROR; 216 + rec_i = REDIR_ERROR; 241 217 242 - for (; i < REDIR_RES_MAX; i++) { 243 - r = &rec->xdp_redir[i]; 244 - p = &prev->xdp_redir[i]; 218 + for (; rec_i < REDIR_RES_MAX; rec_i++) { 219 + struct record_u64 *rec, *prev; 220 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; 221 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; 245 222 246 - if (p->timestamp) { 247 - period = calc_period(r, p); 248 - pps = calc_pps(r, p, period); 223 + rec = &stats_rec->xdp_redirect[rec_i]; 224 + prev = &stats_prev->xdp_redirect[rec_i]; 225 + t = calc_period_u64(rec, prev); 226 + 227 + for (i = 0; i < nr_cpus; i++) { 228 + struct u64rec *r = &rec->cpu[i]; 229 + struct u64rec *p = &prev->cpu[i]; 230 + 231 + pps = calc_pps_u64(r, p, t); 232 + if (pps > 0) 233 + printf(fmt1, "XDP_REDIRECT", i, 234 + rec_i ? 0.0: pps, rec_i ? pps : 0.0, 235 + err2str(rec_i)); 249 236 } 250 - printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period); 237 + pps = calc_pps_u64(&rec->total, &prev->total, t); 238 + printf(fmt2, "XDP_REDIRECT", "total", 239 + rec_i ? 0.0: pps, rec_i ? 
pps : 0.0, err2str(rec_i)); 251 240 } 252 241 253 242 /* tracepoint: xdp:xdp_exception */ 254 - for (i = 0; i < XDP_ACTION_MAX; i++) { 255 - r = &rec->xdp_exception[i]; 256 - p = &prev->xdp_exception[i]; 257 - if (p->timestamp) { 258 - period = calc_period(r, p); 259 - pps = calc_pps(r, p, period); 243 + for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { 244 + struct record_u64 *rec, *prev; 245 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; 246 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; 247 + 248 + rec = &stats_rec->xdp_exception[rec_i]; 249 + prev = &stats_prev->xdp_exception[rec_i]; 250 + t = calc_period_u64(rec, prev); 251 + 252 + for (i = 0; i < nr_cpus; i++) { 253 + struct u64rec *r = &rec->cpu[i]; 254 + struct u64rec *p = &prev->cpu[i]; 255 + 256 + pps = calc_pps_u64(r, p, t); 257 + if (pps > 0) 258 + printf(fmt1, "Exception", i, 259 + 0.0, pps, err2str(rec_i)); 260 260 } 261 + pps = calc_pps_u64(&rec->total, &prev->total, t); 261 262 if (pps > 0) 262 - printf(fmt, action2str(i), "Exception", 263 - pps, pps, period); 263 + printf(fmt2, "Exception", "total", 264 + 0.0, pps, action2str(rec_i)); 264 265 } 266 + 267 + /* cpumap enqueue stats */ 268 + for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { 269 + char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; 270 + char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; 271 + struct record *rec, *prev; 272 + char *info_str = ""; 273 + double drop, info; 274 + 275 + rec = &stats_rec->xdp_cpumap_enqueue[to_cpu]; 276 + prev = &stats_prev->xdp_cpumap_enqueue[to_cpu]; 277 + t = calc_period(rec, prev); 278 + for (i = 0; i < nr_cpus; i++) { 279 + struct datarec *r = &rec->cpu[i]; 280 + struct datarec *p = &prev->cpu[i]; 281 + 282 + pps = calc_pps(r, p, t); 283 + drop = calc_drop(r, p, t); 284 + info = calc_info(r, p, t); 285 + if (info > 0) { 286 + info_str = "bulk-average"; 287 + info = pps / info; /* calc average bulk size */ 288 + } 289 + if (pps > 0) 290 + printf(fmt1, "cpumap-enqueue", 
291 + i, to_cpu, pps, drop, info, info_str); 292 + } 293 + pps = calc_pps(&rec->total, &prev->total, t); 294 + if (pps > 0) { 295 + drop = calc_drop(&rec->total, &prev->total, t); 296 + info = calc_info(&rec->total, &prev->total, t); 297 + if (info > 0) { 298 + info_str = "bulk-average"; 299 + info = pps / info; /* calc average bulk size */ 300 + } 301 + printf(fmt2, "cpumap-enqueue", 302 + "sum", to_cpu, pps, drop, info, info_str); 303 + } 304 + } 305 + 306 + /* cpumap kthread stats */ 307 + { 308 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n"; 309 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n"; 310 + struct record *rec, *prev; 311 + double drop, info; 312 + char *i_str = ""; 313 + 314 + rec = &stats_rec->xdp_cpumap_kthread; 315 + prev = &stats_prev->xdp_cpumap_kthread; 316 + t = calc_period(rec, prev); 317 + for (i = 0; i < nr_cpus; i++) { 318 + struct datarec *r = &rec->cpu[i]; 319 + struct datarec *p = &prev->cpu[i]; 320 + 321 + pps = calc_pps(r, p, t); 322 + drop = calc_drop(r, p, t); 323 + info = calc_info(r, p, t); 324 + if (info > 0) 325 + i_str = "sched"; 326 + if (pps > 0) 327 + printf(fmt1, "cpumap-kthread", 328 + i, pps, drop, info, i_str); 329 + } 330 + pps = calc_pps(&rec->total, &prev->total, t); 331 + drop = calc_drop(&rec->total, &prev->total, t); 332 + info = calc_info(&rec->total, &prev->total, t); 333 + if (info > 0) 334 + i_str = "sched-sum"; 335 + printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); 336 + } 337 + 265 338 printf("\n"); 266 - } 267 - 268 - static __u64 get_key32_value64_percpu(int fd, __u32 key) 269 - { 270 - /* For percpu maps, userspace gets a value per possible CPU */ 271 - unsigned int nr_cpus = bpf_num_possible_cpus(); 272 - __u64 values[nr_cpus]; 273 - __u64 sum = 0; 274 - int i; 275 - 276 - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 277 - fprintf(stderr, 278 - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 279 - return 0; 280 - } 281 - 282 - /* Sum values from each CPU 
*/ 283 - for (i = 0; i < nr_cpus; i++) { 284 - sum += values[i]; 285 - } 286 - return sum; 287 339 } 288 340 289 341 static bool stats_collect(struct stats_record *rec) ··· 422 222 */ 423 223 424 224 fd = map_data[0].fd; /* map0: redirect_err_cnt */ 425 - for (i = 0; i < REDIR_RES_MAX; i++) { 426 - rec->xdp_redir[i].timestamp = gettime(); 427 - rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i); 428 - } 225 + for (i = 0; i < REDIR_RES_MAX; i++) 226 + map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); 429 227 430 228 fd = map_data[1].fd; /* map1: exception_cnt */ 431 229 for (i = 0; i < XDP_ACTION_MAX; i++) { 432 - rec->xdp_exception[i].timestamp = gettime(); 433 - rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i); 230 + map_collect_record_u64(fd, i, &rec->xdp_exception[i]); 434 231 } 232 + 233 + fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */ 234 + for (i = 0; i < MAX_CPUS; i++) 235 + map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); 236 + 237 + fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ 238 + map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); 435 239 436 240 return true; 437 241 } 438 242 243 + static void *alloc_rec_per_cpu(int record_size) 244 + { 245 + unsigned int nr_cpus = bpf_num_possible_cpus(); 246 + void *array; 247 + size_t size; 248 + 249 + size = record_size * nr_cpus; 250 + array = malloc(size); 251 + memset(array, 0, size); 252 + if (!array) { 253 + fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); 254 + exit(EXIT_FAIL_MEM); 255 + } 256 + return array; 257 + } 258 + 259 + static struct stats_record *alloc_stats_record(void) 260 + { 261 + struct stats_record *rec; 262 + int rec_sz; 263 + int i; 264 + 265 + /* Alloc main stats_record structure */ 266 + rec = malloc(sizeof(*rec)); 267 + memset(rec, 0, sizeof(*rec)); 268 + if (!rec) { 269 + fprintf(stderr, "Mem alloc error\n"); 270 + exit(EXIT_FAIL_MEM); 271 + } 272 + 273 + /* Alloc stats stored per CPU for each record */ 274 + rec_sz = 
sizeof(struct u64rec); 275 + for (i = 0; i < REDIR_RES_MAX; i++) 276 + rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz); 277 + 278 + for (i = 0; i < XDP_ACTION_MAX; i++) 279 + rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz); 280 + 281 + rec_sz = sizeof(struct datarec); 282 + rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); 283 + 284 + for (i = 0; i < MAX_CPUS; i++) 285 + rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); 286 + 287 + return rec; 288 + } 289 + 290 + static void free_stats_record(struct stats_record *r) 291 + { 292 + int i; 293 + 294 + for (i = 0; i < REDIR_RES_MAX; i++) 295 + free(r->xdp_redirect[i].cpu); 296 + 297 + for (i = 0; i < XDP_ACTION_MAX; i++) 298 + free(r->xdp_exception[i].cpu); 299 + 300 + free(r->xdp_cpumap_kthread.cpu); 301 + 302 + for (i = 0; i < MAX_CPUS; i++) 303 + free(r->xdp_cpumap_enqueue[i].cpu); 304 + 305 + free(r); 306 + } 307 + 308 + /* Pointer swap trick */ 309 + static inline void swap(struct stats_record **a, struct stats_record **b) 310 + { 311 + struct stats_record *tmp; 312 + 313 + tmp = *a; 314 + *a = *b; 315 + *b = tmp; 316 + } 317 + 439 318 static void stats_poll(int interval, bool err_only) 440 319 { 441 - struct stats_record rec, prev; 320 + struct stats_record *rec, *prev; 442 321 443 - memset(&rec, 0, sizeof(rec)); 322 + rec = alloc_stats_record(); 323 + prev = alloc_stats_record(); 324 + stats_collect(rec); 325 + 326 + if (err_only) 327 + printf("\n%s\n", __doc_err_only__); 444 328 445 329 /* Trick to pretty printf with thousands separators use %' */ 446 330 setlocale(LC_NUMERIC, "en_US"); ··· 542 258 fflush(stdout); 543 259 544 260 while (1) { 545 - memcpy(&prev, &rec, sizeof(rec)); 546 - stats_collect(&rec); 547 - stats_print_headers(err_only); 548 - stats_print(&rec, &prev, err_only); 261 + swap(&prev, &rec); 262 + stats_collect(rec); 263 + stats_print(rec, prev, err_only); 549 264 fflush(stdout); 550 265 sleep(interval); 551 266 } 267 + 268 + free_stats_record(rec); 269 + 
free_stats_record(prev); 552 270 } 553 271 554 272 static void print_bpf_prog_info(void)
+4 -3
tools/bpf/bpf_jit_disasm.c
··· 172 172 { 173 173 char *ptr, *pptr, *tmp; 174 174 off_t off = 0; 175 - int ret, flen, proglen, pass, ulen = 0; 175 + unsigned int proglen; 176 + int ret, flen, pass, ulen = 0; 176 177 regmatch_t pmatch[1]; 177 178 unsigned long base; 178 179 regex_t regex; ··· 200 199 } 201 200 202 201 ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); 203 - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", 202 + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", 204 203 &flen, &proglen, &pass, &base); 205 204 if (ret != 4) { 206 205 regfree(&regex); ··· 240 239 } 241 240 242 241 assert(ulen == proglen); 243 - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", 242 + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", 244 243 proglen, pass, flen); 245 244 printf("%lx + <x>:\n", base); 246 245
+72
tools/bpf/bpftool/common.c
··· 34 34 /* Author: Jakub Kicinski <kubakici@wp.pl> */ 35 35 36 36 #include <errno.h> 37 + #include <fcntl.h> 37 38 #include <fts.h> 38 39 #include <libgen.h> 39 40 #include <mntent.h> ··· 432 431 return NULL; 433 432 434 433 return if_indextoname(ifindex, buf); 434 + } 435 + 436 + static int read_sysfs_hex_int(char *path) 437 + { 438 + char vendor_id_buf[8]; 439 + int len; 440 + int fd; 441 + 442 + fd = open(path, O_RDONLY); 443 + if (fd < 0) { 444 + p_err("Can't open %s: %s", path, strerror(errno)); 445 + return -1; 446 + } 447 + 448 + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); 449 + close(fd); 450 + if (len < 0) { 451 + p_err("Can't read %s: %s", path, strerror(errno)); 452 + return -1; 453 + } 454 + if (len >= (int)sizeof(vendor_id_buf)) { 455 + p_err("Value in %s too long", path); 456 + return -1; 457 + } 458 + 459 + vendor_id_buf[len] = 0; 460 + 461 + return strtol(vendor_id_buf, NULL, 0); 462 + } 463 + 464 + static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) 465 + { 466 + char full_path[64]; 467 + 468 + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", 469 + devname, entry_name); 470 + 471 + return read_sysfs_hex_int(full_path); 472 + } 473 + 474 + const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) 475 + { 476 + char devname[IF_NAMESIZE]; 477 + int vendor_id; 478 + int device_id; 479 + 480 + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { 481 + p_err("Can't get net device name for ifindex %d: %s", ifindex, 482 + strerror(errno)); 483 + return NULL; 484 + } 485 + 486 + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); 487 + if (vendor_id < 0) { 488 + p_err("Can't get device vendor id for %s", devname); 489 + return NULL; 490 + } 491 + 492 + switch (vendor_id) { 493 + case 0x19ee: 494 + device_id = read_sysfs_netdev_hex_int(devname, "device"); 495 + if (device_id != 0x4000 && 496 + device_id != 0x6000 && 497 + device_id != 0x6003) 498 + p_info("Unknown NFP 
device ID, assuming it is NFP-6xxx arch"); 499 + return "NFP-6xxx"; 500 + default: 501 + p_err("Can't get bfd arch name for device vendor id 0x%04x", 502 + vendor_id); 503 + return NULL; 504 + } 435 505 } 436 506 437 507 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+15 -1
tools/bpf/bpftool/jit_disasm.c
··· 76 76 return 0; 77 77 } 78 78 79 - void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) 79 + void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 80 + const char *arch) 80 81 { 81 82 disassembler_ftype disassemble; 82 83 struct disassemble_info info; ··· 101 100 else 102 101 init_disassemble_info(&info, stdout, 103 102 (fprintf_ftype) fprintf); 103 + 104 + /* Update architecture info for offload. */ 105 + if (arch) { 106 + const bfd_arch_info_type *inf = bfd_scan_arch(arch); 107 + 108 + if (inf) { 109 + bfdf->arch_info = inf; 110 + } else { 111 + p_err("No libfd support for %s", arch); 112 + return; 113 + } 114 + } 115 + 104 116 info.arch = bfd_get_arch(bfdf); 105 117 info.mach = bfd_get_mach(bfdf); 106 118 info.buffer = image;
+4 -1
tools/bpf/bpftool/main.h
··· 121 121 122 122 int prog_parse_fd(int *argc, char ***argv); 123 123 124 - void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); 124 + void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 125 + const char *arch); 125 126 void print_hex_data_json(uint8_t *data, size_t len); 127 + 128 + const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 126 129 127 130 #endif
+7 -1
tools/bpf/bpftool/map.c
··· 66 66 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", 67 67 [BPF_MAP_TYPE_DEVMAP] = "devmap", 68 68 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 69 + [BPF_MAP_TYPE_CPUMAP] = "cpumap", 69 70 }; 70 71 71 72 static unsigned int get_possible_cpus(void) ··· 429 428 430 429 jsonw_name(json_wtr, "flags"); 431 430 jsonw_printf(json_wtr, "%#x", info->map_flags); 431 + 432 + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 433 + 432 434 jsonw_uint_field(json_wtr, "bytes_key", info->key_size); 433 435 jsonw_uint_field(json_wtr, "bytes_value", info->value_size); 434 436 jsonw_uint_field(json_wtr, "max_entries", info->max_entries); ··· 473 469 if (*info->name) 474 470 printf("name %s ", info->name); 475 471 476 - printf("flags 0x%x\n", info->map_flags); 472 + printf("flags 0x%x", info->map_flags); 473 + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); 474 + printf("\n"); 477 475 printf("\tkey %uB value %uB max_entries %u", 478 476 info->key_size, info->value_size, info->max_entries); 479 477
+11 -1
tools/bpf/bpftool/prog.c
··· 776 776 } 777 777 } else { 778 778 if (member_len == &info.jited_prog_len) { 779 - disasm_print_insn(buf, *member_len, opcodes); 779 + const char *name = NULL; 780 + 781 + if (info.ifindex) { 782 + name = ifindex_to_bfd_name_ns(info.ifindex, 783 + info.netns_dev, 784 + info.netns_ino); 785 + if (!name) 786 + goto err_free; 787 + } 788 + 789 + disasm_print_insn(buf, *member_len, opcodes, name); 780 790 } else { 781 791 kernel_syms_load(&dd); 782 792 if (json_output)
+14 -1
tools/include/uapi/linux/bpf.h
··· 900 900 __u32 data; 901 901 __u32 data_end; 902 902 __u32 data_meta; 903 + /* Below access go through struct xdp_rxq_info */ 904 + __u32 ingress_ifindex; /* rxq->dev->ifindex */ 905 + __u32 rx_queue_index; /* rxq->queue_index */ 903 906 }; 904 907 905 908 enum sk_action { ··· 938 935 __u32 max_entries; 939 936 __u32 map_flags; 940 937 char name[BPF_OBJ_NAME_LEN]; 938 + __u32 ifindex; 939 + __u64 netns_dev; 940 + __u64 netns_ino; 941 941 } __attribute__((aligned(8))); 942 942 943 943 /* User bpf_sock_ops struct to access socket values and specify request ops ··· 962 956 __u32 local_ip6[4]; /* Stored in network byte order */ 963 957 __u32 remote_port; /* Stored in network byte order */ 964 958 __u32 local_port; /* stored in host byte order */ 959 + __u32 is_fullsock; /* Some TCP fields are only valid if 960 + * there is a full socket. If not, the 961 + * fields read as zero. 962 + */ 963 + __u32 snd_cwnd; 964 + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ 965 965 }; 966 966 967 967 /* List of known BPF sock_ops operators. ··· 1022 1010 #define BPF_DEVCG_DEV_CHAR (1ULL << 1) 1023 1011 1024 1012 struct bpf_cgroup_dev_ctx { 1025 - __u32 access_type; /* (access << 16) | type */ 1013 + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ 1014 + __u32 access_type; 1026 1015 __u32 major; 1027 1016 __u32 minor; 1028 1017 };
+7
tools/testing/selftests/bpf/.gitignore
··· 3 3 test_lru_map 4 4 test_lpm_map 5 5 test_tag 6 + FEATURE-DUMP.libbpf 7 + fixdep 8 + test_align 9 + test_dev_cgroup 10 + test_progs 11 + test_verifier_log 12 + feature
+2 -1
tools/testing/selftests/bpf/Makefile
··· 19 19 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 20 20 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 21 21 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 22 - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o 22 + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ 23 + sample_map_ret0.o 23 24 24 25 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ 25 26 test_offload.py
+34
tools/testing/selftests/bpf/sample_map_ret0.c
··· 1 + /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ 2 + #include <linux/bpf.h> 3 + #include "bpf_helpers.h" 4 + 5 + struct bpf_map_def SEC("maps") htab = { 6 + .type = BPF_MAP_TYPE_HASH, 7 + .key_size = sizeof(__u32), 8 + .value_size = sizeof(long), 9 + .max_entries = 2, 10 + }; 11 + 12 + struct bpf_map_def SEC("maps") array = { 13 + .type = BPF_MAP_TYPE_ARRAY, 14 + .key_size = sizeof(__u32), 15 + .value_size = sizeof(long), 16 + .max_entries = 2, 17 + }; 18 + 19 + /* Sample program which should always load for testing control paths. */ 20 + SEC(".text") int func() 21 + { 22 + __u64 key64 = 0; 23 + __u32 key = 0; 24 + long *value; 25 + 26 + value = bpf_map_lookup_elem(&htab, &key); 27 + if (!value) 28 + return 1; 29 + value = bpf_map_lookup_elem(&array, &key64); 30 + if (!value) 31 + return 1; 32 + 33 + return 0; 34 + }
+122
tools/testing/selftests/bpf/test_lpm_map.c
··· 521 521 close(map_fd); 522 522 } 523 523 524 + static void test_lpm_get_next_key(void) 525 + { 526 + struct bpf_lpm_trie_key *key_p, *next_key_p; 527 + size_t key_size; 528 + __u32 value = 0; 529 + int map_fd; 530 + 531 + key_size = sizeof(*key_p) + sizeof(__u32); 532 + key_p = alloca(key_size); 533 + next_key_p = alloca(key_size); 534 + 535 + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), 536 + 100, BPF_F_NO_PREALLOC); 537 + assert(map_fd >= 0); 538 + 539 + /* empty tree. get_next_key should return ENOENT */ 540 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && 541 + errno == ENOENT); 542 + 543 + /* get and verify the first key, get the second one should fail. */ 544 + key_p->prefixlen = 16; 545 + inet_pton(AF_INET, "192.168.0.0", key_p->data); 546 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 547 + 548 + memset(key_p, 0, key_size); 549 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 550 + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && 551 + key_p->data[1] == 168); 552 + 553 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 554 + errno == ENOENT); 555 + 556 + /* no exact matching key should get the first one in post order. 
*/ 557 + key_p->prefixlen = 8; 558 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 559 + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && 560 + key_p->data[1] == 168); 561 + 562 + /* add one more element (total two) */ 563 + key_p->prefixlen = 24; 564 + inet_pton(AF_INET, "192.168.0.0", key_p->data); 565 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 566 + 567 + memset(key_p, 0, key_size); 568 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 569 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 570 + key_p->data[1] == 168 && key_p->data[2] == 0); 571 + 572 + memset(next_key_p, 0, key_size); 573 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 574 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 575 + next_key_p->data[1] == 168); 576 + 577 + memcpy(key_p, next_key_p, key_size); 578 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 579 + errno == ENOENT); 580 + 581 + /* Add one more element (total three) */ 582 + key_p->prefixlen = 24; 583 + inet_pton(AF_INET, "192.168.128.0", key_p->data); 584 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 585 + 586 + memset(key_p, 0, key_size); 587 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 588 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 589 + key_p->data[1] == 168 && key_p->data[2] == 0); 590 + 591 + memset(next_key_p, 0, key_size); 592 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 593 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 594 + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); 595 + 596 + memcpy(key_p, next_key_p, key_size); 597 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 598 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 599 + next_key_p->data[1] == 168); 600 + 601 + memcpy(key_p, next_key_p, key_size); 602 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 603 + 
errno == ENOENT); 604 + 605 + /* Add one more element (total four) */ 606 + key_p->prefixlen = 24; 607 + inet_pton(AF_INET, "192.168.1.0", key_p->data); 608 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 609 + 610 + memset(key_p, 0, key_size); 611 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 612 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 613 + key_p->data[1] == 168 && key_p->data[2] == 0); 614 + 615 + memset(next_key_p, 0, key_size); 616 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 617 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 618 + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); 619 + 620 + memcpy(key_p, next_key_p, key_size); 621 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 622 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 623 + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); 624 + 625 + memcpy(key_p, next_key_p, key_size); 626 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 627 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 628 + next_key_p->data[1] == 168); 629 + 630 + memcpy(key_p, next_key_p, key_size); 631 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 632 + errno == ENOENT); 633 + 634 + /* no exact matching key should return the first one in post order */ 635 + key_p->prefixlen = 22; 636 + inet_pton(AF_INET, "192.168.1.0", key_p->data); 637 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 638 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 639 + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); 640 + 641 + close(map_fd); 642 + } 643 + 524 644 int main(void) 525 645 { 526 646 struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; ··· 664 544 test_lpm_ipaddr(); 665 545 666 546 test_lpm_delete(); 547 + 548 + test_lpm_get_next_key(); 667 549 668 550 printf("test_lpm: OK\n"); 669 551 return 0;
+182 -24
tools/testing/selftests/bpf/test_offload.py
··· 20 20 import pprint 21 21 import random 22 22 import string 23 + import struct 23 24 import subprocess 24 25 import time 25 26 ··· 157 156 (len(progs), expected)) 158 157 return progs 159 158 159 + def bpftool_map_list(expected=None, ns=""): 160 + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) 161 + if expected is not None: 162 + if len(maps) != expected: 163 + fail(True, "%d BPF maps loaded, expected %d" % 164 + (len(maps), expected)) 165 + return maps 166 + 160 167 def bpftool_prog_list_wait(expected=0, n_retry=20): 161 168 for i in range(n_retry): 162 169 nprogs = len(bpftool_prog_list()) ··· 172 163 return 173 164 time.sleep(0.05) 174 165 raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) 166 + 167 + def bpftool_map_list_wait(expected=0, n_retry=20): 168 + for i in range(n_retry): 169 + nmaps = len(bpftool_map_list()) 170 + if nmaps == expected: 171 + return 172 + time.sleep(0.05) 173 + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) 175 174 176 175 def ip(args, force=False, JSON=True, ns="", fail=True): 177 176 if force: ··· 209 192 netns.append(name) 210 193 return name 211 194 return None 195 + 196 + def int2str(fmt, val): 197 + ret = [] 198 + for b in struct.pack(fmt, val): 199 + ret.append(int(b)) 200 + return " ".join(map(lambda x: str(x), ret)) 201 + 202 + def str2int(strtab): 203 + inttab = [] 204 + for i in strtab: 205 + inttab.append(int(i, 16)) 206 + ba = bytearray(inttab) 207 + if len(strtab) == 4: 208 + fmt = "I" 209 + elif len(strtab) == 8: 210 + fmt = "Q" 211 + else: 212 + raise Exception("String array of len %d can't be unpacked to an int" % 213 + (len(strtab))) 214 + return struct.unpack(fmt, ba)[0] 212 215 213 216 class DebugfsDir: 214 217 """ ··· 348 311 return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), 349 312 fail=fail) 350 313 351 - def set_xdp(self, bpf, mode, force=False, fail=True): 314 + def 
set_xdp(self, bpf, mode, force=False, JSON=True, fail=True): 352 315 return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), 353 - force=force, fail=fail) 316 + force=force, JSON=JSON, fail=fail) 354 317 355 - def unset_xdp(self, mode, force=False, fail=True): 318 + def unset_xdp(self, mode, force=False, JSON=True, fail=True): 356 319 return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), 357 - force=force, fail=fail) 320 + force=force, JSON=JSON, fail=fail) 358 321 359 322 def ip_link_show(self, xdp): 360 323 _, link = ip("link show dev %s" % (self['ifname'])) ··· 427 390 428 391 ################################################################################ 429 392 def clean_up(): 393 + global files, netns, devs 394 + 430 395 for dev in devs: 431 396 dev.remove() 432 397 for f in files: 433 398 cmd("rm -f %s" % (f)) 434 399 for ns in netns: 435 400 cmd("ip netns delete %s" % (ns)) 401 + files = [] 402 + netns = [] 436 403 437 404 def pin_prog(file_name, idx=0): 438 405 progs = bpftool_prog_list(expected=(idx + 1)) ··· 446 405 447 406 return file_name, bpf_pinned(file_name) 448 407 449 - def check_dev_info(other_ns, ns, pin_file=None, removed=False): 450 - if removed: 451 - bpftool_prog_list(expected=0) 452 - ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) 453 - fail(ret == 0, "Showing prog with removed device did not fail") 454 - fail(err["error"].find("No such device") == -1, 455 - "Showing prog with removed device expected ENODEV, error is %s" % 456 - (err["error"])) 457 - return 458 - progs = bpftool_prog_list(expected=int(not removed), ns=ns) 408 + def pin_map(file_name, idx=0, expected=1): 409 + maps = bpftool_map_list(expected=expected) 410 + m = maps[idx] 411 + bpftool("map pin id %d %s" % (m["id"], file_name)) 412 + files.append(file_name) 413 + 414 + return file_name, bpf_pinned(file_name) 415 + 416 + def check_dev_info_removed(prog_file=None, map_file=None): 417 + bpftool_prog_list(expected=0) 418 + ret, err 
= bpftool("prog show pin %s" % (prog_file), fail=False) 419 + fail(ret == 0, "Showing prog with removed device did not fail") 420 + fail(err["error"].find("No such device") == -1, 421 + "Showing prog with removed device expected ENODEV, error is %s" % 422 + (err["error"])) 423 + 424 + bpftool_map_list(expected=0) 425 + ret, err = bpftool("map show pin %s" % (map_file), fail=False) 426 + fail(ret == 0, "Showing map with removed device did not fail") 427 + fail(err["error"].find("No such device") == -1, 428 + "Showing map with removed device expected ENODEV, error is %s" % 429 + (err["error"])) 430 + 431 + def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): 432 + progs = bpftool_prog_list(expected=1, ns=ns) 459 433 prog = progs[0] 460 434 461 435 fail("dev" not in prog.keys(), "Device parameters not reported") ··· 479 423 fail("ns_dev" not in dev.keys(), "Device parameters not reported") 480 424 fail("ns_inode" not in dev.keys(), "Device parameters not reported") 481 425 482 - if not removed and not other_ns: 426 + if not other_ns: 483 427 fail("ifname" not in dev.keys(), "Ifname not reported") 484 428 fail(dev["ifname"] != sim["ifname"], 485 429 "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) 486 430 else: 487 431 fail("ifname" in dev.keys(), "Ifname is reported for other ns") 488 - if removed: 489 - fail(dev["ifindex"] != 0, "Device perameters not zero on removed") 490 - fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") 491 - fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") 432 + 433 + maps = bpftool_map_list(expected=2, ns=ns) 434 + for m in maps: 435 + fail("dev" not in m.keys(), "Device parameters not reported") 436 + fail(dev != m["dev"], "Map's device different than program's") 492 437 493 438 # Parse command line 494 439 parser = argparse.ArgumentParser() ··· 521 464 cmd("mount -t debugfs none /sys/kernel/debug") 522 465 523 466 # Check samples are compiled 524 - samples = 
["sample_ret0.o"] 467 + samples = ["sample_ret0.o", "sample_map_ret0.o"] 525 468 for s in samples: 526 469 ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) 527 470 skip(ret != 0, "sample %s/%s not found, please compile it" % ··· 796 739 bpftool_prog_list_wait(expected=0) 797 740 798 741 sim = NetdevSim() 799 - sim.set_ethtool_tc_offloads(True) 800 - sim.set_xdp(obj, "offload") 742 + map_obj = bpf_obj("sample_map_ret0.o") 743 + start_test("Test loading program with maps...") 744 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 801 745 802 746 start_test("Test bpftool bound info reporting (own ns)...") 803 747 check_dev_info(False, "") ··· 815 757 sim.set_ns("") 816 758 check_dev_info(False, "") 817 759 818 - pin_file, _ = pin_prog("/sys/fs/bpf/tmp") 760 + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") 761 + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) 819 762 sim.remove() 820 763 821 764 start_test("Test bpftool bound info reporting (removed dev)...") 822 - check_dev_info(True, "", pin_file=pin_file, removed=True) 765 + check_dev_info_removed(prog_file=prog_file, map_file=map_file) 766 + 767 + # Remove all pinned files and reinstantiate the netdev 768 + clean_up() 769 + bpftool_prog_list_wait(expected=0) 770 + 771 + sim = NetdevSim() 772 + 773 + start_test("Test map update (no flags)...") 774 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 775 + maps = bpftool_map_list(expected=2) 776 + array = maps[0] if maps[0]["type"] == "array" else maps[1] 777 + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] 778 + for m in maps: 779 + for i in range(2): 780 + bpftool("map update id %d key %s value %s" % 781 + (m["id"], int2str("I", i), int2str("Q", i * 3))) 782 + 783 + for m in maps: 784 + ret, _ = bpftool("map update id %d key %s value %s" % 785 + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), 786 + fail=False) 787 + fail(ret == 0, "added too many entries") 788 + 789 + start_test("Test 
map update (exists)...") 790 + for m in maps: 791 + for i in range(2): 792 + bpftool("map update id %d key %s value %s exist" % 793 + (m["id"], int2str("I", i), int2str("Q", i * 3))) 794 + 795 + for m in maps: 796 + ret, err = bpftool("map update id %d key %s value %s exist" % 797 + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), 798 + fail=False) 799 + fail(ret == 0, "updated non-existing key") 800 + fail(err["error"].find("No such file or directory") == -1, 801 + "expected ENOENT, error is '%s'" % (err["error"])) 802 + 803 + start_test("Test map update (noexist)...") 804 + for m in maps: 805 + for i in range(2): 806 + ret, err = bpftool("map update id %d key %s value %s noexist" % 807 + (m["id"], int2str("I", i), int2str("Q", i * 3)), 808 + fail=False) 809 + fail(ret == 0, "updated existing key") 810 + fail(err["error"].find("File exists") == -1, 811 + "expected EEXIST, error is '%s'" % (err["error"])) 812 + 813 + start_test("Test map dump...") 814 + for m in maps: 815 + _, entries = bpftool("map dump id %d" % (m["id"])) 816 + for i in range(2): 817 + key = str2int(entries[i]["key"]) 818 + fail(key != i, "expected key %d, got %d" % (key, i)) 819 + val = str2int(entries[i]["value"]) 820 + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) 821 + 822 + start_test("Test map getnext...") 823 + for m in maps: 824 + _, entry = bpftool("map getnext id %d" % (m["id"])) 825 + key = str2int(entry["next_key"]) 826 + fail(key != 0, "next key %d, expected %d" % (key, 0)) 827 + _, entry = bpftool("map getnext id %d key %s" % 828 + (m["id"], int2str("I", 0))) 829 + key = str2int(entry["next_key"]) 830 + fail(key != 1, "next key %d, expected %d" % (key, 1)) 831 + ret, err = bpftool("map getnext id %d key %s" % 832 + (m["id"], int2str("I", 1)), fail=False) 833 + fail(ret == 0, "got next key past the end of map") 834 + fail(err["error"].find("No such file or directory") == -1, 835 + "expected ENOENT, error is '%s'" % (err["error"])) 836 + 837 + start_test("Test map 
delete (htab)...") 838 + for i in range(2): 839 + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) 840 + 841 + start_test("Test map delete (array)...") 842 + for i in range(2): 843 + ret, err = bpftool("map delete id %d key %s" % 844 + (htab["id"], int2str("I", i)), fail=False) 845 + fail(ret == 0, "removed entry from an array") 846 + fail(err["error"].find("No such file or directory") == -1, 847 + "expected ENOENT, error is '%s'" % (err["error"])) 848 + 849 + start_test("Test map remove...") 850 + sim.unset_xdp("offload") 851 + bpftool_map_list_wait(expected=0) 852 + sim.remove() 853 + 854 + sim = NetdevSim() 855 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 856 + sim.remove() 857 + bpftool_map_list_wait(expected=0) 858 + 859 + start_test("Test map creation fail path...") 860 + sim = NetdevSim() 861 + sim.dfs["bpf_map_accept"] = "N" 862 + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) 863 + fail(ret == 0, 864 + "netdevsim didn't refuse to create a map with offload disabled") 823 865 824 866 print("%s: OK" % (os.path.basename(__file__))) 825 867
+136 -1
tools/testing/selftests/bpf/test_verifier.c
··· 29 29 #include <linux/filter.h> 30 30 #include <linux/bpf_perf_event.h> 31 31 #include <linux/bpf.h> 32 + #include <linux/if_ether.h> 32 33 33 34 #include <bpf/bpf.h> 34 35 ··· 50 49 #define MAX_INSNS 512 51 50 #define MAX_FIXUPS 8 52 51 #define MAX_NR_MAPS 4 52 + #define POINTER_VALUE 0xcafe4all 53 + #define TEST_DATA_LEN 64 53 54 54 55 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) 55 56 #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) ··· 65 62 int fixup_map_in_map[MAX_FIXUPS]; 66 63 const char *errstr; 67 64 const char *errstr_unpriv; 65 + uint32_t retval; 68 66 enum { 69 67 UNDEF, 70 68 ACCEPT, ··· 99 95 BPF_EXIT_INSN(), 100 96 }, 101 97 .result = ACCEPT, 98 + .retval = -3, 99 + }, 100 + { 101 + "DIV32 by 0, zero check 1", 102 + .insns = { 103 + BPF_MOV32_IMM(BPF_REG_0, 42), 104 + BPF_MOV32_IMM(BPF_REG_1, 0), 105 + BPF_MOV32_IMM(BPF_REG_2, 1), 106 + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 107 + BPF_EXIT_INSN(), 108 + }, 109 + .result = ACCEPT, 110 + .retval = 0, 111 + }, 112 + { 113 + "DIV32 by 0, zero check 2", 114 + .insns = { 115 + BPF_MOV32_IMM(BPF_REG_0, 42), 116 + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), 117 + BPF_MOV32_IMM(BPF_REG_2, 1), 118 + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 119 + BPF_EXIT_INSN(), 120 + }, 121 + .result = ACCEPT, 122 + .retval = 0, 123 + }, 124 + { 125 + "DIV64 by 0, zero check", 126 + .insns = { 127 + BPF_MOV32_IMM(BPF_REG_0, 42), 128 + BPF_MOV32_IMM(BPF_REG_1, 0), 129 + BPF_MOV32_IMM(BPF_REG_2, 1), 130 + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 131 + BPF_EXIT_INSN(), 132 + }, 133 + .result = ACCEPT, 134 + .retval = 0, 135 + }, 136 + { 137 + "MOD32 by 0, zero check 1", 138 + .insns = { 139 + BPF_MOV32_IMM(BPF_REG_0, 42), 140 + BPF_MOV32_IMM(BPF_REG_1, 0), 141 + BPF_MOV32_IMM(BPF_REG_2, 1), 142 + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 143 + BPF_EXIT_INSN(), 144 + }, 145 + .result = ACCEPT, 146 + .retval = 0, 147 + }, 148 + { 149 + "MOD32 by 0, zero check 2", 150 + .insns = { 151 + 
BPF_MOV32_IMM(BPF_REG_0, 42), 152 + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), 153 + BPF_MOV32_IMM(BPF_REG_2, 1), 154 + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 155 + BPF_EXIT_INSN(), 156 + }, 157 + .result = ACCEPT, 158 + .retval = 0, 159 + }, 160 + { 161 + "MOD64 by 0, zero check", 162 + .insns = { 163 + BPF_MOV32_IMM(BPF_REG_0, 42), 164 + BPF_MOV32_IMM(BPF_REG_1, 0), 165 + BPF_MOV32_IMM(BPF_REG_2, 1), 166 + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 167 + BPF_EXIT_INSN(), 168 + }, 169 + .result = ACCEPT, 170 + .retval = 0, 171 + }, 172 + { 173 + "empty prog", 174 + .insns = { 175 + }, 176 + .errstr = "last insn is not an exit or jmp", 177 + .result = REJECT, 178 + }, 179 + { 180 + "only exit insn", 181 + .insns = { 182 + BPF_EXIT_INSN(), 183 + }, 184 + .errstr = "R0 !read_ok", 185 + .result = REJECT, 102 186 }, 103 187 { 104 188 "unreachable", ··· 302 210 BPF_EXIT_INSN(), 303 211 }, 304 212 .result = ACCEPT, 213 + .retval = 1, 305 214 }, 306 215 { 307 216 "test8 ld_imm64", ··· 610 517 .errstr_unpriv = "R0 leaks addr", 611 518 .result = ACCEPT, 612 519 .result_unpriv = REJECT, 520 + .retval = POINTER_VALUE, 613 521 }, 614 522 { 615 523 "check valid spill/fill, skb mark", ··· 897 803 .errstr_unpriv = "R1 pointer comparison", 898 804 .result_unpriv = REJECT, 899 805 .result = ACCEPT, 806 + .retval = -ENOENT, 900 807 }, 901 808 { 902 809 "jump test 4", ··· 1918 1823 BPF_EXIT_INSN(), 1919 1824 }, 1920 1825 .result = ACCEPT, 1826 + .retval = 0xfaceb00c, 1921 1827 }, 1922 1828 { 1923 1829 "PTR_TO_STACK store/load - bad alignment on off", ··· 1977 1881 .result = ACCEPT, 1978 1882 .result_unpriv = REJECT, 1979 1883 .errstr_unpriv = "R0 leaks addr", 1884 + .retval = POINTER_VALUE, 1980 1885 }, 1981 1886 { 1982 1887 "unpriv: add const to pointer", ··· 2151 2054 BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), 2152 2055 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 2153 2056 BPF_FUNC_get_hash_recalc), 2057 + BPF_MOV64_IMM(BPF_REG_0, 0), 2154 2058 BPF_EXIT_INSN(), 2155 
2059 }, 2156 2060 .result = ACCEPT, ··· 2939 2841 }, 2940 2842 .result = ACCEPT, 2941 2843 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2844 + .retval = 1, 2942 2845 }, 2943 2846 { 2944 2847 "direct packet access: test12 (and, good access)", ··· 2964 2865 }, 2965 2866 .result = ACCEPT, 2966 2867 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2868 + .retval = 1, 2967 2869 }, 2968 2870 { 2969 2871 "direct packet access: test13 (branches, good access)", ··· 2995 2895 }, 2996 2896 .result = ACCEPT, 2997 2897 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2898 + .retval = 1, 2998 2899 }, 2999 2900 { 3000 2901 "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", ··· 3019 2918 }, 3020 2919 .result = ACCEPT, 3021 2920 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2921 + .retval = 1, 3022 2922 }, 3023 2923 { 3024 2924 "direct packet access: test15 (spill with xadd)", ··· 3306 3204 }, 3307 3205 .result = ACCEPT, 3308 3206 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3207 + .retval = 1, 3309 3208 }, 3310 3209 { 3311 3210 "direct packet access: test28 (marking on <=, bad access)", ··· 5926 5823 }, 5927 5824 .result = ACCEPT, 5928 5825 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 5826 + .retval = 0 /* csum_diff of 64-byte packet */, 5929 5827 }, 5930 5828 { 5931 5829 "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", ··· 6295 6191 }, 6296 6192 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 6297 6193 .result = ACCEPT, 6194 + .retval = 42 /* ultimate return value */, 6298 6195 }, 6299 6196 { 6300 6197 "ld_ind: check calling conv, r1", ··· 6367 6262 BPF_EXIT_INSN(), 6368 6263 }, 6369 6264 .result = ACCEPT, 6265 + .retval = 1, 6370 6266 }, 6371 6267 { 6372 6268 "check bpf_perf_event_data->sample_period byte load permitted", ··· 7355 7249 }, 7356 7250 .fixup_map1 = { 3 }, 7357 7251 .result = ACCEPT, 7252 + .retval = POINTER_VALUE, 7358 7253 .result_unpriv = REJECT, 7359 7254 .errstr_unpriv = "R0 leaks addr as return value" 7360 7255 }, ··· 7376 7269 }, 7377 7270 .fixup_map1 = { 3 
}, 7378 7271 .result = ACCEPT, 7272 + .retval = POINTER_VALUE, 7379 7273 .result_unpriv = REJECT, 7380 7274 .errstr_unpriv = "R0 leaks addr as return value" 7381 7275 }, ··· 7818 7710 BPF_EXIT_INSN(), 7819 7711 }, 7820 7712 .result = ACCEPT, 7713 + .retval = TEST_DATA_LEN, 7821 7714 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 7822 7715 }, 7823 7716 { ··· 8960 8851 .errstr_unpriv = "function calls to other bpf functions are allowed for root only", 8961 8852 .result_unpriv = REJECT, 8962 8853 .result = ACCEPT, 8854 + .retval = 1, 8963 8855 }, 8964 8856 { 8965 8857 "calls: overlapping caller/callee", ··· 9156 9046 }, 9157 9047 .prog_type = BPF_PROG_TYPE_SCHED_ACT, 9158 9048 .result = ACCEPT, 9049 + .retval = TEST_DATA_LEN, 9159 9050 }, 9160 9051 { 9161 9052 "calls: callee using args1", ··· 9169 9058 .errstr_unpriv = "allowed for root only", 9170 9059 .result_unpriv = REJECT, 9171 9060 .result = ACCEPT, 9061 + .retval = POINTER_VALUE, 9172 9062 }, 9173 9063 { 9174 9064 "calls: callee using wrong args2", ··· 9200 9088 .errstr_unpriv = "allowed for root only", 9201 9089 .result_unpriv = REJECT, 9202 9090 .result = ACCEPT, 9091 + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, 9203 9092 }, 9204 9093 { 9205 9094 "calls: callee changing pkt pointers", ··· 9249 9136 }, 9250 9137 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9251 9138 .result = ACCEPT, 9139 + .retval = TEST_DATA_LEN + TEST_DATA_LEN, 9252 9140 }, 9253 9141 { 9254 9142 "calls: calls with stack arith", ··· 9268 9154 }, 9269 9155 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9270 9156 .result = ACCEPT, 9157 + .retval = 42, 9271 9158 }, 9272 9159 { 9273 9160 "calls: calls with misaligned stack access", ··· 9302 9187 }, 9303 9188 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9304 9189 .result = ACCEPT, 9190 + .retval = 43, 9305 9191 }, 9306 9192 { 9307 9193 "calls: calls control flow, jump test 2", ··· 9795 9679 }, 9796 9680 .prog_type = BPF_PROG_TYPE_XDP, 9797 9681 .result = ACCEPT, 9682 + .retval = 42, 9798 9683 }, 9799 9684 { 
9800 9685 "calls: write into callee stack frame", ··· 10407 10290 }, 10408 10291 .result = ACCEPT, 10409 10292 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10293 + .retval = POINTER_VALUE, 10410 10294 }, 10411 10295 { 10412 10296 "calls: pkt_ptr spill into caller stack 2", ··· 10473 10355 }, 10474 10356 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10475 10357 .result = ACCEPT, 10358 + .retval = 1, 10476 10359 }, 10477 10360 { 10478 10361 "calls: pkt_ptr spill into caller stack 4", ··· 10507 10388 }, 10508 10389 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10509 10390 .result = ACCEPT, 10391 + .retval = 1, 10510 10392 }, 10511 10393 { 10512 10394 "calls: pkt_ptr spill into caller stack 5", ··· 10916 10796 int fd_prog, expected_ret, reject_from_alignment; 10917 10797 struct bpf_insn *prog = test->insns; 10918 10798 int prog_len = probe_filter_length(prog); 10799 + char data_in[TEST_DATA_LEN] = {}; 10919 10800 int prog_type = test->prog_type; 10920 10801 int map_fds[MAX_NR_MAPS]; 10921 10802 const char *expected_err; 10922 - int i; 10803 + uint32_t retval; 10804 + int i, err; 10923 10805 10924 10806 for (i = 0; i < MAX_NR_MAPS; i++) 10925 10807 map_fds[i] = -1; ··· 10964 10842 } 10965 10843 } 10966 10844 10845 + if (fd_prog >= 0) { 10846 + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), 10847 + NULL, NULL, &retval, NULL); 10848 + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { 10849 + printf("Unexpected bpf_prog_test_run error\n"); 10850 + goto fail_log; 10851 + } 10852 + if (!err && retval != test->retval && 10853 + test->retval != POINTER_VALUE) { 10854 + printf("FAIL retval %d != %d\n", retval, test->retval); 10855 + goto fail_log; 10856 + } 10857 + } 10967 10858 (*passes)++; 10968 10859 printf("OK%s\n", reject_from_alignment ? 10969 10860 " (NOTE: reject due to unknown alignment)" : "");