Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-01-19

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) bpf array map HW offload, from Jakub.

2) support for bpf_get_next_key() for LPM map, from Yonghong.

3) test_verifier now runs loaded programs, from Alexei.

4) xdp cpumap monitoring, from Jesper.

5) variety of tests, cleanups and small x64 JIT optimization, from Daniel.

6) user space can now retrieve HW JITed program, from Jiong.

Note there is a minor conflict between Russell's arm32 JIT fixes
and removal of bpf_jit_enable variable by Daniel which should
be resolved by keeping Russell's comment and removing that variable.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+1835 -229
-2
arch/arm/net/bpf_jit_32.c
··· 25 25 26 26 #include "bpf_jit_32.h" 27 27 28 - int bpf_jit_enable __read_mostly; 29 - 30 28 /* 31 29 * eBPF prog stack layout: 32 30 *
-2
arch/arm64/net/bpf_jit_comp.c
··· 31 31 32 32 #include "bpf_jit.h" 33 33 34 - int bpf_jit_enable __read_mostly; 35 - 36 34 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 37 35 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 38 36 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
-2
arch/mips/net/bpf_jit.c
··· 1207 1207 return 0; 1208 1208 } 1209 1209 1210 - int bpf_jit_enable __read_mostly; 1211 - 1212 1210 void bpf_jit_compile(struct bpf_prog *fp) 1213 1211 { 1214 1212 struct jit_ctx ctx;
-2
arch/mips/net/ebpf_jit.c
··· 177 177 (ctx->idx * 4) - 4; 178 178 } 179 179 180 - int bpf_jit_enable __read_mostly; 181 - 182 180 enum which_ebpf_reg { 183 181 src_reg, 184 182 src_reg_no_fp,
-2
arch/powerpc/net/bpf_jit_comp.c
··· 18 18 19 19 #include "bpf_jit32.h" 20 20 21 - int bpf_jit_enable __read_mostly; 22 - 23 21 static inline void bpf_flush_icache(void *start, void *end) 24 22 { 25 23 smp_wmb();
-2
arch/powerpc/net/bpf_jit_comp64.c
··· 21 21 22 22 #include "bpf_jit64.h" 23 23 24 - int bpf_jit_enable __read_mostly; 25 - 26 24 static void bpf_jit_fill_ill_insns(void *area, unsigned int size) 27 25 { 28 26 memset32(area, BREAKPOINT_INSTRUCTION, size/4);
-2
arch/s390/net/bpf_jit_comp.c
··· 28 28 #include <asm/set_memory.h> 29 29 #include "bpf_jit.h" 30 30 31 - int bpf_jit_enable __read_mostly; 32 - 33 31 struct bpf_jit { 34 32 u32 seen; /* Flags to remember seen eBPF instructions */ 35 33 u32 seen_reg[16]; /* Array to remember which registers are used */
-2
arch/sparc/net/bpf_jit_comp_32.c
··· 11 11 12 12 #include "bpf_jit_32.h" 13 13 14 - int bpf_jit_enable __read_mostly; 15 - 16 14 static inline bool is_simm13(unsigned int value) 17 15 { 18 16 return value + 0x1000 < 0x2000;
-2
arch/sparc/net/bpf_jit_comp_64.c
··· 12 12 13 13 #include "bpf_jit_64.h" 14 14 15 - int bpf_jit_enable __read_mostly; 16 - 17 15 static inline bool is_simm13(unsigned int value) 18 16 { 19 17 return value + 0x1000 < 0x2000;
+30 -7
arch/x86/net/bpf_jit_comp.c
··· 15 15 #include <asm/set_memory.h> 16 16 #include <linux/bpf.h> 17 17 18 - int bpf_jit_enable __read_mostly; 19 - 20 18 /* 21 19 * assembly code in arch/x86/net/bpf_jit.S 22 20 */ ··· 150 152 BIT(BPF_REG_8) | 151 153 BIT(BPF_REG_9) | 152 154 BIT(BPF_REG_AX)); 155 + } 156 + 157 + static bool is_axreg(u32 reg) 158 + { 159 + return reg == BPF_REG_0; 153 160 } 154 161 155 162 /* add modifiers if 'reg' maps to x64 registers r8..r15 */ ··· 450 447 else if (is_ereg(dst_reg)) 451 448 EMIT1(add_1mod(0x40, dst_reg)); 452 449 450 + /* b3 holds 'normal' opcode, b2 short form only valid 451 + * in case dst is eax/rax. 452 + */ 453 453 switch (BPF_OP(insn->code)) { 454 - case BPF_ADD: b3 = 0xC0; break; 455 - case BPF_SUB: b3 = 0xE8; break; 456 - case BPF_AND: b3 = 0xE0; break; 457 - case BPF_OR: b3 = 0xC8; break; 458 - case BPF_XOR: b3 = 0xF0; break; 454 + case BPF_ADD: 455 + b3 = 0xC0; 456 + b2 = 0x05; 457 + break; 458 + case BPF_SUB: 459 + b3 = 0xE8; 460 + b2 = 0x2D; 461 + break; 462 + case BPF_AND: 463 + b3 = 0xE0; 464 + b2 = 0x25; 465 + break; 466 + case BPF_OR: 467 + b3 = 0xC8; 468 + b2 = 0x0D; 469 + break; 470 + case BPF_XOR: 471 + b3 = 0xF0; 472 + b2 = 0x35; 473 + break; 459 474 } 460 475 461 476 if (is_imm8(imm32)) 462 477 EMIT3(0x83, add_1reg(b3, dst_reg), imm32); 478 + else if (is_axreg(dst_reg)) 479 + EMIT1_off32(b2, imm32); 463 480 else 464 481 EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); 465 482 break;
+8 -1
drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
··· 157 157 int tag) 158 158 { 159 159 struct sk_buff *skb; 160 - int err; 160 + int i, err; 161 + 162 + for (i = 0; i < 50; i++) { 163 + udelay(4); 164 + skb = nfp_bpf_reply(bpf, tag); 165 + if (skb) 166 + return skb; 167 + } 161 168 162 169 err = wait_event_interruptible_timeout(bpf->cmsg_wq, 163 170 skb = nfp_bpf_reply(bpf, tag),
+11 -1
drivers/net/ethernet/netronome/nfp/bpf/offload.c
··· 127 127 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; 128 128 unsigned int stack_size; 129 129 unsigned int max_instr; 130 + int err; 130 131 131 132 stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; 132 133 if (prog->aux->stack_depth > stack_size) { ··· 144 143 if (!nfp_prog->prog) 145 144 return -ENOMEM; 146 145 147 - return nfp_bpf_jit(nfp_prog); 146 + err = nfp_bpf_jit(nfp_prog); 147 + if (err) 148 + return err; 149 + 150 + prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); 151 + prog->aux->offload->jited_image = nfp_prog->prog; 152 + 153 + return 0; 148 154 } 149 155 150 156 static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) ··· 176 168 static int 177 169 nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) 178 170 { 171 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) 172 + return -EINVAL; 179 173 return nfp_bpf_ctrl_del_entry(offmap, key); 180 174 } 181 175
+246
drivers/net/netdevsim/bpf.c
··· 17 17 #include <linux/bpf_verifier.h> 18 18 #include <linux/debugfs.h> 19 19 #include <linux/kernel.h> 20 + #include <linux/mutex.h> 20 21 #include <linux/rtnetlink.h> 21 22 #include <net/pkt_cls.h> 22 23 ··· 29 28 struct dentry *ddir; 30 29 const char *state; 31 30 bool is_loaded; 31 + struct list_head l; 32 + }; 33 + 34 + #define NSIM_BPF_MAX_KEYS 2 35 + 36 + struct nsim_bpf_bound_map { 37 + struct netdevsim *ns; 38 + struct bpf_offloaded_map *map; 39 + struct mutex mutex; 40 + struct nsim_map_entry { 41 + void *key; 42 + void *value; 43 + } entry[NSIM_BPF_MAX_KEYS]; 32 44 struct list_head l; 33 45 }; 34 46 ··· 298 284 return 0; 299 285 } 300 286 287 + static bool 288 + nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) 289 + { 290 + return e->key && !memcmp(key, e->key, map->key_size); 291 + } 292 + 293 + static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) 294 + { 295 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 296 + unsigned int i; 297 + 298 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) 299 + if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) 300 + return i; 301 + 302 + return -ENOENT; 303 + } 304 + 305 + static int 306 + nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) 307 + { 308 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 309 + 310 + nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); 311 + if (!nmap->entry[idx].key) 312 + return -ENOMEM; 313 + nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); 314 + if (!nmap->entry[idx].value) { 315 + kfree(nmap->entry[idx].key); 316 + nmap->entry[idx].key = NULL; 317 + return -ENOMEM; 318 + } 319 + 320 + return 0; 321 + } 322 + 323 + static int 324 + nsim_map_get_next_key(struct bpf_offloaded_map *offmap, 325 + void *key, void *next_key) 326 + { 327 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 328 + int idx = -ENOENT; 329 + 330 + mutex_lock(&nmap->mutex); 331 + 332 + if (key) 333 
+ idx = nsim_map_key_find(offmap, key); 334 + if (idx == -ENOENT) 335 + idx = 0; 336 + else 337 + idx++; 338 + 339 + for (; idx < ARRAY_SIZE(nmap->entry); idx++) { 340 + if (nmap->entry[idx].key) { 341 + memcpy(next_key, nmap->entry[idx].key, 342 + offmap->map.key_size); 343 + break; 344 + } 345 + } 346 + 347 + mutex_unlock(&nmap->mutex); 348 + 349 + if (idx == ARRAY_SIZE(nmap->entry)) 350 + return -ENOENT; 351 + return 0; 352 + } 353 + 354 + static int 355 + nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) 356 + { 357 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 358 + int idx; 359 + 360 + mutex_lock(&nmap->mutex); 361 + 362 + idx = nsim_map_key_find(offmap, key); 363 + if (idx >= 0) 364 + memcpy(value, nmap->entry[idx].value, offmap->map.value_size); 365 + 366 + mutex_unlock(&nmap->mutex); 367 + 368 + return idx < 0 ? idx : 0; 369 + } 370 + 371 + static int 372 + nsim_map_update_elem(struct bpf_offloaded_map *offmap, 373 + void *key, void *value, u64 flags) 374 + { 375 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 376 + int idx, err = 0; 377 + 378 + mutex_lock(&nmap->mutex); 379 + 380 + idx = nsim_map_key_find(offmap, key); 381 + if (idx < 0 && flags == BPF_EXIST) { 382 + err = idx; 383 + goto exit_unlock; 384 + } 385 + if (idx >= 0 && flags == BPF_NOEXIST) { 386 + err = -EEXIST; 387 + goto exit_unlock; 388 + } 389 + 390 + if (idx < 0) { 391 + for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) 392 + if (!nmap->entry[idx].key) 393 + break; 394 + if (idx == ARRAY_SIZE(nmap->entry)) { 395 + err = -E2BIG; 396 + goto exit_unlock; 397 + } 398 + 399 + err = nsim_map_alloc_elem(offmap, idx); 400 + if (err) 401 + goto exit_unlock; 402 + } 403 + 404 + memcpy(nmap->entry[idx].key, key, offmap->map.key_size); 405 + memcpy(nmap->entry[idx].value, value, offmap->map.value_size); 406 + exit_unlock: 407 + mutex_unlock(&nmap->mutex); 408 + 409 + return err; 410 + } 411 + 412 + static int nsim_map_delete_elem(struct bpf_offloaded_map 
*offmap, void *key) 413 + { 414 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 415 + int idx; 416 + 417 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) 418 + return -EINVAL; 419 + 420 + mutex_lock(&nmap->mutex); 421 + 422 + idx = nsim_map_key_find(offmap, key); 423 + if (idx >= 0) { 424 + kfree(nmap->entry[idx].key); 425 + kfree(nmap->entry[idx].value); 426 + memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); 427 + } 428 + 429 + mutex_unlock(&nmap->mutex); 430 + 431 + return idx < 0 ? idx : 0; 432 + } 433 + 434 + static const struct bpf_map_dev_ops nsim_bpf_map_ops = { 435 + .map_get_next_key = nsim_map_get_next_key, 436 + .map_lookup_elem = nsim_map_lookup_elem, 437 + .map_update_elem = nsim_map_update_elem, 438 + .map_delete_elem = nsim_map_delete_elem, 439 + }; 440 + 441 + static int 442 + nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) 443 + { 444 + struct nsim_bpf_bound_map *nmap; 445 + unsigned int i; 446 + int err; 447 + 448 + if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY && 449 + offmap->map.map_type != BPF_MAP_TYPE_HASH)) 450 + return -EINVAL; 451 + if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS) 452 + return -ENOMEM; 453 + if (offmap->map.map_flags) 454 + return -EINVAL; 455 + 456 + nmap = kzalloc(sizeof(*nmap), GFP_USER); 457 + if (!nmap) 458 + return -ENOMEM; 459 + 460 + offmap->dev_priv = nmap; 461 + nmap->ns = ns; 462 + nmap->map = offmap; 463 + mutex_init(&nmap->mutex); 464 + 465 + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) { 466 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { 467 + u32 *key; 468 + 469 + err = nsim_map_alloc_elem(offmap, i); 470 + if (err) 471 + goto err_free; 472 + key = nmap->entry[i].key; 473 + *key = i; 474 + } 475 + } 476 + 477 + offmap->dev_ops = &nsim_bpf_map_ops; 478 + list_add_tail(&nmap->l, &ns->bpf_bound_maps); 479 + 480 + return 0; 481 + 482 + err_free: 483 + while (i--) { 484 + kfree(nmap->entry[i].key); 485 + kfree(nmap->entry[i].value); 486 + 
kfree(nmap); 488 + return err; 489 + } 490 + 491 + static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap) 492 + { 493 + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; 494 + unsigned int i; 495 + 496 + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { 497 + kfree(nmap->entry[i].key); 498 + kfree(nmap->entry[i].value); 499 + } 500 + list_del_init(&nmap->l); 501 + mutex_destroy(&nmap->mutex); 502 + kfree(nmap); 503 + } 504 + 301 505 int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) 302 506 { 303 507 struct netdevsim *ns = netdev_priv(dev); ··· 560 328 return err; 561 329 562 330 return nsim_xdp_set_prog(ns, bpf); 331 + case BPF_OFFLOAD_MAP_ALLOC: 332 + if (!ns->bpf_map_accept) 333 + return -EOPNOTSUPP; 334 + 335 + return nsim_bpf_map_alloc(ns, bpf->offmap); 336 + case BPF_OFFLOAD_MAP_FREE: 337 + nsim_bpf_map_free(bpf->offmap); 338 + return 0; 563 339 default: 564 340 return -EINVAL; 565 341 } ··· 576 336 int nsim_bpf_init(struct netdevsim *ns) 577 337 { 578 338 INIT_LIST_HEAD(&ns->bpf_bound_progs); 339 + INIT_LIST_HEAD(&ns->bpf_bound_maps); 579 340 580 341 debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir, 581 342 &ns->bpf_offloaded_id); ··· 603 362 debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir, 604 363 &ns->bpf_xdpoffload_accept); 605 364 365 + ns->bpf_map_accept = true; 366 + debugfs_create_bool("bpf_map_accept", 0600, ns->ddir, 367 + &ns->bpf_map_accept); 368 + 606 369 return 0; 607 370 } 608 371 609 372 void nsim_bpf_uninit(struct netdevsim *ns) 610 373 { 611 374 WARN_ON(!list_empty(&ns->bpf_bound_progs)); 375 + WARN_ON(!list_empty(&ns->bpf_bound_maps)); 612 376 WARN_ON(ns->xdp_prog); 613 377 WARN_ON(ns->bpf_offloaded); 614 378 }
+3
drivers/net/netdevsim/netdevsim.h
··· 61 61 bool bpf_tc_non_bound_accept; 62 62 bool bpf_xdpdrv_accept; 63 63 bool bpf_xdpoffload_accept; 64 + 65 + bool bpf_map_accept; 66 + struct list_head bpf_bound_maps; 64 67 }; 65 68 66 69 extern struct dentry *nsim_ddir;
+4
include/linux/bpf.h
··· 234 234 struct list_head offloads; 235 235 bool dev_state; 236 236 const struct bpf_prog_offload_ops *dev_ops; 237 + void *jited_image; 238 + u32 jited_len; 237 239 }; 238 240 239 241 struct bpf_prog_aux { ··· 585 583 void bpf_prog_offload_destroy(struct bpf_prog *prog); 586 584 int bpf_prog_offload_info_fill(struct bpf_prog_info *info, 587 585 struct bpf_prog *prog); 586 + 587 + int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map); 588 588 589 589 int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value); 590 590 int bpf_map_offload_update_elem(struct bpf_map *map,
+4 -1
include/uapi/linux/bpf.h
··· 17 17 #define BPF_ALU64 0x07 /* alu mode in double word width */ 18 18 19 19 /* ld/ldx fields */ 20 - #define BPF_DW 0x18 /* double word */ 20 + #define BPF_DW 0x18 /* double word (64-bit) */ 21 21 #define BPF_XADD 0xc0 /* exclusive add */ 22 22 23 23 /* alu/jmp fields */ ··· 938 938 __u32 max_entries; 939 939 __u32 map_flags; 940 940 char name[BPF_OBJ_NAME_LEN]; 941 + __u32 ifindex; 942 + __u64 netns_dev; 943 + __u64 netns_ino; 941 944 } __attribute__((aligned(8))); 942 945 943 946 /* User bpf_sock_ops struct to access socket values and specify request ops
+4 -3
include/uapi/linux/bpf_common.h
··· 15 15 16 16 /* ld/ldx fields */ 17 17 #define BPF_SIZE(code) ((code) & 0x18) 18 - #define BPF_W 0x00 19 - #define BPF_H 0x08 20 - #define BPF_B 0x10 18 + #define BPF_W 0x00 /* 32-bit */ 19 + #define BPF_H 0x08 /* 16-bit */ 20 + #define BPF_B 0x10 /* 8-bit */ 21 + /* eBPF BPF_DW 0x18 64-bit */ 21 22 #define BPF_MODE(code) ((code) & 0xe0) 22 23 #define BPF_IMM 0x00 23 24 #define BPF_ABS 0x20
+35 -26
kernel/bpf/arraymap.c
··· 49 49 } 50 50 51 51 /* Called from syscall */ 52 + static int array_map_alloc_check(union bpf_attr *attr) 53 + { 54 + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 55 + int numa_node = bpf_map_attr_numa_node(attr); 56 + 57 + /* check sanity of attributes */ 58 + if (attr->max_entries == 0 || attr->key_size != 4 || 59 + attr->value_size == 0 || 60 + attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 61 + (percpu && numa_node != NUMA_NO_NODE)) 62 + return -EINVAL; 63 + 64 + if (attr->value_size > KMALLOC_MAX_SIZE) 65 + /* if value_size is bigger, the user space won't be able to 66 + * access the elements. 67 + */ 68 + return -E2BIG; 69 + 70 + return 0; 71 + } 72 + 52 73 static struct bpf_map *array_map_alloc(union bpf_attr *attr) 53 74 { 54 75 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; ··· 78 57 bool unpriv = !capable(CAP_SYS_ADMIN); 79 58 struct bpf_array *array; 80 59 u64 array_size, mask64; 81 - 82 - /* check sanity of attributes */ 83 - if (attr->max_entries == 0 || attr->key_size != 4 || 84 - attr->value_size == 0 || 85 - attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 86 - (percpu && numa_node != NUMA_NO_NODE)) 87 - return ERR_PTR(-EINVAL); 88 - 89 - if (attr->value_size > KMALLOC_MAX_SIZE) 90 - /* if value_size is bigger, the user space won't be able to 91 - * access the elements. 
92 - */ 93 - return ERR_PTR(-E2BIG); 94 60 95 61 elem_size = round_up(attr->value_size, 8); 96 62 ··· 120 112 array->map.unpriv_array = unpriv; 121 113 122 114 /* copy mandatory map attributes */ 123 - array->map.map_type = attr->map_type; 124 - array->map.key_size = attr->key_size; 125 - array->map.value_size = attr->value_size; 126 - array->map.max_entries = attr->max_entries; 127 - array->map.map_flags = attr->map_flags; 128 - array->map.numa_node = numa_node; 115 + bpf_map_init_from_attr(&array->map, attr); 129 116 array->elem_size = elem_size; 130 117 131 118 if (!percpu) ··· 330 327 } 331 328 332 329 const struct bpf_map_ops array_map_ops = { 330 + .map_alloc_check = array_map_alloc_check, 333 331 .map_alloc = array_map_alloc, 334 332 .map_free = array_map_free, 335 333 .map_get_next_key = array_map_get_next_key, ··· 341 337 }; 342 338 343 339 const struct bpf_map_ops percpu_array_map_ops = { 340 + .map_alloc_check = array_map_alloc_check, 344 341 .map_alloc = array_map_alloc, 345 342 .map_free = array_map_free, 346 343 .map_get_next_key = array_map_get_next_key, ··· 350 345 .map_delete_elem = array_map_delete_elem, 351 346 }; 352 347 353 - static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) 348 + static int fd_array_map_alloc_check(union bpf_attr *attr) 354 349 { 355 350 /* only file descriptors can be stored in this type of map */ 356 351 if (attr->value_size != sizeof(u32)) 357 - return ERR_PTR(-EINVAL); 358 - return array_map_alloc(attr); 352 + return -EINVAL; 353 + return array_map_alloc_check(attr); 359 354 } 360 355 361 356 static void fd_array_map_free(struct bpf_map *map) ··· 479 474 } 480 475 481 476 const struct bpf_map_ops prog_array_map_ops = { 482 - .map_alloc = fd_array_map_alloc, 477 + .map_alloc_check = fd_array_map_alloc_check, 478 + .map_alloc = array_map_alloc, 483 479 .map_free = fd_array_map_free, 484 480 .map_get_next_key = array_map_get_next_key, 485 481 .map_lookup_elem = fd_array_map_lookup_elem, ··· 567 561 } 568 562 569 
563 const struct bpf_map_ops perf_event_array_map_ops = { 570 - .map_alloc = fd_array_map_alloc, 564 + .map_alloc_check = fd_array_map_alloc_check, 565 + .map_alloc = array_map_alloc, 571 566 .map_free = fd_array_map_free, 572 567 .map_get_next_key = array_map_get_next_key, 573 568 .map_lookup_elem = fd_array_map_lookup_elem, ··· 599 592 } 600 593 601 594 const struct bpf_map_ops cgroup_array_map_ops = { 602 - .map_alloc = fd_array_map_alloc, 595 + .map_alloc_check = fd_array_map_alloc_check, 596 + .map_alloc = array_map_alloc, 603 597 .map_free = cgroup_fd_array_free, 604 598 .map_get_next_key = array_map_get_next_key, 605 599 .map_lookup_elem = fd_array_map_lookup_elem, ··· 618 610 if (IS_ERR(inner_map_meta)) 619 611 return inner_map_meta; 620 612 621 - map = fd_array_map_alloc(attr); 613 + map = array_map_alloc(attr); 622 614 if (IS_ERR(map)) { 623 615 bpf_map_meta_free(inner_map_meta); 624 616 return map; ··· 681 673 } 682 674 683 675 const struct bpf_map_ops array_of_maps_map_ops = { 676 + .map_alloc_check = fd_array_map_alloc_check, 684 677 .map_alloc = array_of_map_alloc, 685 678 .map_free = array_of_map_free, 686 679 .map_get_next_key = array_map_get_next_key,
+12 -7
kernel/bpf/core.c
··· 300 300 } 301 301 302 302 #ifdef CONFIG_BPF_JIT 303 + /* All BPF JIT sysctl knobs here. */ 304 + int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); 305 + int bpf_jit_harden __read_mostly; 306 + int bpf_jit_kallsyms __read_mostly; 307 + 303 308 static __always_inline void 304 309 bpf_get_prog_addr_region(const struct bpf_prog *prog, 305 310 unsigned long *symbol_start, ··· 385 380 static DEFINE_SPINLOCK(bpf_lock); 386 381 static LIST_HEAD(bpf_kallsyms); 387 382 static struct latch_tree_root bpf_tree __cacheline_aligned; 388 - 389 - int bpf_jit_kallsyms __read_mostly; 390 383 391 384 static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) 392 385 { ··· 565 562 566 563 bpf_prog_unlock_free(fp); 567 564 } 568 - 569 - int bpf_jit_harden __read_mostly; 570 565 571 566 static int bpf_jit_blind_insn(const struct bpf_insn *from, 572 567 const struct bpf_insn *aux, ··· 1380 1379 } 1381 1380 1382 1381 #else 1383 - static unsigned int __bpf_prog_ret0(const void *ctx, 1384 - const struct bpf_insn *insn) 1382 + static unsigned int __bpf_prog_ret0_warn(const void *ctx, 1383 + const struct bpf_insn *insn) 1385 1384 { 1385 + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON 1386 + * is not working properly, so warn about it! 1387 + */ 1388 + WARN_ON_ONCE(1); 1386 1389 return 0; 1387 1390 } 1388 1391 #endif ··· 1446 1441 1447 1442 fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; 1448 1443 #else 1449 - fp->bpf_func = __bpf_prog_ret0; 1444 + fp->bpf_func = __bpf_prog_ret0_warn; 1450 1445 #endif 1451 1446 1452 1447 /* eBPF JITs can rewrite the program in case constant
+93 -2
kernel/bpf/lpm_trie.c
··· 591 591 raw_spin_unlock(&trie->lock); 592 592 } 593 593 594 - static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) 594 + static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) 595 595 { 596 - return -ENOTSUPP; 596 + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); 597 + struct bpf_lpm_trie_key *key = _key, *next_key = _next_key; 598 + struct lpm_trie_node *node, *next_node = NULL, *parent; 599 + struct lpm_trie_node **node_stack = NULL; 600 + struct lpm_trie_node __rcu **root; 601 + int err = 0, stack_ptr = -1; 602 + unsigned int next_bit; 603 + size_t matchlen; 604 + 605 + /* The get_next_key follows postorder. For the 4 node example in 606 + * the top of this file, the trie_get_next_key() returns the following 607 + * one after another: 608 + * 192.168.0.0/24 609 + * 192.168.1.0/24 610 + * 192.168.128.0/24 611 + * 192.168.0.0/16 612 + * 613 + * The idea is to return more specific keys before less specific ones. 614 + */ 615 + 616 + /* Empty trie */ 617 + if (!rcu_dereference(trie->root)) 618 + return -ENOENT; 619 + 620 + /* For invalid key, find the leftmost node in the trie */ 621 + if (!key || key->prefixlen > trie->max_prefixlen) { 622 + root = &trie->root; 623 + goto find_leftmost; 624 + } 625 + 626 + node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *), 627 + GFP_USER | __GFP_NOWARN); 628 + if (!node_stack) 629 + return -ENOMEM; 630 + 631 + /* Try to find the exact node for the given key */ 632 + for (node = rcu_dereference(trie->root); node;) { 633 + node_stack[++stack_ptr] = node; 634 + matchlen = longest_prefix_match(trie, node, key); 635 + if (node->prefixlen != matchlen || 636 + node->prefixlen == key->prefixlen) 637 + break; 638 + 639 + next_bit = extract_bit(key->data, node->prefixlen); 640 + node = rcu_dereference(node->child[next_bit]); 641 + } 642 + if (!node || node->prefixlen != key->prefixlen || 643 + (node->flags & LPM_TREE_NODE_FLAG_IM)) { 644 + root = 
&trie->root; 645 + goto find_leftmost; 646 + } 647 + 648 + /* The node with the exactly-matching key has been found, 649 + * find the first node in postorder after the matched node. 650 + */ 651 + node = node_stack[stack_ptr]; 652 + while (stack_ptr > 0) { 653 + parent = node_stack[stack_ptr - 1]; 654 + if (rcu_dereference(parent->child[0]) == node && 655 + rcu_dereference(parent->child[1])) { 656 + root = &parent->child[1]; 657 + goto find_leftmost; 658 + } 659 + if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) { 660 + next_node = parent; 661 + goto do_copy; 662 + } 663 + 664 + node = parent; 665 + stack_ptr--; 666 + } 667 + 668 + /* did not find anything */ 669 + err = -ENOENT; 670 + goto free_stack; 671 + 672 + find_leftmost: 673 + /* Find the leftmost non-intermediate node, all intermediate nodes 674 + * have exact two children, so this function will never return NULL. 675 + */ 676 + for (node = rcu_dereference(*root); node;) { 677 + if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) 678 + next_node = node; 679 + node = rcu_dereference(node->child[0]); 680 + } 681 + do_copy: 682 + next_key->prefixlen = next_node->prefixlen; 683 + memcpy((void *)next_key + offsetof(struct bpf_lpm_trie_key, data), 684 + next_node->data, trie->data_size); 685 + free_stack: 686 + kfree(node_stack); 687 + return err; 597 688 } 598 689 599 690 const struct bpf_map_ops trie_map_ops = {
+80 -1
kernel/bpf/offload.c
··· 230 230 .prog = prog, 231 231 .info = info, 232 232 }; 233 + struct bpf_prog_aux *aux = prog->aux; 233 234 struct inode *ns_inode; 234 235 struct path ns_path; 236 + char __user *uinsns; 235 237 void *res; 238 + u32 ulen; 236 239 237 240 res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args); 238 241 if (IS_ERR(res)) { ··· 243 240 return -ENODEV; 244 241 return PTR_ERR(res); 245 242 } 243 + 244 + down_read(&bpf_devs_lock); 245 + 246 + if (!aux->offload) { 247 + up_read(&bpf_devs_lock); 248 + return -ENODEV; 249 + } 250 + 251 + ulen = info->jited_prog_len; 252 + info->jited_prog_len = aux->offload->jited_len; 253 + if (info->jited_prog_len && ulen) { 254 + uinsns = u64_to_user_ptr(info->jited_prog_insns); 255 + ulen = min_t(u32, info->jited_prog_len, ulen); 256 + if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { 257 + up_read(&bpf_devs_lock); 258 + return -EFAULT; 259 + } 260 + } 261 + 262 + up_read(&bpf_devs_lock); 246 263 247 264 ns_inode = ns_path.dentry->d_inode; 248 265 info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); ··· 299 276 300 277 if (!capable(CAP_SYS_ADMIN)) 301 278 return ERR_PTR(-EPERM); 302 - if (attr->map_type != BPF_MAP_TYPE_HASH) 279 + if (attr->map_type != BPF_MAP_TYPE_ARRAY && 280 + attr->map_type != BPF_MAP_TYPE_HASH) 303 281 return ERR_PTR(-EINVAL); 304 282 305 283 offmap = kzalloc(sizeof(*offmap), GFP_USER); ··· 411 387 up_read(&bpf_devs_lock); 412 388 413 389 return ret; 390 + } 391 + 392 + struct ns_get_path_bpf_map_args { 393 + struct bpf_offloaded_map *offmap; 394 + struct bpf_map_info *info; 395 + }; 396 + 397 + static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) 398 + { 399 + struct ns_get_path_bpf_map_args *args = private_data; 400 + struct ns_common *ns; 401 + struct net *net; 402 + 403 + rtnl_lock(); 404 + down_read(&bpf_devs_lock); 405 + 406 + if (args->offmap->netdev) { 407 + args->info->ifindex = args->offmap->netdev->ifindex; 408 + net = dev_net(args->offmap->netdev); 409 
+ get_net(net); 410 + ns = &net->ns; 411 + } else { 412 + args->info->ifindex = 0; 413 + ns = NULL; 414 + } 415 + 416 + up_read(&bpf_devs_lock); 417 + rtnl_unlock(); 418 + 419 + return ns; 420 + } 421 + 422 + int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) 423 + { 424 + struct ns_get_path_bpf_map_args args = { 425 + .offmap = map_to_offmap(map), 426 + .info = info, 427 + }; 428 + struct inode *ns_inode; 429 + struct path ns_path; 430 + void *res; 431 + 432 + res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); 433 + if (IS_ERR(res)) { 434 + if (!info->ifindex) 435 + return -ENODEV; 436 + return PTR_ERR(res); 437 + } 438 + 439 + ns_inode = ns_path.dentry->d_inode; 440 + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); 441 + info->netns_ino = ns_inode->i_ino; 442 + path_put(&ns_path); 443 + 444 + return 0; 414 445 } 415 446 416 447 bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+26 -13
kernel/bpf/syscall.c
··· 1504 1504 struct bpf_prog *prog; 1505 1505 int ret = -ENOTSUPP; 1506 1506 1507 + if (!capable(CAP_SYS_ADMIN)) 1508 + return -EPERM; 1507 1509 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) 1508 1510 return -EINVAL; 1509 1511 ··· 1726 1724 goto done; 1727 1725 } 1728 1726 1729 - ulen = info.jited_prog_len; 1730 - info.jited_prog_len = prog->jited_len; 1731 - if (info.jited_prog_len && ulen) { 1732 - if (bpf_dump_raw_ok()) { 1733 - uinsns = u64_to_user_ptr(info.jited_prog_insns); 1734 - ulen = min_t(u32, info.jited_prog_len, ulen); 1735 - if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1736 - return -EFAULT; 1737 - } else { 1738 - info.jited_prog_insns = 0; 1739 - } 1740 - } 1741 - 1742 1727 ulen = info.xlated_prog_len; 1743 1728 info.xlated_prog_len = bpf_prog_insn_size(prog); 1744 1729 if (info.xlated_prog_len && ulen) { ··· 1751 1762 err = bpf_prog_offload_info_fill(&info, prog); 1752 1763 if (err) 1753 1764 return err; 1765 + goto done; 1766 + } 1767 + 1768 + /* NOTE: the following code is supposed to be skipped for offload. 1769 + * bpf_prog_offload_info_fill() is the place to fill similar fields 1770 + * for offload. 1771 + */ 1772 + ulen = info.jited_prog_len; 1773 + info.jited_prog_len = prog->jited_len; 1774 + if (info.jited_prog_len && ulen) { 1775 + if (bpf_dump_raw_ok()) { 1776 + uinsns = u64_to_user_ptr(info.jited_prog_insns); 1777 + ulen = min_t(u32, info.jited_prog_len, ulen); 1778 + if (copy_to_user(uinsns, prog->bpf_func, ulen)) 1779 + return -EFAULT; 1780 + } else { 1781 + info.jited_prog_insns = 0; 1782 + } 1754 1783 } 1755 1784 1756 1785 done: ··· 1800 1793 info.max_entries = map->max_entries; 1801 1794 info.map_flags = map->map_flags; 1802 1795 memcpy(info.name, map->name, sizeof(map->name)); 1796 + 1797 + if (bpf_map_is_dev_bound(map)) { 1798 + err = bpf_map_offload_info_fill(&info, map); 1799 + if (err) 1800 + return err; 1801 + } 1803 1802 1804 1803 if (copy_to_user(uinfo, &info, info_len) || 1805 1804 put_user(info_len, &uattr->info.info_len))
+57 -25
kernel/bpf/verifier.c
··· 1850 1850 } 1851 1851 } 1852 1852 1853 + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) 1854 + { 1855 + return type == ARG_PTR_TO_MEM || 1856 + type == ARG_PTR_TO_MEM_OR_NULL || 1857 + type == ARG_PTR_TO_UNINIT_MEM; 1858 + } 1859 + 1860 + static bool arg_type_is_mem_size(enum bpf_arg_type type) 1861 + { 1862 + return type == ARG_CONST_SIZE || 1863 + type == ARG_CONST_SIZE_OR_ZERO; 1864 + } 1865 + 1853 1866 static int check_func_arg(struct bpf_verifier_env *env, u32 regno, 1854 1867 enum bpf_arg_type arg_type, 1855 1868 struct bpf_call_arg_meta *meta) ··· 1912 1899 expected_type = PTR_TO_CTX; 1913 1900 if (type != expected_type) 1914 1901 goto err_type; 1915 - } else if (arg_type == ARG_PTR_TO_MEM || 1916 - arg_type == ARG_PTR_TO_MEM_OR_NULL || 1917 - arg_type == ARG_PTR_TO_UNINIT_MEM) { 1902 + } else if (arg_type_is_mem_ptr(arg_type)) { 1918 1903 expected_type = PTR_TO_STACK; 1919 1904 /* One exception here. In case function allows for NULL to be 1920 1905 * passed in as argument, it's a SCALAR_VALUE type. Final test ··· 1973 1962 err = check_stack_boundary(env, regno, 1974 1963 meta->map_ptr->value_size, 1975 1964 false, NULL); 1976 - } else if (arg_type == ARG_CONST_SIZE || 1977 - arg_type == ARG_CONST_SIZE_OR_ZERO) { 1965 + } else if (arg_type_is_mem_size(arg_type)) { 1978 1966 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 1979 - 1980 - /* bpf_xxx(..., buf, len) call will access 'len' bytes 1981 - * from stack pointer 'buf'. Check it 1982 - * note: regno == len, regno - 1 == buf 1983 - */ 1984 - if (regno == 0) { 1985 - /* kernel subsystem misconfigured verifier */ 1986 - verbose(env, 1987 - "ARG_CONST_SIZE cannot be first argument\n"); 1988 - return -EACCES; 1989 - } 1990 1967 1991 1968 /* The register is SCALAR_VALUE; the access check 1992 1969 * happens using its boundaries. 
1993 1970 */ 1994 - 1995 1971 if (!tnum_is_const(reg->var_off)) 1996 1972 /* For unprivileged variable accesses, disable raw 1997 1973 * mode so that the program is required to ··· 2122 2124 return -EINVAL; 2123 2125 } 2124 2126 2125 - static int check_raw_mode(const struct bpf_func_proto *fn) 2127 + static bool check_raw_mode_ok(const struct bpf_func_proto *fn) 2126 2128 { 2127 2129 int count = 0; 2128 2130 ··· 2137 2139 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) 2138 2140 count++; 2139 2141 2140 - return count > 1 ? -EINVAL : 0; 2142 + /* We only support one arg being in raw mode at the moment, 2143 + * which is sufficient for the helper functions we have 2144 + * right now. 2145 + */ 2146 + return count <= 1; 2147 + } 2148 + 2149 + static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, 2150 + enum bpf_arg_type arg_next) 2151 + { 2152 + return (arg_type_is_mem_ptr(arg_curr) && 2153 + !arg_type_is_mem_size(arg_next)) || 2154 + (!arg_type_is_mem_ptr(arg_curr) && 2155 + arg_type_is_mem_size(arg_next)); 2156 + } 2157 + 2158 + static bool check_arg_pair_ok(const struct bpf_func_proto *fn) 2159 + { 2160 + /* bpf_xxx(..., buf, len) call will access 'len' 2161 + * bytes from memory 'buf'. Both arg types need 2162 + * to be paired, so make sure there's no buggy 2163 + * helper function specification. 2164 + */ 2165 + if (arg_type_is_mem_size(fn->arg1_type) || 2166 + arg_type_is_mem_ptr(fn->arg5_type) || 2167 + check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || 2168 + check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || 2169 + check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || 2170 + check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) 2171 + return false; 2172 + 2173 + return true; 2174 + } 2175 + 2176 + static int check_func_proto(const struct bpf_func_proto *fn) 2177 + { 2178 + return check_raw_mode_ok(fn) && 2179 + check_arg_pair_ok(fn) ? 
0 : -EINVAL; 2141 2180 } 2142 2181 2143 2182 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] ··· 2330 2295 2331 2296 if (env->ops->get_func_proto) 2332 2297 fn = env->ops->get_func_proto(func_id); 2333 - 2334 2298 if (!fn) { 2335 2299 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), 2336 2300 func_id); ··· 2353 2319 memset(&meta, 0, sizeof(meta)); 2354 2320 meta.pkt_access = fn->pkt_access; 2355 2321 2356 - /* We only support one arg being in raw mode at the moment, which 2357 - * is sufficient for the helper functions we have right now. 2358 - */ 2359 - err = check_raw_mode(fn); 2322 + err = check_func_proto(fn); 2360 2323 if (err) { 2361 2324 verbose(env, "kernel subsystem misconfigured func %s#%d\n", 2362 2325 func_id_name(func_id), func_id); ··· 4834 4803 insn_idx++; 4835 4804 } 4836 4805 4837 - verbose(env, "processed %d insns, stack depth ", insn_processed); 4806 + verbose(env, "processed %d insns (limit %d), stack depth ", 4807 + insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); 4838 4808 for (i = 0; i < env->subprog_cnt + 1; i++) { 4839 4809 u32 depth = env->subprog_stack_depth[i]; 4840 4810
+1 -1
kernel/trace/bpf_trace.c
··· 245 245 */ 246 246 #define __BPF_TP_EMIT() __BPF_ARG3_TP() 247 247 #define __BPF_TP(...) \ 248 - __trace_printk(1 /* Fake ip will not be printed. */, \ 248 + __trace_printk(0 /* Fake ip */, \ 249 249 fmt, ##__VA_ARGS__) 250 250 251 251 #define __BPF_ARG1_TP(...) \
+104
lib/test_bpf.c
··· 6109 6109 { { ETH_HLEN, 42 } }, 6110 6110 .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, 6111 6111 }, 6112 + /* Checking interpreter vs JIT wrt signed extended imms. */ 6113 + { 6114 + "JNE signed compare, test 1", 6115 + .u.insns_int = { 6116 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6117 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6118 + BPF_MOV64_REG(R2, R1), 6119 + BPF_ALU64_REG(BPF_AND, R2, R3), 6120 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6121 + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), 6122 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6123 + BPF_EXIT_INSN(), 6124 + }, 6125 + INTERNAL, 6126 + { }, 6127 + { { 0, 1 } }, 6128 + }, 6129 + { 6130 + "JNE signed compare, test 2", 6131 + .u.insns_int = { 6132 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6133 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6134 + BPF_MOV64_REG(R2, R1), 6135 + BPF_ALU64_REG(BPF_AND, R2, R3), 6136 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6137 + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), 6138 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6139 + BPF_EXIT_INSN(), 6140 + }, 6141 + INTERNAL, 6142 + { }, 6143 + { { 0, 1 } }, 6144 + }, 6145 + { 6146 + "JNE signed compare, test 3", 6147 + .u.insns_int = { 6148 + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), 6149 + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), 6150 + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), 6151 + BPF_MOV64_REG(R2, R1), 6152 + BPF_ALU64_REG(BPF_AND, R2, R3), 6153 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6154 + BPF_JMP_REG(BPF_JNE, R2, R4, 1), 6155 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6156 + BPF_EXIT_INSN(), 6157 + }, 6158 + INTERNAL, 6159 + { }, 6160 + { { 0, 2 } }, 6161 + }, 6162 + { 6163 + "JNE signed compare, test 4", 6164 + .u.insns_int = { 6165 + BPF_LD_IMM64(R1, -17104896), 6166 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6167 + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), 6168 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6169 + BPF_EXIT_INSN(), 6170 + }, 6171 + INTERNAL, 6172 + { }, 6173 + { { 0, 2 } }, 6174 + }, 6175 + { 6176 + "JNE signed compare, test 5", 6177 + .u.insns_int = { 6178 + BPF_LD_IMM64(R1, 0xfefb0000), 6179 
+ BPF_ALU32_IMM(BPF_MOV, R0, 1), 6180 + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), 6181 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6182 + BPF_EXIT_INSN(), 6183 + }, 6184 + INTERNAL, 6185 + { }, 6186 + { { 0, 1 } }, 6187 + }, 6188 + { 6189 + "JNE signed compare, test 6", 6190 + .u.insns_int = { 6191 + BPF_LD_IMM64(R1, 0x7efb0000), 6192 + BPF_ALU32_IMM(BPF_MOV, R0, 1), 6193 + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), 6194 + BPF_ALU32_IMM(BPF_MOV, R0, 2), 6195 + BPF_EXIT_INSN(), 6196 + }, 6197 + INTERNAL, 6198 + { }, 6199 + { { 0, 2 } }, 6200 + }, 6201 + { 6202 + "JNE signed compare, test 7", 6203 + .u.insns = { 6204 + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), 6205 + BPF_STMT(BPF_MISC | BPF_TAX, 0), 6206 + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), 6207 + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), 6208 + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), 6209 + BPF_STMT(BPF_RET | BPF_K, 1), 6210 + BPF_STMT(BPF_RET | BPF_K, 2), 6211 + }, 6212 + CLASSIC | FLAG_NO_DATA, 6213 + {}, 6214 + { { 0, 2 } }, 6215 + }, 6112 6216 }; 6113 6217 6114 6218 static struct net_device dev;
+5 -2
net/core/filter.c
··· 2865 2865 .arg2_type = ARG_CONST_MAP_PTR, 2866 2866 .arg3_type = ARG_ANYTHING, 2867 2867 .arg4_type = ARG_PTR_TO_MEM, 2868 - .arg5_type = ARG_CONST_SIZE, 2868 + .arg5_type = ARG_CONST_SIZE_OR_ZERO, 2869 2869 }; 2870 2870 2871 2871 static unsigned short bpf_tunnel_key_af(u64 flags) ··· 3154 3154 .arg2_type = ARG_CONST_MAP_PTR, 3155 3155 .arg3_type = ARG_ANYTHING, 3156 3156 .arg4_type = ARG_PTR_TO_MEM, 3157 - .arg5_type = ARG_CONST_SIZE, 3157 + .arg5_type = ARG_CONST_SIZE_OR_ZERO, 3158 3158 }; 3159 3159 3160 3160 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) ··· 3460 3460 return &bpf_xdp_event_output_proto; 3461 3461 case BPF_FUNC_get_smp_processor_id: 3462 3462 return &bpf_get_smp_processor_id_proto; 3463 + case BPF_FUNC_csum_diff: 3464 + return &bpf_csum_diff_proto; 3463 3465 case BPF_FUNC_xdp_adjust_head: 3464 3466 return &bpf_xdp_adjust_head_proto; 3465 3467 case BPF_FUNC_xdp_adjust_meta: ··· 4532 4530 }; 4533 4531 4534 4532 const struct bpf_prog_ops sk_filter_prog_ops = { 4533 + .test_run = bpf_prog_test_run_skb, 4535 4534 }; 4536 4535 4537 4536 const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
+53 -7
net/core/sysctl_net_core.c
··· 25 25 26 26 static int zero = 0; 27 27 static int one = 1; 28 + static int two __maybe_unused = 2; 28 29 static int min_sndbuf = SOCK_MIN_SNDBUF; 29 30 static int min_rcvbuf = SOCK_MIN_RCVBUF; 30 31 static int max_skb_frags = MAX_SKB_FRAGS; ··· 251 250 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 252 251 } 253 252 253 + #ifdef CONFIG_BPF_JIT 254 + static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, 255 + void __user *buffer, size_t *lenp, 256 + loff_t *ppos) 257 + { 258 + int ret, jit_enable = *(int *)table->data; 259 + struct ctl_table tmp = *table; 260 + 261 + if (write && !capable(CAP_SYS_ADMIN)) 262 + return -EPERM; 263 + 264 + tmp.data = &jit_enable; 265 + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 266 + if (write && !ret) { 267 + if (jit_enable < 2 || 268 + (jit_enable == 2 && bpf_dump_raw_ok())) { 269 + *(int *)table->data = jit_enable; 270 + if (jit_enable == 2) 271 + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 272 + } else { 273 + ret = -EPERM; 274 + } 275 + } 276 + return ret; 277 + } 278 + 279 + # ifdef CONFIG_HAVE_EBPF_JIT 280 + static int 281 + proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, 282 + void __user *buffer, size_t *lenp, 283 + loff_t *ppos) 284 + { 285 + if (!capable(CAP_SYS_ADMIN)) 286 + return -EPERM; 287 + 288 + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 289 + } 290 + # endif 291 + #endif 292 + 254 293 static struct ctl_table net_core_table[] = { 255 294 #ifdef CONFIG_NET 256 295 { ··· 366 325 .data = &bpf_jit_enable, 367 326 .maxlen = sizeof(int), 368 327 .mode = 0644, 369 - #ifndef CONFIG_BPF_JIT_ALWAYS_ON 370 - .proc_handler = proc_dointvec 371 - #else 372 - .proc_handler = proc_dointvec_minmax, 328 + .proc_handler = proc_dointvec_minmax_bpf_enable, 329 + # ifdef CONFIG_BPF_JIT_ALWAYS_ON 373 330 .extra1 = &one, 374 331 .extra2 = &one, 375 - #endif 332 + # else 333 + .extra1 = 
&zero, 334 + .extra2 = &two, 335 + # endif 376 336 }, 377 337 # ifdef CONFIG_HAVE_EBPF_JIT 378 338 { ··· 381 339 .data = &bpf_jit_harden, 382 340 .maxlen = sizeof(int), 383 341 .mode = 0600, 384 - .proc_handler = proc_dointvec, 342 + .proc_handler = proc_dointvec_minmax_bpf_restricted, 343 + .extra1 = &zero, 344 + .extra2 = &two, 385 345 }, 386 346 { 387 347 .procname = "bpf_jit_kallsyms", 388 348 .data = &bpf_jit_kallsyms, 389 349 .maxlen = sizeof(int), 390 350 .mode = 0600, 391 - .proc_handler = proc_dointvec, 351 + .proc_handler = proc_dointvec_minmax_bpf_restricted, 352 + .extra1 = &zero, 353 + .extra2 = &one, 392 354 }, 393 355 # endif 394 356 #endif
-9
net/socket.c
··· 2613 2613 2614 2614 core_initcall(sock_init); /* early initcall */ 2615 2615 2616 - static int __init jit_init(void) 2617 - { 2618 - #ifdef CONFIG_BPF_JIT_ALWAYS_ON 2619 - bpf_jit_enable = 1; 2620 - #endif 2621 - return 0; 2622 - } 2623 - pure_initcall(jit_init); 2624 - 2625 2616 #ifdef CONFIG_PROC_FS 2626 2617 void socket_seq_show(struct seq_file *seq) 2627 2618 {
+5 -3
samples/bpf/xdp2skb_meta_kern.c
··· 35 35 void *data, *data_end; 36 36 int ret; 37 37 38 - /* Reserve space in-front data pointer for our meta info. 38 + /* Reserve space in-front of data pointer for our meta info. 39 39 * (Notice drivers not supporting data_meta will fail here!) 40 40 */ 41 41 ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); 42 42 if (ret < 0) 43 43 return XDP_ABORTED; 44 44 45 - /* For some unknown reason, these ctx pointers must be read 46 - * after bpf_xdp_adjust_meta, else verifier will reject prog. 45 + /* Notice: Kernel-side verifier requires that loading of 46 + * ctx->data MUST happen _after_ helper bpf_xdp_adjust_meta(), 47 + * as pkt-data pointers are invalidated. Helpers that require 48 + * this are determined/marked by bpf_helper_changes_pkt_data() 47 49 */ 48 50 data = (void *)(unsigned long)ctx->data; 49 51
+92 -2
samples/bpf/xdp_monitor_kern.c
··· 1 - /* XDP monitor tool, based on tracepoints 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. 2 3 * 3 - * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 4 + * XDP monitor tool, based on tracepoints 4 5 */ 5 6 #include <uapi/linux/bpf.h> 6 7 #include "bpf_helpers.h" ··· 116 115 if (!cnt) 117 116 return 1; 118 117 *cnt += 1; 118 + 119 + return 0; 120 + } 121 + 122 + /* Common stats data record shared with _user.c */ 123 + struct datarec { 124 + u64 processed; 125 + u64 dropped; 126 + u64 info; 127 + }; 128 + #define MAX_CPUS 64 129 + 130 + struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { 131 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 132 + .key_size = sizeof(u32), 133 + .value_size = sizeof(struct datarec), 134 + .max_entries = MAX_CPUS, 135 + }; 136 + 137 + struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { 138 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 139 + .key_size = sizeof(u32), 140 + .value_size = sizeof(struct datarec), 141 + .max_entries = 1, 142 + }; 143 + 144 + /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format 145 + * Code in: kernel/include/trace/events/xdp.h 146 + */ 147 + struct cpumap_enqueue_ctx { 148 + u64 __pad; // First 8 bytes are not accessible by bpf code 149 + int map_id; // offset:8; size:4; signed:1; 150 + u32 act; // offset:12; size:4; signed:0; 151 + int cpu; // offset:16; size:4; signed:1; 152 + unsigned int drops; // offset:20; size:4; signed:0; 153 + unsigned int processed; // offset:24; size:4; signed:0; 154 + int to_cpu; // offset:28; size:4; signed:1; 155 + }; 156 + 157 + SEC("tracepoint/xdp/xdp_cpumap_enqueue") 158 + int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) 159 + { 160 + u32 to_cpu = ctx->to_cpu; 161 + struct datarec *rec; 162 + 163 + if (to_cpu >= MAX_CPUS) 164 + return 1; 165 + 166 + rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); 167 + if (!rec) 168 + return 0; 169 + rec->processed += ctx->processed; 170 + 
rec->dropped += ctx->drops; 171 + 172 + /* Record bulk events, then userspace can calc average bulk size */ 173 + if (ctx->processed > 0) 174 + rec->info += 1; 175 + 176 + return 0; 177 + } 178 + 179 + /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format 180 + * Code in: kernel/include/trace/events/xdp.h 181 + */ 182 + struct cpumap_kthread_ctx { 183 + u64 __pad; // First 8 bytes are not accessible by bpf code 184 + int map_id; // offset:8; size:4; signed:1; 185 + u32 act; // offset:12; size:4; signed:0; 186 + int cpu; // offset:16; size:4; signed:1; 187 + unsigned int drops; // offset:20; size:4; signed:0; 188 + unsigned int processed; // offset:24; size:4; signed:0; 189 + int sched; // offset:28; size:4; signed:1; 190 + }; 191 + 192 + SEC("tracepoint/xdp/xdp_cpumap_kthread") 193 + int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) 194 + { 195 + struct datarec *rec; 196 + u32 key = 0; 197 + 198 + rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); 199 + if (!rec) 200 + return 0; 201 + rec->processed += ctx->processed; 202 + rec->dropped += ctx->drops; 203 + 204 + /* Count times kthread yielded CPU via schedule call */ 205 + if (ctx->sched) 206 + rec->info++; 119 207 120 208 return 0; 121 209 }
+352 -66
samples/bpf/xdp_monitor_user.c
··· 1 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 2 3 */ 3 4 static const char *__doc__= 4 5 "XDP monitor tool, based on tracepoints\n" ··· 40 39 {"sec", required_argument, NULL, 's' }, 41 40 {0, 0, NULL, 0 } 42 41 }; 42 + 43 + /* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */ 44 + #define EXIT_FAIL_MEM 5 43 45 44 46 static void usage(char *argv[]) 45 47 { ··· 112 108 return NULL; 113 109 } 114 110 111 + /* Common stats data record shared with _kern.c */ 112 + struct datarec { 113 + __u64 processed; 114 + __u64 dropped; 115 + __u64 info; 116 + }; 117 + #define MAX_CPUS 64 118 + 119 + /* Userspace structs for collection of stats from maps */ 115 120 struct record { 116 - __u64 counter; 117 121 __u64 timestamp; 122 + struct datarec total; 123 + struct datarec *cpu; 124 + }; 125 + struct u64rec { 126 + __u64 processed; 127 + }; 128 + struct record_u64 { 129 + /* record for _kern side __u64 values */ 130 + __u64 timestamp; 131 + struct u64rec total; 132 + struct u64rec *cpu; 118 133 }; 119 134 120 135 struct stats_record { 121 - struct record xdp_redir[REDIR_RES_MAX]; 122 - struct record xdp_exception[XDP_ACTION_MAX]; 136 + struct record_u64 xdp_redirect[REDIR_RES_MAX]; 137 + struct record_u64 xdp_exception[XDP_ACTION_MAX]; 138 + struct record xdp_cpumap_kthread; 139 + struct record xdp_cpumap_enqueue[MAX_CPUS]; 123 140 }; 124 141 125 - static void stats_print_headers(bool err_only) 142 + static bool map_collect_record(int fd, __u32 key, struct record *rec) 126 143 { 127 - if (err_only) 128 - printf("\n%s\n", __doc_err_only__); 144 + /* For percpu maps, userspace gets a value per possible CPU */ 145 + unsigned int nr_cpus = bpf_num_possible_cpus(); 146 + struct datarec values[nr_cpus]; 147 + __u64 sum_processed = 0; 148 + __u64 sum_dropped = 0; 149 + __u64 sum_info = 0; 150 + int i; 129 151 130 - printf("%-14s %-11s %-10s 
%-18s %-9s\n", 131 - "ACTION", "result", "pps ", "pps-human-readable", "measure-period"); 152 + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 153 + fprintf(stderr, 154 + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 155 + return false; 156 + } 157 + /* Get time as close as possible to reading map contents */ 158 + rec->timestamp = gettime(); 159 + 160 + /* Record and sum values from each CPU */ 161 + for (i = 0; i < nr_cpus; i++) { 162 + rec->cpu[i].processed = values[i].processed; 163 + sum_processed += values[i].processed; 164 + rec->cpu[i].dropped = values[i].dropped; 165 + sum_dropped += values[i].dropped; 166 + rec->cpu[i].info = values[i].info; 167 + sum_info += values[i].info; 168 + } 169 + rec->total.processed = sum_processed; 170 + rec->total.dropped = sum_dropped; 171 + rec->total.info = sum_info; 172 + return true; 173 + } 174 + 175 + static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec) 176 + { 177 + /* For percpu maps, userspace gets a value per possible CPU */ 178 + unsigned int nr_cpus = bpf_num_possible_cpus(); 179 + struct u64rec values[nr_cpus]; 180 + __u64 sum_total = 0; 181 + int i; 182 + 183 + if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 184 + fprintf(stderr, 185 + "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 186 + return false; 187 + } 188 + /* Get time as close as possible to reading map contents */ 189 + rec->timestamp = gettime(); 190 + 191 + /* Record and sum values from each CPU */ 192 + for (i = 0; i < nr_cpus; i++) { 193 + rec->cpu[i].processed = values[i].processed; 194 + sum_total += values[i].processed; 195 + } 196 + rec->total.processed = sum_total; 197 + return true; 132 198 } 133 199 134 200 static double calc_period(struct record *r, struct record *p) ··· 213 139 return period_; 214 140 } 215 141 216 - static double calc_pps(struct record *r, struct record *p, double period) 142 + static double calc_period_u64(struct record_u64 *r, struct record_u64 *p) 143 + { 144 + double 
period_ = 0; 145 + __u64 period = 0; 146 + 147 + period = r->timestamp - p->timestamp; 148 + if (period > 0) 149 + period_ = ((double) period / NANOSEC_PER_SEC); 150 + 151 + return period_; 152 + } 153 + 154 + static double calc_pps(struct datarec *r, struct datarec *p, double period) 217 155 { 218 156 __u64 packets = 0; 219 157 double pps = 0; 220 158 221 159 if (period > 0) { 222 - packets = r->counter - p->counter; 160 + packets = r->processed - p->processed; 223 161 pps = packets / period; 224 162 } 225 163 return pps; 226 164 } 227 165 228 - static void stats_print(struct stats_record *rec, 229 - struct stats_record *prev, 166 + static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period) 167 + { 168 + __u64 packets = 0; 169 + double pps = 0; 170 + 171 + if (period > 0) { 172 + packets = r->processed - p->processed; 173 + pps = packets / period; 174 + } 175 + return pps; 176 + } 177 + 178 + static double calc_drop(struct datarec *r, struct datarec *p, double period) 179 + { 180 + __u64 packets = 0; 181 + double pps = 0; 182 + 183 + if (period > 0) { 184 + packets = r->dropped - p->dropped; 185 + pps = packets / period; 186 + } 187 + return pps; 188 + } 189 + 190 + static double calc_info(struct datarec *r, struct datarec *p, double period) 191 + { 192 + __u64 packets = 0; 193 + double pps = 0; 194 + 195 + if (period > 0) { 196 + packets = r->info - p->info; 197 + pps = packets / period; 198 + } 199 + return pps; 200 + } 201 + 202 + static void stats_print(struct stats_record *stats_rec, 203 + struct stats_record *stats_prev, 230 204 bool err_only) 231 205 { 232 - double period = 0, pps = 0; 233 - struct record *r, *p; 234 - int i = 0; 206 + unsigned int nr_cpus = bpf_num_possible_cpus(); 207 + int rec_i = 0, i, to_cpu; 208 + double t = 0, pps = 0; 235 209 236 - char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n"; 210 + /* Header */ 211 + printf("%-15s %-7s %-12s %-12s %-9s\n", 212 + "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info"); 237 213 
238 214 /* tracepoint: xdp:xdp_redirect_* */ 239 215 if (err_only) 240 - i = REDIR_ERROR; 216 + rec_i = REDIR_ERROR; 241 217 242 - for (; i < REDIR_RES_MAX; i++) { 243 - r = &rec->xdp_redir[i]; 244 - p = &prev->xdp_redir[i]; 218 + for (; rec_i < REDIR_RES_MAX; rec_i++) { 219 + struct record_u64 *rec, *prev; 220 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; 221 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; 245 222 246 - if (p->timestamp) { 247 - period = calc_period(r, p); 248 - pps = calc_pps(r, p, period); 223 + rec = &stats_rec->xdp_redirect[rec_i]; 224 + prev = &stats_prev->xdp_redirect[rec_i]; 225 + t = calc_period_u64(rec, prev); 226 + 227 + for (i = 0; i < nr_cpus; i++) { 228 + struct u64rec *r = &rec->cpu[i]; 229 + struct u64rec *p = &prev->cpu[i]; 230 + 231 + pps = calc_pps_u64(r, p, t); 232 + if (pps > 0) 233 + printf(fmt1, "XDP_REDIRECT", i, 234 + rec_i ? 0.0: pps, rec_i ? pps : 0.0, 235 + err2str(rec_i)); 249 236 } 250 - printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period); 237 + pps = calc_pps_u64(&rec->total, &prev->total, t); 238 + printf(fmt2, "XDP_REDIRECT", "total", 239 + rec_i ? 0.0: pps, rec_i ? 
pps : 0.0, err2str(rec_i)); 251 240 } 252 241 253 242 /* tracepoint: xdp:xdp_exception */ 254 - for (i = 0; i < XDP_ACTION_MAX; i++) { 255 - r = &rec->xdp_exception[i]; 256 - p = &prev->xdp_exception[i]; 257 - if (p->timestamp) { 258 - period = calc_period(r, p); 259 - pps = calc_pps(r, p, period); 243 + for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { 244 + struct record_u64 *rec, *prev; 245 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n"; 246 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n"; 247 + 248 + rec = &stats_rec->xdp_exception[rec_i]; 249 + prev = &stats_prev->xdp_exception[rec_i]; 250 + t = calc_period_u64(rec, prev); 251 + 252 + for (i = 0; i < nr_cpus; i++) { 253 + struct u64rec *r = &rec->cpu[i]; 254 + struct u64rec *p = &prev->cpu[i]; 255 + 256 + pps = calc_pps_u64(r, p, t); 257 + if (pps > 0) 258 + printf(fmt1, "Exception", i, 259 + 0.0, pps, err2str(rec_i)); 260 260 } 261 + pps = calc_pps_u64(&rec->total, &prev->total, t); 261 262 if (pps > 0) 262 - printf(fmt, action2str(i), "Exception", 263 - pps, pps, period); 263 + printf(fmt2, "Exception", "total", 264 + 0.0, pps, action2str(rec_i)); 264 265 } 266 + 267 + /* cpumap enqueue stats */ 268 + for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) { 269 + char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; 270 + char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n"; 271 + struct record *rec, *prev; 272 + char *info_str = ""; 273 + double drop, info; 274 + 275 + rec = &stats_rec->xdp_cpumap_enqueue[to_cpu]; 276 + prev = &stats_prev->xdp_cpumap_enqueue[to_cpu]; 277 + t = calc_period(rec, prev); 278 + for (i = 0; i < nr_cpus; i++) { 279 + struct datarec *r = &rec->cpu[i]; 280 + struct datarec *p = &prev->cpu[i]; 281 + 282 + pps = calc_pps(r, p, t); 283 + drop = calc_drop(r, p, t); 284 + info = calc_info(r, p, t); 285 + if (info > 0) { 286 + info_str = "bulk-average"; 287 + info = pps / info; /* calc average bulk size */ 288 + } 289 + if (pps > 0) 290 + printf(fmt1, "cpumap-enqueue", 
291 + i, to_cpu, pps, drop, info, info_str); 292 + } 293 + pps = calc_pps(&rec->total, &prev->total, t); 294 + if (pps > 0) { 295 + drop = calc_drop(&rec->total, &prev->total, t); 296 + info = calc_info(&rec->total, &prev->total, t); 297 + if (info > 0) { 298 + info_str = "bulk-average"; 299 + info = pps / info; /* calc average bulk size */ 300 + } 301 + printf(fmt2, "cpumap-enqueue", 302 + "sum", to_cpu, pps, drop, info, info_str); 303 + } 304 + } 305 + 306 + /* cpumap kthread stats */ 307 + { 308 + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n"; 309 + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n"; 310 + struct record *rec, *prev; 311 + double drop, info; 312 + char *i_str = ""; 313 + 314 + rec = &stats_rec->xdp_cpumap_kthread; 315 + prev = &stats_prev->xdp_cpumap_kthread; 316 + t = calc_period(rec, prev); 317 + for (i = 0; i < nr_cpus; i++) { 318 + struct datarec *r = &rec->cpu[i]; 319 + struct datarec *p = &prev->cpu[i]; 320 + 321 + pps = calc_pps(r, p, t); 322 + drop = calc_drop(r, p, t); 323 + info = calc_info(r, p, t); 324 + if (info > 0) 325 + i_str = "sched"; 326 + if (pps > 0) 327 + printf(fmt1, "cpumap-kthread", 328 + i, pps, drop, info, i_str); 329 + } 330 + pps = calc_pps(&rec->total, &prev->total, t); 331 + drop = calc_drop(&rec->total, &prev->total, t); 332 + info = calc_info(&rec->total, &prev->total, t); 333 + if (info > 0) 334 + i_str = "sched-sum"; 335 + printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); 336 + } 337 + 265 338 printf("\n"); 266 - } 267 - 268 - static __u64 get_key32_value64_percpu(int fd, __u32 key) 269 - { 270 - /* For percpu maps, userspace gets a value per possible CPU */ 271 - unsigned int nr_cpus = bpf_num_possible_cpus(); 272 - __u64 values[nr_cpus]; 273 - __u64 sum = 0; 274 - int i; 275 - 276 - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 277 - fprintf(stderr, 278 - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 279 - return 0; 280 - } 281 - 282 - /* Sum values from each CPU 
*/ 283 - for (i = 0; i < nr_cpus; i++) { 284 - sum += values[i]; 285 - } 286 - return sum; 287 339 } 288 340 289 341 static bool stats_collect(struct stats_record *rec) ··· 422 222 */ 423 223 424 224 fd = map_data[0].fd; /* map0: redirect_err_cnt */ 425 - for (i = 0; i < REDIR_RES_MAX; i++) { 426 - rec->xdp_redir[i].timestamp = gettime(); 427 - rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i); 428 - } 225 + for (i = 0; i < REDIR_RES_MAX; i++) 226 + map_collect_record_u64(fd, i, &rec->xdp_redirect[i]); 429 227 430 228 fd = map_data[1].fd; /* map1: exception_cnt */ 431 229 for (i = 0; i < XDP_ACTION_MAX; i++) { 432 - rec->xdp_exception[i].timestamp = gettime(); 433 - rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i); 230 + map_collect_record_u64(fd, i, &rec->xdp_exception[i]); 434 231 } 232 + 233 + fd = map_data[2].fd; /* map2: cpumap_enqueue_cnt */ 234 + for (i = 0; i < MAX_CPUS; i++) 235 + map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]); 236 + 237 + fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ 238 + map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); 435 239 436 240 return true; 437 241 } 438 242 243 + static void *alloc_rec_per_cpu(int record_size) 244 + { 245 + unsigned int nr_cpus = bpf_num_possible_cpus(); 246 + void *array; 247 + size_t size; 248 + 249 + size = record_size * nr_cpus; 250 + array = malloc(size); 251 + memset(array, 0, size); 252 + if (!array) { 253 + fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); 254 + exit(EXIT_FAIL_MEM); 255 + } 256 + return array; 257 + } 258 + 259 + static struct stats_record *alloc_stats_record(void) 260 + { 261 + struct stats_record *rec; 262 + int rec_sz; 263 + int i; 264 + 265 + /* Alloc main stats_record structure */ 266 + rec = malloc(sizeof(*rec)); 267 + memset(rec, 0, sizeof(*rec)); 268 + if (!rec) { 269 + fprintf(stderr, "Mem alloc error\n"); 270 + exit(EXIT_FAIL_MEM); 271 + } 272 + 273 + /* Alloc stats stored per CPU for each record */ 274 + rec_sz = 
sizeof(struct u64rec); 275 + for (i = 0; i < REDIR_RES_MAX; i++) 276 + rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz); 277 + 278 + for (i = 0; i < XDP_ACTION_MAX; i++) 279 + rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz); 280 + 281 + rec_sz = sizeof(struct datarec); 282 + rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); 283 + 284 + for (i = 0; i < MAX_CPUS; i++) 285 + rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); 286 + 287 + return rec; 288 + } 289 + 290 + static void free_stats_record(struct stats_record *r) 291 + { 292 + int i; 293 + 294 + for (i = 0; i < REDIR_RES_MAX; i++) 295 + free(r->xdp_redirect[i].cpu); 296 + 297 + for (i = 0; i < XDP_ACTION_MAX; i++) 298 + free(r->xdp_exception[i].cpu); 299 + 300 + free(r->xdp_cpumap_kthread.cpu); 301 + 302 + for (i = 0; i < MAX_CPUS; i++) 303 + free(r->xdp_cpumap_enqueue[i].cpu); 304 + 305 + free(r); 306 + } 307 + 308 + /* Pointer swap trick */ 309 + static inline void swap(struct stats_record **a, struct stats_record **b) 310 + { 311 + struct stats_record *tmp; 312 + 313 + tmp = *a; 314 + *a = *b; 315 + *b = tmp; 316 + } 317 + 439 318 static void stats_poll(int interval, bool err_only) 440 319 { 441 - struct stats_record rec, prev; 320 + struct stats_record *rec, *prev; 442 321 443 - memset(&rec, 0, sizeof(rec)); 322 + rec = alloc_stats_record(); 323 + prev = alloc_stats_record(); 324 + stats_collect(rec); 325 + 326 + if (err_only) 327 + printf("\n%s\n", __doc_err_only__); 444 328 445 329 /* Trick to pretty printf with thousands separators use %' */ 446 330 setlocale(LC_NUMERIC, "en_US"); ··· 542 258 fflush(stdout); 543 259 544 260 while (1) { 545 - memcpy(&prev, &rec, sizeof(rec)); 546 - stats_collect(&rec); 547 - stats_print_headers(err_only); 548 - stats_print(&rec, &prev, err_only); 261 + swap(&prev, &rec); 262 + stats_collect(rec); 263 + stats_print(rec, prev, err_only); 549 264 fflush(stdout); 550 265 sleep(interval); 551 266 } 267 + 268 + free_stats_record(rec); 269 + 
free_stats_record(prev); 552 270 } 553 271 554 272 static void print_bpf_prog_info(void)
+4 -3
tools/bpf/bpf_jit_disasm.c
··· 172 172 { 173 173 char *ptr, *pptr, *tmp; 174 174 off_t off = 0; 175 - int ret, flen, proglen, pass, ulen = 0; 175 + unsigned int proglen; 176 + int ret, flen, pass, ulen = 0; 176 177 regmatch_t pmatch[1]; 177 178 unsigned long base; 178 179 regex_t regex; ··· 200 199 } 201 200 202 201 ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so); 203 - ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx", 202 + ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx", 204 203 &flen, &proglen, &pass, &base); 205 204 if (ret != 4) { 206 205 regfree(&regex); ··· 240 239 } 241 240 242 241 assert(ulen == proglen); 243 - printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n", 242 + printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n", 244 243 proglen, pass, flen); 245 244 printf("%lx + <x>:\n", base); 246 245
+72
tools/bpf/bpftool/common.c
··· 34 34 /* Author: Jakub Kicinski <kubakici@wp.pl> */ 35 35 36 36 #include <errno.h> 37 + #include <fcntl.h> 37 38 #include <fts.h> 38 39 #include <libgen.h> 39 40 #include <mntent.h> ··· 432 431 return NULL; 433 432 434 433 return if_indextoname(ifindex, buf); 434 + } 435 + 436 + static int read_sysfs_hex_int(char *path) 437 + { 438 + char vendor_id_buf[8]; 439 + int len; 440 + int fd; 441 + 442 + fd = open(path, O_RDONLY); 443 + if (fd < 0) { 444 + p_err("Can't open %s: %s", path, strerror(errno)); 445 + return -1; 446 + } 447 + 448 + len = read(fd, vendor_id_buf, sizeof(vendor_id_buf)); 449 + close(fd); 450 + if (len < 0) { 451 + p_err("Can't read %s: %s", path, strerror(errno)); 452 + return -1; 453 + } 454 + if (len >= (int)sizeof(vendor_id_buf)) { 455 + p_err("Value in %s too long", path); 456 + return -1; 457 + } 458 + 459 + vendor_id_buf[len] = 0; 460 + 461 + return strtol(vendor_id_buf, NULL, 0); 462 + } 463 + 464 + static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) 465 + { 466 + char full_path[64]; 467 + 468 + snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s", 469 + devname, entry_name); 470 + 471 + return read_sysfs_hex_int(full_path); 472 + } 473 + 474 + const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) 475 + { 476 + char devname[IF_NAMESIZE]; 477 + int vendor_id; 478 + int device_id; 479 + 480 + if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { 481 + p_err("Can't get net device name for ifindex %d: %s", ifindex, 482 + strerror(errno)); 483 + return NULL; 484 + } 485 + 486 + vendor_id = read_sysfs_netdev_hex_int(devname, "vendor"); 487 + if (vendor_id < 0) { 488 + p_err("Can't get device vendor id for %s", devname); 489 + return NULL; 490 + } 491 + 492 + switch (vendor_id) { 493 + case 0x19ee: 494 + device_id = read_sysfs_netdev_hex_int(devname, "device"); 495 + if (device_id != 0x4000 && 496 + device_id != 0x6000 && 497 + device_id != 0x6003) 498 + p_info("Unknown NFP 
device ID, assuming it is NFP-6xxx arch"); 499 + return "NFP-6xxx"; 500 + default: 501 + p_err("Can't get bfd arch name for device vendor id 0x%04x", 502 + vendor_id); 503 + return NULL; 504 + } 435 505 } 436 506 437 507 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+15 -1
tools/bpf/bpftool/jit_disasm.c
··· 76 76 return 0; 77 77 } 78 78 79 - void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes) 79 + void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 80 + const char *arch) 80 81 { 81 82 disassembler_ftype disassemble; 82 83 struct disassemble_info info; ··· 101 100 else 102 101 init_disassemble_info(&info, stdout, 103 102 (fprintf_ftype) fprintf); 103 + 104 + /* Update architecture info for offload. */ 105 + if (arch) { 106 + const bfd_arch_info_type *inf = bfd_scan_arch(arch); 107 + 108 + if (inf) { 109 + bfdf->arch_info = inf; 110 + } else { 111 + p_err("No libfd support for %s", arch); 112 + return; 113 + } 114 + } 115 + 104 116 info.arch = bfd_get_arch(bfdf); 105 117 info.mach = bfd_get_mach(bfdf); 106 118 info.buffer = image;
+4 -1
tools/bpf/bpftool/main.h
··· 121 121 122 122 int prog_parse_fd(int *argc, char ***argv); 123 123 124 - void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes); 124 + void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 125 + const char *arch); 125 126 void print_hex_data_json(uint8_t *data, size_t len); 127 + 128 + const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 126 129 127 130 #endif
+7 -1
tools/bpf/bpftool/map.c
··· 66 66 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", 67 67 [BPF_MAP_TYPE_DEVMAP] = "devmap", 68 68 [BPF_MAP_TYPE_SOCKMAP] = "sockmap", 69 + [BPF_MAP_TYPE_CPUMAP] = "cpumap", 69 70 }; 70 71 71 72 static unsigned int get_possible_cpus(void) ··· 429 428 430 429 jsonw_name(json_wtr, "flags"); 431 430 jsonw_printf(json_wtr, "%#x", info->map_flags); 431 + 432 + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 433 + 432 434 jsonw_uint_field(json_wtr, "bytes_key", info->key_size); 433 435 jsonw_uint_field(json_wtr, "bytes_value", info->value_size); 434 436 jsonw_uint_field(json_wtr, "max_entries", info->max_entries); ··· 473 469 if (*info->name) 474 470 printf("name %s ", info->name); 475 471 476 - printf("flags 0x%x\n", info->map_flags); 472 + printf("flags 0x%x", info->map_flags); 473 + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); 474 + printf("\n"); 477 475 printf("\tkey %uB value %uB max_entries %u", 478 476 info->key_size, info->value_size, info->max_entries); 479 477
+11 -1
tools/bpf/bpftool/prog.c
··· 776 776 } 777 777 } else { 778 778 if (member_len == &info.jited_prog_len) { 779 - disasm_print_insn(buf, *member_len, opcodes); 779 + const char *name = NULL; 780 + 781 + if (info.ifindex) { 782 + name = ifindex_to_bfd_name_ns(info.ifindex, 783 + info.netns_dev, 784 + info.netns_ino); 785 + if (!name) 786 + goto err_free; 787 + } 788 + 789 + disasm_print_insn(buf, *member_len, opcodes, name); 780 790 } else { 781 791 kernel_syms_load(&dd); 782 792 if (json_output)
+14 -1
tools/include/uapi/linux/bpf.h
··· 900 900 __u32 data; 901 901 __u32 data_end; 902 902 __u32 data_meta; 903 + /* Below access go through struct xdp_rxq_info */ 904 + __u32 ingress_ifindex; /* rxq->dev->ifindex */ 905 + __u32 rx_queue_index; /* rxq->queue_index */ 903 906 }; 904 907 905 908 enum sk_action { ··· 938 935 __u32 max_entries; 939 936 __u32 map_flags; 940 937 char name[BPF_OBJ_NAME_LEN]; 938 + __u32 ifindex; 939 + __u64 netns_dev; 940 + __u64 netns_ino; 941 941 } __attribute__((aligned(8))); 942 942 943 943 /* User bpf_sock_ops struct to access socket values and specify request ops ··· 962 956 __u32 local_ip6[4]; /* Stored in network byte order */ 963 957 __u32 remote_port; /* Stored in network byte order */ 964 958 __u32 local_port; /* stored in host byte order */ 959 + __u32 is_fullsock; /* Some TCP fields are only valid if 960 + * there is a full socket. If not, the 961 + * fields read as zero. 962 + */ 963 + __u32 snd_cwnd; 964 + __u32 srtt_us; /* Averaged RTT << 3 in usecs */ 965 965 }; 966 966 967 967 /* List of known BPF sock_ops operators. ··· 1022 1010 #define BPF_DEVCG_DEV_CHAR (1ULL << 1) 1023 1011 1024 1012 struct bpf_cgroup_dev_ctx { 1025 - __u32 access_type; /* (access << 16) | type */ 1013 + /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ 1014 + __u32 access_type; 1026 1015 __u32 major; 1027 1016 __u32 minor; 1028 1017 };
+7
tools/testing/selftests/bpf/.gitignore
··· 3 3 test_lru_map 4 4 test_lpm_map 5 5 test_tag 6 + FEATURE-DUMP.libbpf 7 + fixdep 8 + test_align 9 + test_dev_cgroup 10 + test_progs 11 + test_verifier_log 12 + feature
+2 -1
tools/testing/selftests/bpf/Makefile
··· 19 19 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 20 20 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 21 21 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 22 - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o 22 + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ 23 + sample_map_ret0.o 23 24 24 25 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ 25 26 test_offload.py
+34
tools/testing/selftests/bpf/sample_map_ret0.c
··· 1 + /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ 2 + #include <linux/bpf.h> 3 + #include "bpf_helpers.h" 4 + 5 + struct bpf_map_def SEC("maps") htab = { 6 + .type = BPF_MAP_TYPE_HASH, 7 + .key_size = sizeof(__u32), 8 + .value_size = sizeof(long), 9 + .max_entries = 2, 10 + }; 11 + 12 + struct bpf_map_def SEC("maps") array = { 13 + .type = BPF_MAP_TYPE_ARRAY, 14 + .key_size = sizeof(__u32), 15 + .value_size = sizeof(long), 16 + .max_entries = 2, 17 + }; 18 + 19 + /* Sample program which should always load for testing control paths. */ 20 + SEC(".text") int func() 21 + { 22 + __u64 key64 = 0; 23 + __u32 key = 0; 24 + long *value; 25 + 26 + value = bpf_map_lookup_elem(&htab, &key); 27 + if (!value) 28 + return 1; 29 + value = bpf_map_lookup_elem(&array, &key64); 30 + if (!value) 31 + return 1; 32 + 33 + return 0; 34 + }
+122
tools/testing/selftests/bpf/test_lpm_map.c
··· 521 521 close(map_fd); 522 522 } 523 523 524 + static void test_lpm_get_next_key(void) 525 + { 526 + struct bpf_lpm_trie_key *key_p, *next_key_p; 527 + size_t key_size; 528 + __u32 value = 0; 529 + int map_fd; 530 + 531 + key_size = sizeof(*key_p) + sizeof(__u32); 532 + key_p = alloca(key_size); 533 + next_key_p = alloca(key_size); 534 + 535 + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), 536 + 100, BPF_F_NO_PREALLOC); 537 + assert(map_fd >= 0); 538 + 539 + /* empty tree. get_next_key should return ENOENT */ 540 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 && 541 + errno == ENOENT); 542 + 543 + /* get and verify the first key, get the second one should fail. */ 544 + key_p->prefixlen = 16; 545 + inet_pton(AF_INET, "192.168.0.0", key_p->data); 546 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 547 + 548 + memset(key_p, 0, key_size); 549 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 550 + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && 551 + key_p->data[1] == 168); 552 + 553 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 554 + errno == ENOENT); 555 + 556 + /* no exact matching key should get the first one in post order. 
*/ 557 + key_p->prefixlen = 8; 558 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 559 + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && 560 + key_p->data[1] == 168); 561 + 562 + /* add one more element (total two) */ 563 + key_p->prefixlen = 24; 564 + inet_pton(AF_INET, "192.168.0.0", key_p->data); 565 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 566 + 567 + memset(key_p, 0, key_size); 568 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 569 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 570 + key_p->data[1] == 168 && key_p->data[2] == 0); 571 + 572 + memset(next_key_p, 0, key_size); 573 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 574 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 575 + next_key_p->data[1] == 168); 576 + 577 + memcpy(key_p, next_key_p, key_size); 578 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 579 + errno == ENOENT); 580 + 581 + /* Add one more element (total three) */ 582 + key_p->prefixlen = 24; 583 + inet_pton(AF_INET, "192.168.128.0", key_p->data); 584 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 585 + 586 + memset(key_p, 0, key_size); 587 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 588 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 589 + key_p->data[1] == 168 && key_p->data[2] == 0); 590 + 591 + memset(next_key_p, 0, key_size); 592 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 593 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 594 + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); 595 + 596 + memcpy(key_p, next_key_p, key_size); 597 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 598 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 599 + next_key_p->data[1] == 168); 600 + 601 + memcpy(key_p, next_key_p, key_size); 602 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 603 + 
errno == ENOENT); 604 + 605 + /* Add one more element (total four) */ 606 + key_p->prefixlen = 24; 607 + inet_pton(AF_INET, "192.168.1.0", key_p->data); 608 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 609 + 610 + memset(key_p, 0, key_size); 611 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 612 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 613 + key_p->data[1] == 168 && key_p->data[2] == 0); 614 + 615 + memset(next_key_p, 0, key_size); 616 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 617 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 618 + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); 619 + 620 + memcpy(key_p, next_key_p, key_size); 621 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 622 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 623 + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); 624 + 625 + memcpy(key_p, next_key_p, key_size); 626 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 627 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 628 + next_key_p->data[1] == 168); 629 + 630 + memcpy(key_p, next_key_p, key_size); 631 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 632 + errno == ENOENT); 633 + 634 + /* no exact matching key should return the first one in post order */ 635 + key_p->prefixlen = 22; 636 + inet_pton(AF_INET, "192.168.1.0", key_p->data); 637 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 638 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 639 + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); 640 + 641 + close(map_fd); 642 + } 643 + 524 644 int main(void) 525 645 { 526 646 struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; ··· 664 544 test_lpm_ipaddr(); 665 545 666 546 test_lpm_delete(); 547 + 548 + test_lpm_get_next_key(); 667 549 668 550 printf("test_lpm: OK\n"); 669 551 return 0;
+182 -24
tools/testing/selftests/bpf/test_offload.py
··· 20 20 import pprint 21 21 import random 22 22 import string 23 + import struct 23 24 import subprocess 24 25 import time 25 26 ··· 157 156 (len(progs), expected)) 158 157 return progs 159 158 159 + def bpftool_map_list(expected=None, ns=""): 160 + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) 161 + if expected is not None: 162 + if len(maps) != expected: 163 + fail(True, "%d BPF maps loaded, expected %d" % 164 + (len(maps), expected)) 165 + return maps 166 + 160 167 def bpftool_prog_list_wait(expected=0, n_retry=20): 161 168 for i in range(n_retry): 162 169 nprogs = len(bpftool_prog_list()) ··· 172 163 return 173 164 time.sleep(0.05) 174 165 raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) 166 + 167 + def bpftool_map_list_wait(expected=0, n_retry=20): 168 + for i in range(n_retry): 169 + nmaps = len(bpftool_map_list()) 170 + if nmaps == expected: 171 + return 172 + time.sleep(0.05) 173 + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) 175 174 176 175 def ip(args, force=False, JSON=True, ns="", fail=True): 177 176 if force: ··· 209 192 netns.append(name) 210 193 return name 211 194 return None 195 + 196 + def int2str(fmt, val): 197 + ret = [] 198 + for b in struct.pack(fmt, val): 199 + ret.append(int(b)) 200 + return " ".join(map(lambda x: str(x), ret)) 201 + 202 + def str2int(strtab): 203 + inttab = [] 204 + for i in strtab: 205 + inttab.append(int(i, 16)) 206 + ba = bytearray(inttab) 207 + if len(strtab) == 4: 208 + fmt = "I" 209 + elif len(strtab) == 8: 210 + fmt = "Q" 211 + else: 212 + raise Exception("String array of len %d can't be unpacked to an int" % 213 + (len(strtab))) 214 + return struct.unpack(fmt, ba)[0] 212 215 213 216 class DebugfsDir: 214 217 """ ··· 348 311 return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), 349 312 fail=fail) 350 313 351 - def set_xdp(self, bpf, mode, force=False, fail=True): 314 + def 
set_xdp(self, bpf, mode, force=False, JSON=True, fail=True): 352 315 return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), 353 - force=force, fail=fail) 316 + force=force, JSON=JSON, fail=fail) 354 317 355 - def unset_xdp(self, mode, force=False, fail=True): 318 + def unset_xdp(self, mode, force=False, JSON=True, fail=True): 356 319 return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), 357 - force=force, fail=fail) 320 + force=force, JSON=JSON, fail=fail) 358 321 359 322 def ip_link_show(self, xdp): 360 323 _, link = ip("link show dev %s" % (self['ifname'])) ··· 427 390 428 391 ################################################################################ 429 392 def clean_up(): 393 + global files, netns, devs 394 + 430 395 for dev in devs: 431 396 dev.remove() 432 397 for f in files: 433 398 cmd("rm -f %s" % (f)) 434 399 for ns in netns: 435 400 cmd("ip netns delete %s" % (ns)) 401 + files = [] 402 + netns = [] 436 403 437 404 def pin_prog(file_name, idx=0): 438 405 progs = bpftool_prog_list(expected=(idx + 1)) ··· 446 405 447 406 return file_name, bpf_pinned(file_name) 448 407 449 - def check_dev_info(other_ns, ns, pin_file=None, removed=False): 450 - if removed: 451 - bpftool_prog_list(expected=0) 452 - ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) 453 - fail(ret == 0, "Showing prog with removed device did not fail") 454 - fail(err["error"].find("No such device") == -1, 455 - "Showing prog with removed device expected ENODEV, error is %s" % 456 - (err["error"])) 457 - return 458 - progs = bpftool_prog_list(expected=int(not removed), ns=ns) 408 + def pin_map(file_name, idx=0, expected=1): 409 + maps = bpftool_map_list(expected=expected) 410 + m = maps[idx] 411 + bpftool("map pin id %d %s" % (m["id"], file_name)) 412 + files.append(file_name) 413 + 414 + return file_name, bpf_pinned(file_name) 415 + 416 + def check_dev_info_removed(prog_file=None, map_file=None): 417 + bpftool_prog_list(expected=0) 418 + ret, err 
= bpftool("prog show pin %s" % (prog_file), fail=False) 419 + fail(ret == 0, "Showing prog with removed device did not fail") 420 + fail(err["error"].find("No such device") == -1, 421 + "Showing prog with removed device expected ENODEV, error is %s" % 422 + (err["error"])) 423 + 424 + bpftool_map_list(expected=0) 425 + ret, err = bpftool("map show pin %s" % (map_file), fail=False) 426 + fail(ret == 0, "Showing map with removed device did not fail") 427 + fail(err["error"].find("No such device") == -1, 428 + "Showing map with removed device expected ENODEV, error is %s" % 429 + (err["error"])) 430 + 431 + def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): 432 + progs = bpftool_prog_list(expected=1, ns=ns) 459 433 prog = progs[0] 460 434 461 435 fail("dev" not in prog.keys(), "Device parameters not reported") ··· 479 423 fail("ns_dev" not in dev.keys(), "Device parameters not reported") 480 424 fail("ns_inode" not in dev.keys(), "Device parameters not reported") 481 425 482 - if not removed and not other_ns: 426 + if not other_ns: 483 427 fail("ifname" not in dev.keys(), "Ifname not reported") 484 428 fail(dev["ifname"] != sim["ifname"], 485 429 "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) 486 430 else: 487 431 fail("ifname" in dev.keys(), "Ifname is reported for other ns") 488 - if removed: 489 - fail(dev["ifindex"] != 0, "Device perameters not zero on removed") 490 - fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") 491 - fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") 432 + 433 + maps = bpftool_map_list(expected=2, ns=ns) 434 + for m in maps: 435 + fail("dev" not in m.keys(), "Device parameters not reported") 436 + fail(dev != m["dev"], "Map's device different than program's") 492 437 493 438 # Parse command line 494 439 parser = argparse.ArgumentParser() ··· 521 464 cmd("mount -t debugfs none /sys/kernel/debug") 522 465 523 466 # Check samples are compiled 524 - samples = 
["sample_ret0.o"] 467 + samples = ["sample_ret0.o", "sample_map_ret0.o"] 525 468 for s in samples: 526 469 ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) 527 470 skip(ret != 0, "sample %s/%s not found, please compile it" % ··· 796 739 bpftool_prog_list_wait(expected=0) 797 740 798 741 sim = NetdevSim() 799 - sim.set_ethtool_tc_offloads(True) 800 - sim.set_xdp(obj, "offload") 742 + map_obj = bpf_obj("sample_map_ret0.o") 743 + start_test("Test loading program with maps...") 744 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 801 745 802 746 start_test("Test bpftool bound info reporting (own ns)...") 803 747 check_dev_info(False, "") ··· 815 757 sim.set_ns("") 816 758 check_dev_info(False, "") 817 759 818 - pin_file, _ = pin_prog("/sys/fs/bpf/tmp") 760 + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") 761 + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) 819 762 sim.remove() 820 763 821 764 start_test("Test bpftool bound info reporting (removed dev)...") 822 - check_dev_info(True, "", pin_file=pin_file, removed=True) 765 + check_dev_info_removed(prog_file=prog_file, map_file=map_file) 766 + 767 + # Remove all pinned files and reinstantiate the netdev 768 + clean_up() 769 + bpftool_prog_list_wait(expected=0) 770 + 771 + sim = NetdevSim() 772 + 773 + start_test("Test map update (no flags)...") 774 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 775 + maps = bpftool_map_list(expected=2) 776 + array = maps[0] if maps[0]["type"] == "array" else maps[1] 777 + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] 778 + for m in maps: 779 + for i in range(2): 780 + bpftool("map update id %d key %s value %s" % 781 + (m["id"], int2str("I", i), int2str("Q", i * 3))) 782 + 783 + for m in maps: 784 + ret, _ = bpftool("map update id %d key %s value %s" % 785 + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), 786 + fail=False) 787 + fail(ret == 0, "added too many entries") 788 + 789 + start_test("Test 
map update (exists)...") 790 + for m in maps: 791 + for i in range(2): 792 + bpftool("map update id %d key %s value %s exist" % 793 + (m["id"], int2str("I", i), int2str("Q", i * 3))) 794 + 795 + for m in maps: 796 + ret, err = bpftool("map update id %d key %s value %s exist" % 797 + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), 798 + fail=False) 799 + fail(ret == 0, "updated non-existing key") 800 + fail(err["error"].find("No such file or directory") == -1, 801 + "expected ENOENT, error is '%s'" % (err["error"])) 802 + 803 + start_test("Test map update (noexist)...") 804 + for m in maps: 805 + for i in range(2): 806 + ret, err = bpftool("map update id %d key %s value %s noexist" % 807 + (m["id"], int2str("I", i), int2str("Q", i * 3)), 808 + fail=False) 809 + fail(ret == 0, "updated existing key") 810 + fail(err["error"].find("File exists") == -1, 811 + "expected EEXIST, error is '%s'" % (err["error"])) 812 + 813 + start_test("Test map dump...") 814 + for m in maps: 815 + _, entries = bpftool("map dump id %d" % (m["id"])) 816 + for i in range(2): 817 + key = str2int(entries[i]["key"]) 818 + fail(key != i, "expected key %d, got %d" % (key, i)) 819 + val = str2int(entries[i]["value"]) 820 + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) 821 + 822 + start_test("Test map getnext...") 823 + for m in maps: 824 + _, entry = bpftool("map getnext id %d" % (m["id"])) 825 + key = str2int(entry["next_key"]) 826 + fail(key != 0, "next key %d, expected %d" % (key, 0)) 827 + _, entry = bpftool("map getnext id %d key %s" % 828 + (m["id"], int2str("I", 0))) 829 + key = str2int(entry["next_key"]) 830 + fail(key != 1, "next key %d, expected %d" % (key, 1)) 831 + ret, err = bpftool("map getnext id %d key %s" % 832 + (m["id"], int2str("I", 1)), fail=False) 833 + fail(ret == 0, "got next key past the end of map") 834 + fail(err["error"].find("No such file or directory") == -1, 835 + "expected ENOENT, error is '%s'" % (err["error"])) 836 + 837 + start_test("Test map 
delete (htab)...") 838 + for i in range(2): 839 + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) 840 + 841 + start_test("Test map delete (array)...") 842 + for i in range(2): 843 + ret, err = bpftool("map delete id %d key %s" % 844 + (htab["id"], int2str("I", i)), fail=False) 845 + fail(ret == 0, "removed entry from an array") 846 + fail(err["error"].find("No such file or directory") == -1, 847 + "expected ENOENT, error is '%s'" % (err["error"])) 848 + 849 + start_test("Test map remove...") 850 + sim.unset_xdp("offload") 851 + bpftool_map_list_wait(expected=0) 852 + sim.remove() 853 + 854 + sim = NetdevSim() 855 + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON 856 + sim.remove() 857 + bpftool_map_list_wait(expected=0) 858 + 859 + start_test("Test map creation fail path...") 860 + sim = NetdevSim() 861 + sim.dfs["bpf_map_accept"] = "N" 862 + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) 863 + fail(ret == 0, 864 + "netdevsim didn't refuse to create a map with offload disabled") 823 865 824 866 print("%s: OK" % (os.path.basename(__file__))) 825 867
+136 -1
tools/testing/selftests/bpf/test_verifier.c
··· 29 29 #include <linux/filter.h> 30 30 #include <linux/bpf_perf_event.h> 31 31 #include <linux/bpf.h> 32 + #include <linux/if_ether.h> 32 33 33 34 #include <bpf/bpf.h> 34 35 ··· 50 49 #define MAX_INSNS 512 51 50 #define MAX_FIXUPS 8 52 51 #define MAX_NR_MAPS 4 52 + #define POINTER_VALUE 0xcafe4all 53 + #define TEST_DATA_LEN 64 53 54 54 55 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) 55 56 #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) ··· 65 62 int fixup_map_in_map[MAX_FIXUPS]; 66 63 const char *errstr; 67 64 const char *errstr_unpriv; 65 + uint32_t retval; 68 66 enum { 69 67 UNDEF, 70 68 ACCEPT, ··· 99 95 BPF_EXIT_INSN(), 100 96 }, 101 97 .result = ACCEPT, 98 + .retval = -3, 99 + }, 100 + { 101 + "DIV32 by 0, zero check 1", 102 + .insns = { 103 + BPF_MOV32_IMM(BPF_REG_0, 42), 104 + BPF_MOV32_IMM(BPF_REG_1, 0), 105 + BPF_MOV32_IMM(BPF_REG_2, 1), 106 + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 107 + BPF_EXIT_INSN(), 108 + }, 109 + .result = ACCEPT, 110 + .retval = 0, 111 + }, 112 + { 113 + "DIV32 by 0, zero check 2", 114 + .insns = { 115 + BPF_MOV32_IMM(BPF_REG_0, 42), 116 + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), 117 + BPF_MOV32_IMM(BPF_REG_2, 1), 118 + BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 119 + BPF_EXIT_INSN(), 120 + }, 121 + .result = ACCEPT, 122 + .retval = 0, 123 + }, 124 + { 125 + "DIV64 by 0, zero check", 126 + .insns = { 127 + BPF_MOV32_IMM(BPF_REG_0, 42), 128 + BPF_MOV32_IMM(BPF_REG_1, 0), 129 + BPF_MOV32_IMM(BPF_REG_2, 1), 130 + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), 131 + BPF_EXIT_INSN(), 132 + }, 133 + .result = ACCEPT, 134 + .retval = 0, 135 + }, 136 + { 137 + "MOD32 by 0, zero check 1", 138 + .insns = { 139 + BPF_MOV32_IMM(BPF_REG_0, 42), 140 + BPF_MOV32_IMM(BPF_REG_1, 0), 141 + BPF_MOV32_IMM(BPF_REG_2, 1), 142 + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 143 + BPF_EXIT_INSN(), 144 + }, 145 + .result = ACCEPT, 146 + .retval = 0, 147 + }, 148 + { 149 + "MOD32 by 0, zero check 2", 150 + .insns = { 151 + 
BPF_MOV32_IMM(BPF_REG_0, 42), 152 + BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL), 153 + BPF_MOV32_IMM(BPF_REG_2, 1), 154 + BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 155 + BPF_EXIT_INSN(), 156 + }, 157 + .result = ACCEPT, 158 + .retval = 0, 159 + }, 160 + { 161 + "MOD64 by 0, zero check", 162 + .insns = { 163 + BPF_MOV32_IMM(BPF_REG_0, 42), 164 + BPF_MOV32_IMM(BPF_REG_1, 0), 165 + BPF_MOV32_IMM(BPF_REG_2, 1), 166 + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1), 167 + BPF_EXIT_INSN(), 168 + }, 169 + .result = ACCEPT, 170 + .retval = 0, 171 + }, 172 + { 173 + "empty prog", 174 + .insns = { 175 + }, 176 + .errstr = "last insn is not an exit or jmp", 177 + .result = REJECT, 178 + }, 179 + { 180 + "only exit insn", 181 + .insns = { 182 + BPF_EXIT_INSN(), 183 + }, 184 + .errstr = "R0 !read_ok", 185 + .result = REJECT, 102 186 }, 103 187 { 104 188 "unreachable", ··· 302 210 BPF_EXIT_INSN(), 303 211 }, 304 212 .result = ACCEPT, 213 + .retval = 1, 305 214 }, 306 215 { 307 216 "test8 ld_imm64", ··· 610 517 .errstr_unpriv = "R0 leaks addr", 611 518 .result = ACCEPT, 612 519 .result_unpriv = REJECT, 520 + .retval = POINTER_VALUE, 613 521 }, 614 522 { 615 523 "check valid spill/fill, skb mark", ··· 897 803 .errstr_unpriv = "R1 pointer comparison", 898 804 .result_unpriv = REJECT, 899 805 .result = ACCEPT, 806 + .retval = -ENOENT, 900 807 }, 901 808 { 902 809 "jump test 4", ··· 1918 1823 BPF_EXIT_INSN(), 1919 1824 }, 1920 1825 .result = ACCEPT, 1826 + .retval = 0xfaceb00c, 1921 1827 }, 1922 1828 { 1923 1829 "PTR_TO_STACK store/load - bad alignment on off", ··· 1977 1881 .result = ACCEPT, 1978 1882 .result_unpriv = REJECT, 1979 1883 .errstr_unpriv = "R0 leaks addr", 1884 + .retval = POINTER_VALUE, 1980 1885 }, 1981 1886 { 1982 1887 "unpriv: add const to pointer", ··· 2151 2054 BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), 2152 2055 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 2153 2056 BPF_FUNC_get_hash_recalc), 2057 + BPF_MOV64_IMM(BPF_REG_0, 0), 2154 2058 BPF_EXIT_INSN(), 2155 
2059 }, 2156 2060 .result = ACCEPT, ··· 2939 2841 }, 2940 2842 .result = ACCEPT, 2941 2843 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2844 + .retval = 1, 2942 2845 }, 2943 2846 { 2944 2847 "direct packet access: test12 (and, good access)", ··· 2964 2865 }, 2965 2866 .result = ACCEPT, 2966 2867 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2868 + .retval = 1, 2967 2869 }, 2968 2870 { 2969 2871 "direct packet access: test13 (branches, good access)", ··· 2995 2895 }, 2996 2896 .result = ACCEPT, 2997 2897 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2898 + .retval = 1, 2998 2899 }, 2999 2900 { 3000 2901 "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)", ··· 3019 2918 }, 3020 2919 .result = ACCEPT, 3021 2920 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 2921 + .retval = 1, 3022 2922 }, 3023 2923 { 3024 2924 "direct packet access: test15 (spill with xadd)", ··· 3306 3204 }, 3307 3205 .result = ACCEPT, 3308 3206 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 3207 + .retval = 1, 3309 3208 }, 3310 3209 { 3311 3210 "direct packet access: test28 (marking on <=, bad access)", ··· 5926 5823 }, 5927 5824 .result = ACCEPT, 5928 5825 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 5826 + .retval = 0 /* csum_diff of 64-byte packet */, 5929 5827 }, 5930 5828 { 5931 5829 "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", ··· 6295 6191 }, 6296 6192 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 6297 6193 .result = ACCEPT, 6194 + .retval = 42 /* ultimate return value */, 6298 6195 }, 6299 6196 { 6300 6197 "ld_ind: check calling conv, r1", ··· 6367 6262 BPF_EXIT_INSN(), 6368 6263 }, 6369 6264 .result = ACCEPT, 6265 + .retval = 1, 6370 6266 }, 6371 6267 { 6372 6268 "check bpf_perf_event_data->sample_period byte load permitted", ··· 7355 7249 }, 7356 7250 .fixup_map1 = { 3 }, 7357 7251 .result = ACCEPT, 7252 + .retval = POINTER_VALUE, 7358 7253 .result_unpriv = REJECT, 7359 7254 .errstr_unpriv = "R0 leaks addr as return value" 7360 7255 }, ··· 7376 7269 }, 7377 7270 .fixup_map1 = { 3 
}, 7378 7271 .result = ACCEPT, 7272 + .retval = POINTER_VALUE, 7379 7273 .result_unpriv = REJECT, 7380 7274 .errstr_unpriv = "R0 leaks addr as return value" 7381 7275 }, ··· 7818 7710 BPF_EXIT_INSN(), 7819 7711 }, 7820 7712 .result = ACCEPT, 7713 + .retval = TEST_DATA_LEN, 7821 7714 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 7822 7715 }, 7823 7716 { ··· 8960 8851 .errstr_unpriv = "function calls to other bpf functions are allowed for root only", 8961 8852 .result_unpriv = REJECT, 8962 8853 .result = ACCEPT, 8854 + .retval = 1, 8963 8855 }, 8964 8856 { 8965 8857 "calls: overlapping caller/callee", ··· 9156 9046 }, 9157 9047 .prog_type = BPF_PROG_TYPE_SCHED_ACT, 9158 9048 .result = ACCEPT, 9049 + .retval = TEST_DATA_LEN, 9159 9050 }, 9160 9051 { 9161 9052 "calls: callee using args1", ··· 9169 9058 .errstr_unpriv = "allowed for root only", 9170 9059 .result_unpriv = REJECT, 9171 9060 .result = ACCEPT, 9061 + .retval = POINTER_VALUE, 9172 9062 }, 9173 9063 { 9174 9064 "calls: callee using wrong args2", ··· 9200 9088 .errstr_unpriv = "allowed for root only", 9201 9089 .result_unpriv = REJECT, 9202 9090 .result = ACCEPT, 9091 + .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN, 9203 9092 }, 9204 9093 { 9205 9094 "calls: callee changing pkt pointers", ··· 9249 9136 }, 9250 9137 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9251 9138 .result = ACCEPT, 9139 + .retval = TEST_DATA_LEN + TEST_DATA_LEN, 9252 9140 }, 9253 9141 { 9254 9142 "calls: calls with stack arith", ··· 9268 9154 }, 9269 9155 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9270 9156 .result = ACCEPT, 9157 + .retval = 42, 9271 9158 }, 9272 9159 { 9273 9160 "calls: calls with misaligned stack access", ··· 9302 9187 }, 9303 9188 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 9304 9189 .result = ACCEPT, 9190 + .retval = 43, 9305 9191 }, 9306 9192 { 9307 9193 "calls: calls control flow, jump test 2", ··· 9795 9679 }, 9796 9680 .prog_type = BPF_PROG_TYPE_XDP, 9797 9681 .result = ACCEPT, 9682 + .retval = 42, 9798 9683 }, 9799 9684 { 
9800 9685 "calls: write into callee stack frame", ··· 10407 10290 }, 10408 10291 .result = ACCEPT, 10409 10292 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10293 + .retval = POINTER_VALUE, 10410 10294 }, 10411 10295 { 10412 10296 "calls: pkt_ptr spill into caller stack 2", ··· 10473 10355 }, 10474 10356 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10475 10357 .result = ACCEPT, 10358 + .retval = 1, 10476 10359 }, 10477 10360 { 10478 10361 "calls: pkt_ptr spill into caller stack 4", ··· 10507 10388 }, 10508 10389 .prog_type = BPF_PROG_TYPE_SCHED_CLS, 10509 10390 .result = ACCEPT, 10391 + .retval = 1, 10510 10392 }, 10511 10393 { 10512 10394 "calls: pkt_ptr spill into caller stack 5", ··· 10916 10796 int fd_prog, expected_ret, reject_from_alignment; 10917 10797 struct bpf_insn *prog = test->insns; 10918 10798 int prog_len = probe_filter_length(prog); 10799 + char data_in[TEST_DATA_LEN] = {}; 10919 10800 int prog_type = test->prog_type; 10920 10801 int map_fds[MAX_NR_MAPS]; 10921 10802 const char *expected_err; 10922 - int i; 10803 + uint32_t retval; 10804 + int i, err; 10923 10805 10924 10806 for (i = 0; i < MAX_NR_MAPS; i++) 10925 10807 map_fds[i] = -1; ··· 10964 10842 } 10965 10843 } 10966 10844 10845 + if (fd_prog >= 0) { 10846 + err = bpf_prog_test_run(fd_prog, 1, data_in, sizeof(data_in), 10847 + NULL, NULL, &retval, NULL); 10848 + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { 10849 + printf("Unexpected bpf_prog_test_run error\n"); 10850 + goto fail_log; 10851 + } 10852 + if (!err && retval != test->retval && 10853 + test->retval != POINTER_VALUE) { 10854 + printf("FAIL retval %d != %d\n", retval, test->retval); 10855 + goto fail_log; 10856 + } 10857 + } 10967 10858 (*passes)++; 10968 10859 printf("OK%s\n", reject_from_alignment ? 10969 10860 " (NOTE: reject due to unknown alignment)" : "");