Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-04-01

The following pull-request contains BPF updates for your *net-next* tree.

We've added 68 non-merge commits during the last 7 day(s) which contain
a total of 70 files changed, 2944 insertions(+), 1139 deletions(-).

The main changes are:

1) UDP support for sockmap, from Cong.

2) Verifier merge conflict resolution fix, from Daniel.

3) xsk selftests enhancements, from Maciej.

4) Unstable helpers aka kernel func calling, from Martin.

5) Batched ops for LPM map, from Pedro.

6) Fix race in bpf_get_local_storage, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2975 -1170
+15
Documentation/bpf/bpf_design_QA.rst
··· 258 258 helpers, etc be added out of kernel module code? 259 259 260 260 A: NO. 261 + 262 + Q: Directly calling kernel function is an ABI? 263 + ---------------------------------------------- 264 + Q: Some kernel functions (e.g. tcp_slow_start) can be called 265 + by BPF programs. Do these kernel functions become an ABI? 266 + 267 + A: NO. 268 + 269 + The kernel function protos will change and the bpf programs will be 270 + rejected by the verifier. Also, for example, some of the bpf-callable 271 + kernel functions have already been used by other kernel tcp 272 + cc (congestion-control) implementations. If any of these kernel 273 + functions has changed, both the in-tree and out-of-tree kernel tcp cc 274 + implementations have to be changed. The same goes for the bpf 275 + programs and they have to be adjusted accordingly.
+5
arch/x86/net/bpf_jit_comp.c
··· 2346 2346 tmp : orig_prog); 2347 2347 return prog; 2348 2348 } 2349 + 2350 + bool bpf_jit_supports_kfunc_call(void) 2351 + { 2352 + return true; 2353 + }
+198
arch/x86/net/bpf_jit_comp32.c
··· 1390 1390 *pprog = prog; 1391 1391 } 1392 1392 1393 + static void emit_push_r32(const u8 src[], u8 **pprog) 1394 + { 1395 + u8 *prog = *pprog; 1396 + int cnt = 0; 1397 + 1398 + /* mov ecx,dword ptr [ebp+off] */ 1399 + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo)); 1400 + /* push ecx */ 1401 + EMIT1(0x51); 1402 + 1403 + *pprog = prog; 1404 + } 1405 + 1393 1406 static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo) 1394 1407 { 1395 1408 u8 jmp_cond; ··· 1470 1457 } 1471 1458 1472 1459 return jmp_cond; 1460 + } 1461 + 1462 + /* i386 kernel compiles with "-mregparm=3". From gcc document: 1463 + * 1464 + * ==== snippet ==== 1465 + * regparm (number) 1466 + * On x86-32 targets, the regparm attribute causes the compiler 1467 + * to pass arguments number one to (number) if they are of integral 1468 + * type in registers EAX, EDX, and ECX instead of on the stack. 1469 + * Functions that take a variable number of arguments continue 1470 + * to be passed all of their arguments on the stack. 1471 + * ==== snippet ==== 1472 + * 1473 + * The first three args of a function will be considered for 1474 + * putting into the 32bit register EAX, EDX, and ECX. 1475 + * 1476 + * Two 32bit registers are used to pass a 64bit arg. 1477 + * 1478 + * For example, 1479 + * void foo(u32 a, u32 b, u32 c, u32 d): 1480 + * u32 a: EAX 1481 + * u32 b: EDX 1482 + * u32 c: ECX 1483 + * u32 d: stack 1484 + * 1485 + * void foo(u64 a, u32 b, u32 c): 1486 + * u64 a: EAX (lo32) EDX (hi32) 1487 + * u32 b: ECX 1488 + * u32 c: stack 1489 + * 1490 + * void foo(u32 a, u64 b, u32 c): 1491 + * u32 a: EAX 1492 + * u64 b: EDX (lo32) ECX (hi32) 1493 + * u32 c: stack 1494 + * 1495 + * void foo(u32 a, u32 b, u64 c): 1496 + * u32 a: EAX 1497 + * u32 b: EDX 1498 + * u64 c: stack 1499 + * 1500 + * The return value will be stored in the EAX (and EDX for 64bit value). 
1501 + * 1502 + * For example, 1503 + * u32 foo(u32 a, u32 b, u32 c): 1504 + * return value: EAX 1505 + * 1506 + * u64 foo(u32 a, u32 b, u32 c): 1507 + * return value: EAX (lo32) EDX (hi32) 1508 + * 1509 + * Notes: 1510 + * The verifier only accepts function having integer and pointers 1511 + * as its args and return value, so it does not have 1512 + * struct-by-value. 1513 + * 1514 + * emit_kfunc_call() finds out the btf_func_model by calling 1515 + * bpf_jit_find_kfunc_model(). A btf_func_model 1516 + * has the details about the number of args, size of each arg, 1517 + * and the size of the return value. 1518 + * 1519 + * It first decides how many args can be passed by EAX, EDX, and ECX. 1520 + * That will decide what args should be pushed to the stack: 1521 + * [first_stack_regno, last_stack_regno] are the bpf regnos 1522 + * that should be pushed to the stack. 1523 + * 1524 + * It will first push all args to the stack because the push 1525 + * will need to use ECX. Then, it moves 1526 + * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX. 1527 + * 1528 + * When emitting a call (0xE8), it needs to figure out 1529 + * the jmp_offset relative to the jit-insn address immediately 1530 + * following the call (0xE8) instruction. At this point, it knows 1531 + * the end of the jit-insn address after completely translated the 1532 + * current (BPF_JMP | BPF_CALL) bpf-insn. It is passed as "end_addr" 1533 + * to the emit_kfunc_call(). Thus, it can learn the "immediate-follow-call" 1534 + * address by figuring out how many jit-insn is generated between 1535 + * the call (0xE8) and the end_addr: 1536 + * - 0-1 jit-insn (3 bytes each) to restore the esp pointer if there 1537 + * is arg pushed to the stack. 1538 + * - 0-2 jit-insns (3 bytes each) to handle the return value. 
1539 + */ 1540 + static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr, 1541 + const struct bpf_insn *insn, u8 **pprog) 1542 + { 1543 + const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX }; 1544 + int i, cnt = 0, first_stack_regno, last_stack_regno; 1545 + int free_arg_regs = ARRAY_SIZE(arg_regs); 1546 + const struct btf_func_model *fm; 1547 + int bytes_in_stack = 0; 1548 + const u8 *cur_arg_reg; 1549 + u8 *prog = *pprog; 1550 + s64 jmp_offset; 1551 + 1552 + fm = bpf_jit_find_kfunc_model(bpf_prog, insn); 1553 + if (!fm) 1554 + return -EINVAL; 1555 + 1556 + first_stack_regno = BPF_REG_1; 1557 + for (i = 0; i < fm->nr_args; i++) { 1558 + int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1; 1559 + 1560 + if (regs_needed > free_arg_regs) 1561 + break; 1562 + 1563 + free_arg_regs -= regs_needed; 1564 + first_stack_regno++; 1565 + } 1566 + 1567 + /* Push the args to the stack */ 1568 + last_stack_regno = BPF_REG_0 + fm->nr_args; 1569 + for (i = last_stack_regno; i >= first_stack_regno; i--) { 1570 + if (fm->arg_size[i - 1] > sizeof(u32)) { 1571 + emit_push_r64(bpf2ia32[i], &prog); 1572 + bytes_in_stack += 8; 1573 + } else { 1574 + emit_push_r32(bpf2ia32[i], &prog); 1575 + bytes_in_stack += 4; 1576 + } 1577 + } 1578 + 1579 + cur_arg_reg = &arg_regs[0]; 1580 + for (i = BPF_REG_1; i < first_stack_regno; i++) { 1581 + /* mov e[adc]x,dword ptr [ebp+off] */ 1582 + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++), 1583 + STACK_VAR(bpf2ia32[i][0])); 1584 + if (fm->arg_size[i - 1] > sizeof(u32)) 1585 + /* mov e[adc]x,dword ptr [ebp+off] */ 1586 + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++), 1587 + STACK_VAR(bpf2ia32[i][1])); 1588 + } 1589 + 1590 + if (bytes_in_stack) 1591 + /* add esp,"bytes_in_stack" */ 1592 + end_addr -= 3; 1593 + 1594 + /* mov dword ptr [ebp+off],edx */ 1595 + if (fm->ret_size > sizeof(u32)) 1596 + end_addr -= 3; 1597 + 1598 + /* mov dword ptr [ebp+off],eax */ 1599 + if (fm->ret_size) 1600 + end_addr -= 3; 1601 + 1602 + 
jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr; 1603 + if (!is_simm32(jmp_offset)) { 1604 + pr_err("unsupported BPF kernel function jmp_offset:%lld\n", 1605 + jmp_offset); 1606 + return -EINVAL; 1607 + } 1608 + 1609 + EMIT1_off32(0xE8, jmp_offset); 1610 + 1611 + if (fm->ret_size) 1612 + /* mov dword ptr [ebp+off],eax */ 1613 + EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), 1614 + STACK_VAR(bpf2ia32[BPF_REG_0][0])); 1615 + 1616 + if (fm->ret_size > sizeof(u32)) 1617 + /* mov dword ptr [ebp+off],edx */ 1618 + EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX), 1619 + STACK_VAR(bpf2ia32[BPF_REG_0][1])); 1620 + 1621 + if (bytes_in_stack) 1622 + /* add esp,"bytes_in_stack" */ 1623 + EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack); 1624 + 1625 + *pprog = prog; 1626 + 1627 + return 0; 1473 1628 } 1474 1629 1475 1630 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, ··· 2068 1887 2069 1888 if (insn->src_reg == BPF_PSEUDO_CALL) 2070 1889 goto notyet; 1890 + 1891 + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 1892 + int err; 1893 + 1894 + err = emit_kfunc_call(bpf_prog, 1895 + image + addrs[i], 1896 + insn, &prog); 1897 + 1898 + if (err) 1899 + return err; 1900 + break; 1901 + } 2071 1902 2072 1903 func = (u8 *) __bpf_call_base + imm32; 2073 1904 jmp_offset = func - (image + addrs[i]); ··· 2585 2392 bpf_jit_prog_release_other(prog, prog == orig_prog ? 2586 2393 tmp : orig_prog); 2587 2394 return prog; 2395 + } 2396 + 2397 + bool bpf_jit_supports_kfunc_call(void) 2398 + { 2399 + return true; 2588 2400 }
+12
drivers/net/veth.c
··· 218 218 } 219 219 } 220 220 221 + static void veth_get_channels(struct net_device *dev, 222 + struct ethtool_channels *channels) 223 + { 224 + channels->tx_count = dev->real_num_tx_queues; 225 + channels->rx_count = dev->real_num_rx_queues; 226 + channels->max_tx = dev->real_num_tx_queues; 227 + channels->max_rx = dev->real_num_rx_queues; 228 + channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues); 229 + channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues); 230 + } 231 + 221 232 static const struct ethtool_ops veth_ethtool_ops = { 222 233 .get_drvinfo = veth_get_drvinfo, 223 234 .get_link = ethtool_op_get_link, ··· 237 226 .get_ethtool_stats = veth_get_ethtool_stats, 238 227 .get_link_ksettings = veth_get_link_ksettings, 239 228 .get_ts_info = ethtool_op_get_ts_info, 229 + .get_channels = veth_get_channels, 240 230 }; 241 231 242 232 /* general routines */
+49 -8
include/linux/bpf-cgroup.h
··· 20 20 struct bpf_cgroup_storage; 21 21 struct ctl_table; 22 22 struct ctl_table_header; 23 + struct task_struct; 23 24 24 25 #ifdef CONFIG_CGROUP_BPF 25 26 26 27 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; 27 28 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) 28 29 29 - DECLARE_PER_CPU(struct bpf_cgroup_storage*, 30 - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 30 + #define BPF_CGROUP_STORAGE_NEST_MAX 8 31 + 32 + struct bpf_cgroup_storage_info { 33 + struct task_struct *task; 34 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; 35 + }; 36 + 37 + /* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks 38 + * to use bpf cgroup storage simultaneously. 39 + */ 40 + DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 41 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 31 42 32 43 #define for_each_cgroup_storage_type(stype) \ 33 44 for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) ··· 172 161 return BPF_CGROUP_STORAGE_SHARED; 173 162 } 174 163 175 - static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage 176 - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) 164 + static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage 165 + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) 177 166 { 178 167 enum bpf_cgroup_storage_type stype; 168 + int i, err = 0; 179 169 180 - for_each_cgroup_storage_type(stype) 181 - this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); 170 + preempt_disable(); 171 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 172 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) 173 + continue; 174 + 175 + this_cpu_write(bpf_cgroup_storage_info[i].task, current); 176 + for_each_cgroup_storage_type(stype) 177 + this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], 178 + storage[stype]); 179 + goto out; 180 + } 181 + err = -EBUSY; 182 + WARN_ON_ONCE(1); 183 + 184 + out: 185 + preempt_enable(); 186 
+ return err; 187 + } 188 + 189 + static inline void bpf_cgroup_storage_unset(void) 190 + { 191 + int i; 192 + 193 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 194 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 195 + continue; 196 + 197 + this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); 198 + return; 199 + } 182 200 } 183 201 184 202 struct bpf_cgroup_storage * ··· 488 448 return -EINVAL; 489 449 } 490 450 491 - static inline void bpf_cgroup_storage_set( 492 - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} 451 + static inline int bpf_cgroup_storage_set( 452 + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } 453 + static inline void bpf_cgroup_storage_unset(void) {} 493 454 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, 494 455 struct bpf_map *map) { return 0; } 495 456 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+51 -7
include/linux/bpf.h
··· 56 56 u32 seq_priv_size; 57 57 }; 58 58 59 - /* map is generic key/value storage optionally accesible by eBPF programs */ 59 + /* map is generic key/value storage optionally accessible by eBPF programs */ 60 60 struct bpf_map_ops { 61 61 /* funcs callable from userspace (via syscall) */ 62 62 int (*map_alloc_check)(union bpf_attr *attr); ··· 427 427 PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ 428 428 PTR_TO_FUNC, /* reg points to a bpf program function */ 429 429 PTR_TO_MAP_KEY, /* reg points to a map element key */ 430 + __BPF_REG_TYPE_MAX, 430 431 }; 431 432 432 433 /* The information passed from prog-specific *_is_valid_access ··· 481 480 const struct btf_type *t, int off, int size, 482 481 enum bpf_access_type atype, 483 482 u32 *next_btf_id); 483 + bool (*check_kfunc_call)(u32 kfunc_btf_id); 484 484 }; 485 485 486 486 struct bpf_prog_offload_ops { ··· 798 796 struct module *module; 799 797 }; 800 798 799 + struct bpf_kfunc_desc_tab; 800 + 801 801 struct bpf_prog_aux { 802 802 atomic64_t refcnt; 803 803 u32 used_map_cnt; ··· 836 832 struct bpf_prog **func; 837 833 void *jit_data; /* JIT specific data. arch dependent */ 838 834 struct bpf_jit_poke_descriptor *poke_tab; 835 + struct bpf_kfunc_desc_tab *kfunc_tab; 839 836 u32 size_poke_tab; 840 837 struct bpf_ksym ksym; 841 838 const struct bpf_prog_ops *ops; ··· 1111 1106 /* BPF program asks to set CN on the packet. */ 1112 1107 #define BPF_RET_SET_CN (1 << 0) 1113 1108 1109 + /* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, 1110 + * if bpf_cgroup_storage_set() failed, the rest of programs 1111 + * will not execute. This should be a really rare scenario 1112 + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of 1113 + * preemptions all between bpf_cgroup_storage_set() and 1114 + * bpf_cgroup_storage_unset() on the same cpu. 
1115 + */ 1114 1116 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ 1115 1117 ({ \ 1116 1118 struct bpf_prog_array_item *_item; \ ··· 1130 1118 _array = rcu_dereference(array); \ 1131 1119 _item = &_array->items[0]; \ 1132 1120 while ((_prog = READ_ONCE(_item->prog))) { \ 1133 - bpf_cgroup_storage_set(_item->cgroup_storage); \ 1121 + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1122 + break; \ 1134 1123 func_ret = func(_prog, ctx); \ 1135 1124 _ret &= (func_ret & 1); \ 1136 1125 *(ret_flags) |= (func_ret >> 1); \ 1126 + bpf_cgroup_storage_unset(); \ 1137 1127 _item++; \ 1138 1128 } \ 1139 1129 rcu_read_unlock(); \ ··· 1156 1142 goto _out; \ 1157 1143 _item = &_array->items[0]; \ 1158 1144 while ((_prog = READ_ONCE(_item->prog))) { \ 1159 - if (set_cg_storage) \ 1160 - bpf_cgroup_storage_set(_item->cgroup_storage); \ 1161 - _ret &= func(_prog, ctx); \ 1145 + if (!set_cg_storage) { \ 1146 + _ret &= func(_prog, ctx); \ 1147 + } else { \ 1148 + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ 1149 + break; \ 1150 + _ret &= func(_prog, ctx); \ 1151 + bpf_cgroup_storage_unset(); \ 1152 + } \ 1162 1153 _item++; \ 1163 1154 } \ 1164 1155 _out: \ ··· 1532 1513 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, 1533 1514 const union bpf_attr *kattr, 1534 1515 union bpf_attr __user *uattr); 1516 + bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); 1535 1517 bool btf_ctx_access(int off, int size, enum bpf_access_type type, 1536 1518 const struct bpf_prog *prog, 1537 1519 struct bpf_insn_access_aux *info); ··· 1551 1531 struct btf_func_model *m); 1552 1532 1553 1533 struct bpf_reg_state; 1554 - int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, 1555 - struct bpf_reg_state *regs); 1534 + int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, 1535 + struct bpf_reg_state *regs); 1536 + int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, 1537 + const struct btf *btf, u32 func_id, 
1538 + struct bpf_reg_state *regs); 1556 1539 int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, 1557 1540 struct bpf_reg_state *reg); 1558 1541 int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, ··· 1566 1543 1567 1544 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); 1568 1545 void bpf_task_storage_free(struct task_struct *task); 1546 + bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); 1547 + const struct btf_func_model * 1548 + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, 1549 + const struct bpf_insn *insn); 1569 1550 #else /* !CONFIG_BPF_SYSCALL */ 1570 1551 static inline struct bpf_prog *bpf_prog_get(u32 ufd) 1571 1552 { ··· 1732 1705 return -ENOTSUPP; 1733 1706 } 1734 1707 1708 + static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) 1709 + { 1710 + return false; 1711 + } 1712 + 1735 1713 static inline void bpf_map_put(struct bpf_map *map) 1736 1714 { 1737 1715 } ··· 1754 1722 1755 1723 static inline void bpf_task_storage_free(struct task_struct *task) 1756 1724 { 1725 + } 1726 + 1727 + static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) 1728 + { 1729 + return false; 1730 + } 1731 + 1732 + static inline const struct btf_func_model * 1733 + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, 1734 + const struct bpf_insn *insn) 1735 + { 1736 + return NULL; 1757 1737 } 1758 1738 #endif /* CONFIG_BPF_SYSCALL */ 1759 1739
+6
include/linux/btf.h
··· 110 110 const struct btf_type * 111 111 btf_resolve_size(const struct btf *btf, const struct btf_type *type, 112 112 u32 *type_size); 113 + const char *btf_type_str(const struct btf_type *t); 113 114 114 115 #define for_each_member(i, struct_type, member) \ 115 116 for (i = 0, member = btf_type_member(struct_type); \ ··· 140 139 static inline bool btf_type_is_enum(const struct btf_type *t) 141 140 { 142 141 return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; 142 + } 143 + 144 + static inline bool btf_type_is_scalar(const struct btf_type *t) 145 + { 146 + return btf_type_is_int(t) || btf_type_is_enum(t); 143 147 } 144 148 145 149 static inline bool btf_type_is_typedef(const struct btf_type *t)
+2 -11
include/linux/filter.h
··· 877 877 void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, 878 878 const u32 *insn_to_jit_off); 879 879 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); 880 - void bpf_prog_free_jited_linfo(struct bpf_prog *prog); 881 - void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); 880 + void bpf_prog_jit_attempt_done(struct bpf_prog *prog); 882 881 883 882 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); 884 883 struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags); ··· 918 919 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); 919 920 void bpf_jit_compile(struct bpf_prog *prog); 920 921 bool bpf_jit_needs_zext(void); 922 + bool bpf_jit_supports_kfunc_call(void); 921 923 bool bpf_helper_changes_pkt_data(void *func); 922 924 923 925 static inline bool bpf_dump_raw_ok(const struct cred *cred) ··· 1245 1245 1246 1246 void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, 1247 1247 int k, unsigned int size); 1248 - 1249 - static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, 1250 - unsigned int size, void *buffer) 1251 - { 1252 - if (k >= 0) 1253 - return skb_header_pointer(skb, k, size, buffer); 1254 - 1255 - return bpf_internal_load_pointer_neg_helper(skb, k, size); 1256 - } 1257 1248 1258 1249 static inline int bpf_tell_extensions(void) 1259 1250 {
+1
include/linux/skbuff.h
··· 3626 3626 unsigned int flags); 3627 3627 int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, 3628 3628 int len); 3629 + int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); 3629 3630 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 3630 3631 unsigned int skb_zerocopy_headlen(const struct sk_buff *from); 3631 3632 int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
+58 -19
include/linux/skmsg.h
··· 58 58 struct bpf_prog *msg_parser; 59 59 struct bpf_prog *stream_parser; 60 60 struct bpf_prog *stream_verdict; 61 + struct bpf_prog *skb_verdict; 61 62 }; 62 63 63 64 enum sk_psock_state_bits { ··· 90 89 #endif 91 90 struct sk_buff_head ingress_skb; 92 91 struct list_head ingress_msg; 92 + spinlock_t ingress_lock; 93 93 unsigned long state; 94 94 struct list_head link; 95 95 spinlock_t link_lock; ··· 99 97 void (*saved_close)(struct sock *sk, long timeout); 100 98 void (*saved_write_space)(struct sock *sk); 101 99 void (*saved_data_ready)(struct sock *sk); 100 + int (*psock_update_sk_prot)(struct sock *sk, bool restore); 102 101 struct proto *sk_proto; 102 + struct mutex work_mutex; 103 103 struct sk_psock_work_state work_state; 104 104 struct work_struct work; 105 - union { 106 - struct rcu_head rcu; 107 - struct work_struct gc; 108 - }; 105 + struct rcu_work rwork; 109 106 }; 110 107 111 108 int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len, ··· 125 124 struct sk_msg *msg, u32 bytes); 126 125 int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, 127 126 struct sk_msg *msg, u32 bytes); 127 + int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, 128 + long timeo, int *err); 129 + int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, 130 + int len, int flags); 128 131 129 132 static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) 130 133 { ··· 289 284 static inline void sk_psock_queue_msg(struct sk_psock *psock, 290 285 struct sk_msg *msg) 291 286 { 287 + spin_lock_bh(&psock->ingress_lock); 292 288 list_add_tail(&msg->list, &psock->ingress_msg); 289 + spin_unlock_bh(&psock->ingress_lock); 290 + } 291 + 292 + static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) 293 + { 294 + struct sk_msg *msg; 295 + 296 + spin_lock_bh(&psock->ingress_lock); 297 + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); 298 + if (msg) 299 + 
list_del(&msg->list); 300 + spin_unlock_bh(&psock->ingress_lock); 301 + return msg; 302 + } 303 + 304 + static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock) 305 + { 306 + struct sk_msg *msg; 307 + 308 + spin_lock_bh(&psock->ingress_lock); 309 + msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list); 310 + spin_unlock_bh(&psock->ingress_lock); 311 + return msg; 312 + } 313 + 314 + static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock, 315 + struct sk_msg *msg) 316 + { 317 + struct sk_msg *ret; 318 + 319 + spin_lock_bh(&psock->ingress_lock); 320 + if (list_is_last(&msg->list, &psock->ingress_msg)) 321 + ret = NULL; 322 + else 323 + ret = list_next_entry(msg, list); 324 + spin_unlock_bh(&psock->ingress_lock); 325 + return ret; 293 326 } 294 327 295 328 static inline bool sk_psock_queue_empty(const struct sk_psock *psock) 296 329 { 297 330 return psock ? list_empty(&psock->ingress_msg) : true; 331 + } 332 + 333 + static inline void kfree_sk_msg(struct sk_msg *msg) 334 + { 335 + if (msg->skb) 336 + consume_skb(msg->skb); 337 + kfree(msg); 298 338 } 299 339 300 340 static inline void sk_psock_report_error(struct sk_psock *psock, int err) ··· 351 301 } 352 302 353 303 struct sk_psock *sk_psock_init(struct sock *sk, int node); 304 + void sk_psock_stop(struct sk_psock *psock, bool wait); 354 305 355 306 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) 356 307 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); ··· 400 349 } 401 350 } 402 351 403 - static inline void sk_psock_update_proto(struct sock *sk, 404 - struct sk_psock *psock, 405 - struct proto *ops) 406 - { 407 - /* Pairs with lockless read in sk_clone_lock() */ 408 - WRITE_ONCE(sk->sk_prot, ops); 409 - } 410 - 411 352 static inline void sk_psock_restore_proto(struct sock *sk, 412 353 struct sk_psock *psock) 413 354 { 414 355 sk->sk_prot->unhash = psock->saved_unhash; 415 - if (inet_csk_has_ulp(sk)) { 416 - tcp_update_ulp(sk, psock->sk_proto, 
psock->saved_write_space); 417 - } else { 418 - sk->sk_write_space = psock->saved_write_space; 419 - /* Pairs with lockless read in sk_clone_lock() */ 420 - WRITE_ONCE(sk->sk_prot, psock->sk_proto); 421 - } 356 + if (psock->psock_update_sk_prot) 357 + psock->psock_update_sk_prot(sk, true); 422 358 } 423 359 424 360 static inline void sk_psock_set_state(struct sk_psock *psock, ··· 480 442 psock_set_prog(&progs->msg_parser, NULL); 481 443 psock_set_prog(&progs->stream_parser, NULL); 482 444 psock_set_prog(&progs->stream_verdict, NULL); 445 + psock_set_prog(&progs->skb_verdict, NULL); 483 446 } 484 447 485 448 int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
-1
include/net/bpf_sk_storage.h
··· 27 27 struct bpf_sk_storage_diag; 28 28 struct sk_buff; 29 29 struct nlattr; 30 - struct sock; 31 30 32 31 #ifdef CONFIG_BPF_SYSCALL 33 32 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+3
include/net/sock.h
··· 1184 1184 void (*unhash)(struct sock *sk); 1185 1185 void (*rehash)(struct sock *sk); 1186 1186 int (*get_port)(struct sock *sk, unsigned short snum); 1187 + #ifdef CONFIG_BPF_SYSCALL 1188 + int (*psock_update_sk_prot)(struct sock *sk, bool restore); 1189 + #endif 1187 1190 1188 1191 /* Keeping track of sockets in use */ 1189 1192 #ifdef CONFIG_PROC_FS
+1 -2
include/net/tcp.h
··· 2203 2203 2204 2204 #ifdef CONFIG_BPF_SYSCALL 2205 2205 struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); 2206 + int tcp_bpf_update_proto(struct sock *sk, bool restore); 2206 2207 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); 2207 2208 #endif /* CONFIG_BPF_SYSCALL */ 2208 2209 2209 2210 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, 2210 2211 int flags); 2211 - int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 2212 - struct msghdr *msg, int len, int flags); 2213 2212 #endif /* CONFIG_NET_SOCK_MSG */ 2214 2213 2215 2214 #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
+3
include/net/udp.h
··· 329 329 struct sk_buff *skb); 330 330 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, 331 331 __be16 sport, __be16 dport); 332 + int udp_read_sock(struct sock *sk, read_descriptor_t *desc, 333 + sk_read_actor_t recv_actor); 332 334 333 335 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid 334 336 * possibly multiple cache miss on dequeue() ··· 543 541 #ifdef CONFIG_BPF_SYSCALL 544 542 struct sk_psock; 545 543 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock); 544 + int udp_bpf_update_proto(struct sock *sk, bool restore); 546 545 #endif 547 546 548 547 #endif /* _UDP_H */
+5
include/uapi/linux/bpf.h
··· 957 957 BPF_XDP_CPUMAP, 958 958 BPF_SK_LOOKUP, 959 959 BPF_XDP, 960 + BPF_SK_SKB_VERDICT, 960 961 __MAX_BPF_ATTACH_TYPE 961 962 }; 962 963 ··· 1118 1117 * offset to another bpf function 1119 1118 */ 1120 1119 #define BPF_PSEUDO_CALL 1 1120 + /* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, 1121 + * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel 1122 + */ 1123 + #define BPF_PSEUDO_KFUNC_CALL 2 1121 1124 1122 1125 /* flags for BPF_MAP_UPDATE_ELEM command */ 1123 1126 enum {
+158 -91
kernel/bpf/btf.c
··· 283 283 [BTF_KIND_FLOAT] = "FLOAT", 284 284 }; 285 285 286 - static const char *btf_type_str(const struct btf_type *t) 286 + const char *btf_type_str(const struct btf_type *t) 287 287 { 288 288 return btf_kind_str[BTF_INFO_KIND(t->info)]; 289 289 } ··· 789 789 790 790 while (btf_type_is_modifier(t) && 791 791 BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { 792 - id = t->type; 793 792 t = btf_type_by_id(btf, t->type); 794 793 } 795 794 ··· 4376 4377 #undef BPF_LINK_TYPE 4377 4378 4378 4379 static const struct btf_member * 4379 - btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, 4380 + btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, 4380 4381 const struct btf_type *t, enum bpf_prog_type prog_type, 4381 4382 int arg) 4382 4383 { ··· 5361 5362 return btf_check_func_type_match(log, btf1, t1, btf2, t2); 5362 5363 } 5363 5364 5365 + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { 5366 + #ifdef CONFIG_NET 5367 + [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], 5368 + [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], 5369 + [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP], 5370 + #endif 5371 + }; 5372 + 5373 + static int btf_check_func_arg_match(struct bpf_verifier_env *env, 5374 + const struct btf *btf, u32 func_id, 5375 + struct bpf_reg_state *regs, 5376 + bool ptr_to_mem_ok) 5377 + { 5378 + struct bpf_verifier_log *log = &env->log; 5379 + const char *func_name, *ref_tname; 5380 + const struct btf_type *t, *ref_t; 5381 + const struct btf_param *args; 5382 + u32 i, nargs, ref_id; 5383 + 5384 + t = btf_type_by_id(btf, func_id); 5385 + if (!t || !btf_type_is_func(t)) { 5386 + /* These checks were already done by the verifier while loading 5387 + * struct bpf_func_info or in add_kfunc_call(). 
5388 + */ 5389 + bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", 5390 + func_id); 5391 + return -EFAULT; 5392 + } 5393 + func_name = btf_name_by_offset(btf, t->name_off); 5394 + 5395 + t = btf_type_by_id(btf, t->type); 5396 + if (!t || !btf_type_is_func_proto(t)) { 5397 + bpf_log(log, "Invalid BTF of func %s\n", func_name); 5398 + return -EFAULT; 5399 + } 5400 + args = (const struct btf_param *)(t + 1); 5401 + nargs = btf_type_vlen(t); 5402 + if (nargs > MAX_BPF_FUNC_REG_ARGS) { 5403 + bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, 5404 + MAX_BPF_FUNC_REG_ARGS); 5405 + return -EINVAL; 5406 + } 5407 + 5408 + /* check that BTF function arguments match actual types that the 5409 + * verifier sees. 5410 + */ 5411 + for (i = 0; i < nargs; i++) { 5412 + u32 regno = i + 1; 5413 + struct bpf_reg_state *reg = &regs[regno]; 5414 + 5415 + t = btf_type_skip_modifiers(btf, args[i].type, NULL); 5416 + if (btf_type_is_scalar(t)) { 5417 + if (reg->type == SCALAR_VALUE) 5418 + continue; 5419 + bpf_log(log, "R%d is not a scalar\n", regno); 5420 + return -EINVAL; 5421 + } 5422 + 5423 + if (!btf_type_is_ptr(t)) { 5424 + bpf_log(log, "Unrecognized arg#%d type %s\n", 5425 + i, btf_type_str(t)); 5426 + return -EINVAL; 5427 + } 5428 + 5429 + ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); 5430 + ref_tname = btf_name_by_offset(btf, ref_t->name_off); 5431 + if (btf_is_kernel(btf)) { 5432 + const struct btf_type *reg_ref_t; 5433 + const struct btf *reg_btf; 5434 + const char *reg_ref_tname; 5435 + u32 reg_ref_id; 5436 + 5437 + if (!btf_type_is_struct(ref_t)) { 5438 + bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n", 5439 + func_name, i, btf_type_str(ref_t), 5440 + ref_tname); 5441 + return -EINVAL; 5442 + } 5443 + 5444 + if (reg->type == PTR_TO_BTF_ID) { 5445 + reg_btf = reg->btf; 5446 + reg_ref_id = reg->btf_id; 5447 + } else if (reg2btf_ids[reg->type]) { 5448 + reg_btf = btf_vmlinux; 5449 + reg_ref_id = 
*reg2btf_ids[reg->type]; 5450 + } else { 5451 + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n", 5452 + func_name, i, 5453 + btf_type_str(ref_t), ref_tname, regno); 5454 + return -EINVAL; 5455 + } 5456 + 5457 + reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, 5458 + &reg_ref_id); 5459 + reg_ref_tname = btf_name_by_offset(reg_btf, 5460 + reg_ref_t->name_off); 5461 + if (!btf_struct_ids_match(log, reg_btf, reg_ref_id, 5462 + reg->off, btf, ref_id)) { 5463 + bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", 5464 + func_name, i, 5465 + btf_type_str(ref_t), ref_tname, 5466 + regno, btf_type_str(reg_ref_t), 5467 + reg_ref_tname); 5468 + return -EINVAL; 5469 + } 5470 + } else if (btf_get_prog_ctx_type(log, btf, t, 5471 + env->prog->type, i)) { 5472 + /* If function expects ctx type in BTF check that caller 5473 + * is passing PTR_TO_CTX. 5474 + */ 5475 + if (reg->type != PTR_TO_CTX) { 5476 + bpf_log(log, 5477 + "arg#%d expected pointer to ctx, but got %s\n", 5478 + i, btf_type_str(t)); 5479 + return -EINVAL; 5480 + } 5481 + if (check_ctx_reg(env, reg, regno)) 5482 + return -EINVAL; 5483 + } else if (ptr_to_mem_ok) { 5484 + const struct btf_type *resolve_ret; 5485 + u32 type_size; 5486 + 5487 + resolve_ret = btf_resolve_size(btf, ref_t, &type_size); 5488 + if (IS_ERR(resolve_ret)) { 5489 + bpf_log(log, 5490 + "arg#%d reference type('%s %s') size cannot be determined: %ld\n", 5491 + i, btf_type_str(ref_t), ref_tname, 5492 + PTR_ERR(resolve_ret)); 5493 + return -EINVAL; 5494 + } 5495 + 5496 + if (check_mem_reg(env, reg, regno, type_size)) 5497 + return -EINVAL; 5498 + } else { 5499 + return -EINVAL; 5500 + } 5501 + } 5502 + 5503 + return 0; 5504 + } 5505 + 5364 5506 /* Compare BTF of a function with given bpf_reg_state. 5365 5507 * Returns: 5366 5508 * EFAULT - there is a verifier bug. Abort verification. 
··· 5509 5369 * 0 - BTF matches with what bpf_reg_state expects. 5510 5370 * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. 5511 5371 */ 5512 - int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, 5513 - struct bpf_reg_state *regs) 5372 + int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, 5373 + struct bpf_reg_state *regs) 5514 5374 { 5515 - struct bpf_verifier_log *log = &env->log; 5516 5375 struct bpf_prog *prog = env->prog; 5517 5376 struct btf *btf = prog->aux->btf; 5518 - const struct btf_param *args; 5519 - const struct btf_type *t, *ref_t; 5520 - u32 i, nargs, btf_id, type_size; 5521 - const char *tname; 5522 5377 bool is_global; 5378 + u32 btf_id; 5379 + int err; 5523 5380 5524 5381 if (!prog->aux->func_info) 5525 5382 return -EINVAL; ··· 5528 5391 if (prog->aux->func_info_aux[subprog].unreliable) 5529 5392 return -EINVAL; 5530 5393 5531 - t = btf_type_by_id(btf, btf_id); 5532 - if (!t || !btf_type_is_func(t)) { 5533 - /* These checks were already done by the verifier while loading 5534 - * struct bpf_func_info 5535 - */ 5536 - bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", 5537 - subprog); 5538 - return -EFAULT; 5539 - } 5540 - tname = btf_name_by_offset(btf, t->name_off); 5541 - 5542 - t = btf_type_by_id(btf, t->type); 5543 - if (!t || !btf_type_is_func_proto(t)) { 5544 - bpf_log(log, "Invalid BTF of func %s\n", tname); 5545 - return -EFAULT; 5546 - } 5547 - args = (const struct btf_param *)(t + 1); 5548 - nargs = btf_type_vlen(t); 5549 - if (nargs > MAX_BPF_FUNC_REG_ARGS) { 5550 - bpf_log(log, "Function %s has %d > %d args\n", tname, nargs, 5551 - MAX_BPF_FUNC_REG_ARGS); 5552 - goto out; 5553 - } 5554 - 5555 5394 is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 5556 - /* check that BTF function arguments match actual types that the 5557 - * verifier sees. 
5558 - */ 5559 - for (i = 0; i < nargs; i++) { 5560 - struct bpf_reg_state *reg = &regs[i + 1]; 5395 + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); 5561 5396 5562 - t = btf_type_by_id(btf, args[i].type); 5563 - while (btf_type_is_modifier(t)) 5564 - t = btf_type_by_id(btf, t->type); 5565 - if (btf_type_is_int(t) || btf_type_is_enum(t)) { 5566 - if (reg->type == SCALAR_VALUE) 5567 - continue; 5568 - bpf_log(log, "R%d is not a scalar\n", i + 1); 5569 - goto out; 5570 - } 5571 - if (btf_type_is_ptr(t)) { 5572 - /* If function expects ctx type in BTF check that caller 5573 - * is passing PTR_TO_CTX. 5574 - */ 5575 - if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) { 5576 - if (reg->type != PTR_TO_CTX) { 5577 - bpf_log(log, 5578 - "arg#%d expected pointer to ctx, but got %s\n", 5579 - i, btf_kind_str[BTF_INFO_KIND(t->info)]); 5580 - goto out; 5581 - } 5582 - if (check_ctx_reg(env, reg, i + 1)) 5583 - goto out; 5584 - continue; 5585 - } 5586 - 5587 - if (!is_global) 5588 - goto out; 5589 - 5590 - t = btf_type_skip_modifiers(btf, t->type, NULL); 5591 - 5592 - ref_t = btf_resolve_size(btf, t, &type_size); 5593 - if (IS_ERR(ref_t)) { 5594 - bpf_log(log, 5595 - "arg#%d reference type('%s %s') size cannot be determined: %ld\n", 5596 - i, btf_type_str(t), btf_name_by_offset(btf, t->name_off), 5597 - PTR_ERR(ref_t)); 5598 - goto out; 5599 - } 5600 - 5601 - if (check_mem_reg(env, reg, i + 1, type_size)) 5602 - goto out; 5603 - 5604 - continue; 5605 - } 5606 - bpf_log(log, "Unrecognized arg#%d type %s\n", 5607 - i, btf_kind_str[BTF_INFO_KIND(t->info)]); 5608 - goto out; 5609 - } 5610 - return 0; 5611 - out: 5612 5397 /* Compiler optimizations can remove arguments from static functions 5613 5398 * or mismatched type can be passed into a global function. 5614 5399 * In such cases mark the function as unreliable from BTF point of view. 
5615 5400 */ 5616 - prog->aux->func_info_aux[subprog].unreliable = true; 5617 - return -EINVAL; 5401 + if (err) 5402 + prog->aux->func_info_aux[subprog].unreliable = true; 5403 + return err; 5404 + } 5405 + 5406 + int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, 5407 + const struct btf *btf, u32 func_id, 5408 + struct bpf_reg_state *regs) 5409 + { 5410 + return btf_check_func_arg_match(env, btf, func_id, regs, false); 5618 5411 } 5619 5412 5620 5413 /* Convert BTF of a function into bpf_reg_state if possible
+24 -23
kernel/bpf/core.c
··· 143 143 if (!prog->aux->nr_linfo || !prog->jit_requested) 144 144 return 0; 145 145 146 - prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo, 147 - sizeof(*prog->aux->jited_linfo), 148 - GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 146 + prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo, 147 + sizeof(*prog->aux->jited_linfo), 148 + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); 149 149 if (!prog->aux->jited_linfo) 150 150 return -ENOMEM; 151 151 152 152 return 0; 153 153 } 154 154 155 - void bpf_prog_free_jited_linfo(struct bpf_prog *prog) 155 + void bpf_prog_jit_attempt_done(struct bpf_prog *prog) 156 156 { 157 - kfree(prog->aux->jited_linfo); 158 - prog->aux->jited_linfo = NULL; 159 - } 157 + if (prog->aux->jited_linfo && 158 + (!prog->jited || !prog->aux->jited_linfo[0])) { 159 + kvfree(prog->aux->jited_linfo); 160 + prog->aux->jited_linfo = NULL; 161 + } 160 162 161 - void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog) 162 - { 163 - if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0]) 164 - bpf_prog_free_jited_linfo(prog); 163 + kfree(prog->aux->kfunc_tab); 164 + prog->aux->kfunc_tab = NULL; 165 165 } 166 166 167 167 /* The jit engine is responsible to provide an array ··· 215 215 */ 216 216 jited_linfo[i] = prog->bpf_func + 217 217 insn_to_jit_off[linfo[i].insn_off - insn_start - 1]; 218 - } 219 - 220 - void bpf_prog_free_linfo(struct bpf_prog *prog) 221 - { 222 - bpf_prog_free_jited_linfo(prog); 223 - kvfree(prog->aux->linfo); 224 218 } 225 219 226 220 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, ··· 1843 1849 /* In case of BPF to BPF calls, verifier did all the prep 1844 1850 * work with regards to JITing, etc. 
1845 1851 */ 1852 + bool jit_needed = false; 1853 + 1846 1854 if (fp->bpf_func) 1847 1855 goto finalize; 1856 + 1857 + if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) || 1858 + bpf_prog_has_kfunc_call(fp)) 1859 + jit_needed = true; 1848 1860 1849 1861 bpf_prog_select_func(fp); 1850 1862 ··· 1866 1866 return fp; 1867 1867 1868 1868 fp = bpf_int_jit_compile(fp); 1869 - if (!fp->jited) { 1870 - bpf_prog_free_jited_linfo(fp); 1871 - #ifdef CONFIG_BPF_JIT_ALWAYS_ON 1869 + bpf_prog_jit_attempt_done(fp); 1870 + if (!fp->jited && jit_needed) { 1872 1871 *err = -ENOTSUPP; 1873 1872 return fp; 1874 - #endif 1875 - } else { 1876 - bpf_prog_free_unused_jited_linfo(fp); 1877 1873 } 1878 1874 } else { 1879 1875 *err = bpf_prog_offload_compile(fp); ··· 2346 2350 * them using insn_is_zext. 2347 2351 */ 2348 2352 bool __weak bpf_jit_needs_zext(void) 2353 + { 2354 + return false; 2355 + } 2356 + 2357 + bool __weak bpf_jit_supports_kfunc_call(void) 2349 2358 { 2350 2359 return false; 2351 2360 }
+10 -3
kernel/bpf/disasm.c
··· 19 19 { 20 20 BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); 21 21 22 - if (insn->src_reg != BPF_PSEUDO_CALL && 22 + if (!insn->src_reg && 23 23 insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID && 24 24 func_id_str[insn->imm]) 25 25 return func_id_str[insn->imm]; 26 26 27 - if (cbs && cbs->cb_call) 28 - return cbs->cb_call(cbs->private_data, insn); 27 + if (cbs && cbs->cb_call) { 28 + const char *res; 29 + 30 + res = cbs->cb_call(cbs->private_data, insn); 31 + if (res) 32 + return res; 33 + } 29 34 30 35 if (insn->src_reg == BPF_PSEUDO_CALL) 31 36 snprintf(buff, len, "%+d", insn->imm); 37 + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) 38 + snprintf(buff, len, "kernel-function"); 32 39 33 40 return buff; 34 41 }
+11 -4
kernel/bpf/helpers.c
··· 382 382 }; 383 383 384 384 #ifdef CONFIG_CGROUP_BPF 385 - DECLARE_PER_CPU(struct bpf_cgroup_storage*, 386 - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 385 + DECLARE_PER_CPU(struct bpf_cgroup_storage_info, 386 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 387 387 388 388 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) 389 389 { ··· 392 392 * verifier checks that its value is correct. 393 393 */ 394 394 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 395 - struct bpf_cgroup_storage *storage; 395 + struct bpf_cgroup_storage *storage = NULL; 396 396 void *ptr; 397 + int i; 397 398 398 - storage = this_cpu_read(bpf_cgroup_storage[stype]); 399 + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { 400 + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) 401 + continue; 402 + 403 + storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); 404 + break; 405 + } 399 406 400 407 if (stype == BPF_CGROUP_STORAGE_SHARED) 401 408 ptr = &READ_ONCE(storage->buf)->data[0];
+3 -2
kernel/bpf/local_storage.c
··· 9 9 #include <linux/slab.h> 10 10 #include <uapi/linux/btf.h> 11 11 12 - DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); 13 - 14 12 #ifdef CONFIG_CGROUP_BPF 13 + 14 + DEFINE_PER_CPU(struct bpf_cgroup_storage_info, 15 + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); 15 16 16 17 #include "../cgroup/cgroup-internal.h" 17 18
+3
kernel/bpf/lpm_trie.c
··· 726 726 .map_lookup_elem = trie_lookup_elem, 727 727 .map_update_elem = trie_update_elem, 728 728 .map_delete_elem = trie_delete_elem, 729 + .map_lookup_batch = generic_map_lookup_batch, 730 + .map_update_batch = generic_map_update_batch, 731 + .map_delete_batch = generic_map_delete_batch, 729 732 .map_check_btf = trie_check_btf, 730 733 .map_btf_name = "lpm_trie", 731 734 .map_btf_id = &trie_map_btf_id,
+4 -1
kernel/bpf/syscall.c
··· 1694 1694 { 1695 1695 bpf_prog_kallsyms_del_all(prog); 1696 1696 btf_put(prog->aux->btf); 1697 - bpf_prog_free_linfo(prog); 1697 + kvfree(prog->aux->jited_linfo); 1698 + kvfree(prog->aux->linfo); 1699 + kfree(prog->aux->kfunc_tab); 1698 1700 if (prog->aux->attach_btf) 1699 1701 btf_put(prog->aux->attach_btf); 1700 1702 ··· 2948 2946 return BPF_PROG_TYPE_SK_MSG; 2949 2947 case BPF_SK_SKB_STREAM_PARSER: 2950 2948 case BPF_SK_SKB_STREAM_VERDICT: 2949 + case BPF_SK_SKB_VERDICT: 2951 2950 return BPF_PROG_TYPE_SK_SKB; 2952 2951 case BPF_LIRC_MODE2: 2953 2952 return BPF_PROG_TYPE_LIRC_MODE2;
+348 -42
kernel/bpf/verifier.c
··· 234 234 insn->src_reg == BPF_PSEUDO_CALL; 235 235 } 236 236 237 + static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn) 238 + { 239 + return insn->code == (BPF_JMP | BPF_CALL) && 240 + insn->src_reg == BPF_PSEUDO_KFUNC_CALL; 241 + } 242 + 237 243 static bool bpf_pseudo_func(const struct bpf_insn *insn) 238 244 { 239 245 return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && ··· 1560 1554 verbose(env, "too many subprograms\n"); 1561 1555 return -E2BIG; 1562 1556 } 1557 + /* determine subprog starts. The end is one before the next starts */ 1563 1558 env->subprog_info[env->subprog_cnt++].start = off; 1564 1559 sort(env->subprog_info, env->subprog_cnt, 1565 1560 sizeof(env->subprog_info[0]), cmp_subprogs, NULL); 1566 1561 return env->subprog_cnt - 1; 1567 1562 } 1568 1563 1569 - static int check_subprogs(struct bpf_verifier_env *env) 1564 + struct bpf_kfunc_desc { 1565 + struct btf_func_model func_model; 1566 + u32 func_id; 1567 + s32 imm; 1568 + }; 1569 + 1570 + #define MAX_KFUNC_DESCS 256 1571 + struct bpf_kfunc_desc_tab { 1572 + struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; 1573 + u32 nr_descs; 1574 + }; 1575 + 1576 + static int kfunc_desc_cmp_by_id(const void *a, const void *b) 1570 1577 { 1571 - int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; 1578 + const struct bpf_kfunc_desc *d0 = a; 1579 + const struct bpf_kfunc_desc *d1 = b; 1580 + 1581 + /* func_id is not greater than BTF_MAX_TYPE */ 1582 + return d0->func_id - d1->func_id; 1583 + } 1584 + 1585 + static const struct bpf_kfunc_desc * 1586 + find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) 1587 + { 1588 + struct bpf_kfunc_desc desc = { 1589 + .func_id = func_id, 1590 + }; 1591 + struct bpf_kfunc_desc_tab *tab; 1592 + 1593 + tab = prog->aux->kfunc_tab; 1594 + return bsearch(&desc, tab->descs, tab->nr_descs, 1595 + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); 1596 + } 1597 + 1598 + static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) 1599 + { 1600 + const struct 
btf_type *func, *func_proto; 1601 + struct bpf_kfunc_desc_tab *tab; 1602 + struct bpf_prog_aux *prog_aux; 1603 + struct bpf_kfunc_desc *desc; 1604 + const char *func_name; 1605 + unsigned long addr; 1606 + int err; 1607 + 1608 + prog_aux = env->prog->aux; 1609 + tab = prog_aux->kfunc_tab; 1610 + if (!tab) { 1611 + if (!btf_vmlinux) { 1612 + verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); 1613 + return -ENOTSUPP; 1614 + } 1615 + 1616 + if (!env->prog->jit_requested) { 1617 + verbose(env, "JIT is required for calling kernel function\n"); 1618 + return -ENOTSUPP; 1619 + } 1620 + 1621 + if (!bpf_jit_supports_kfunc_call()) { 1622 + verbose(env, "JIT does not support calling kernel function\n"); 1623 + return -ENOTSUPP; 1624 + } 1625 + 1626 + if (!env->prog->gpl_compatible) { 1627 + verbose(env, "cannot call kernel function from non-GPL compatible program\n"); 1628 + return -EINVAL; 1629 + } 1630 + 1631 + tab = kzalloc(sizeof(*tab), GFP_KERNEL); 1632 + if (!tab) 1633 + return -ENOMEM; 1634 + prog_aux->kfunc_tab = tab; 1635 + } 1636 + 1637 + if (find_kfunc_desc(env->prog, func_id)) 1638 + return 0; 1639 + 1640 + if (tab->nr_descs == MAX_KFUNC_DESCS) { 1641 + verbose(env, "too many different kernel function calls\n"); 1642 + return -E2BIG; 1643 + } 1644 + 1645 + func = btf_type_by_id(btf_vmlinux, func_id); 1646 + if (!func || !btf_type_is_func(func)) { 1647 + verbose(env, "kernel btf_id %u is not a function\n", 1648 + func_id); 1649 + return -EINVAL; 1650 + } 1651 + func_proto = btf_type_by_id(btf_vmlinux, func->type); 1652 + if (!func_proto || !btf_type_is_func_proto(func_proto)) { 1653 + verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", 1654 + func_id); 1655 + return -EINVAL; 1656 + } 1657 + 1658 + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); 1659 + addr = kallsyms_lookup_name(func_name); 1660 + if (!addr) { 1661 + verbose(env, "cannot find address for kernel function %s\n", 1662 + 
func_name); 1663 + return -EINVAL; 1664 + } 1665 + 1666 + desc = &tab->descs[tab->nr_descs++]; 1667 + desc->func_id = func_id; 1668 + desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base; 1669 + err = btf_distill_func_proto(&env->log, btf_vmlinux, 1670 + func_proto, func_name, 1671 + &desc->func_model); 1672 + if (!err) 1673 + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 1674 + kfunc_desc_cmp_by_id, NULL); 1675 + return err; 1676 + } 1677 + 1678 + static int kfunc_desc_cmp_by_imm(const void *a, const void *b) 1679 + { 1680 + const struct bpf_kfunc_desc *d0 = a; 1681 + const struct bpf_kfunc_desc *d1 = b; 1682 + 1683 + if (d0->imm > d1->imm) 1684 + return 1; 1685 + else if (d0->imm < d1->imm) 1686 + return -1; 1687 + return 0; 1688 + } 1689 + 1690 + static void sort_kfunc_descs_by_imm(struct bpf_prog *prog) 1691 + { 1692 + struct bpf_kfunc_desc_tab *tab; 1693 + 1694 + tab = prog->aux->kfunc_tab; 1695 + if (!tab) 1696 + return; 1697 + 1698 + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 1699 + kfunc_desc_cmp_by_imm, NULL); 1700 + } 1701 + 1702 + bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog) 1703 + { 1704 + return !!prog->aux->kfunc_tab; 1705 + } 1706 + 1707 + const struct btf_func_model * 1708 + bpf_jit_find_kfunc_model(const struct bpf_prog *prog, 1709 + const struct bpf_insn *insn) 1710 + { 1711 + const struct bpf_kfunc_desc desc = { 1712 + .imm = insn->imm, 1713 + }; 1714 + const struct bpf_kfunc_desc *res; 1715 + struct bpf_kfunc_desc_tab *tab; 1716 + 1717 + tab = prog->aux->kfunc_tab; 1718 + res = bsearch(&desc, tab->descs, tab->nr_descs, 1719 + sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm); 1720 + 1721 + return res ? 
&res->func_model : NULL; 1722 + } 1723 + 1724 + static int add_subprog_and_kfunc(struct bpf_verifier_env *env) 1725 + { 1572 1726 struct bpf_subprog_info *subprog = env->subprog_info; 1573 1727 struct bpf_insn *insn = env->prog->insnsi; 1574 - int insn_cnt = env->prog->len; 1728 + int i, ret, insn_cnt = env->prog->len; 1575 1729 1576 1730 /* Add entry function. */ 1577 1731 ret = add_subprog(env, 0); 1578 - if (ret < 0) 1732 + if (ret) 1579 1733 return ret; 1580 1734 1581 - /* determine subprog starts. The end is one before the next starts */ 1582 - for (i = 0; i < insn_cnt; i++) { 1583 - if (bpf_pseudo_func(insn + i)) { 1584 - if (!env->bpf_capable) { 1585 - verbose(env, 1586 - "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 1587 - return -EPERM; 1588 - } 1589 - ret = add_subprog(env, i + insn[i].imm + 1); 1590 - if (ret < 0) 1591 - return ret; 1592 - /* remember subprog */ 1593 - insn[i + 1].imm = ret; 1735 + for (i = 0; i < insn_cnt; i++, insn++) { 1736 + if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) && 1737 + !bpf_pseudo_kfunc_call(insn)) 1594 1738 continue; 1595 - } 1596 - if (!bpf_pseudo_call(insn + i)) 1597 - continue; 1739 + 1598 1740 if (!env->bpf_capable) { 1599 - verbose(env, 1600 - "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 1741 + verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); 1601 1742 return -EPERM; 1602 1743 } 1603 - ret = add_subprog(env, i + insn[i].imm + 1); 1744 + 1745 + if (bpf_pseudo_func(insn)) { 1746 + ret = add_subprog(env, i + insn->imm + 1); 1747 + if (ret >= 0) 1748 + /* remember subprog */ 1749 + insn[1].imm = ret; 1750 + } else if (bpf_pseudo_call(insn)) { 1751 + ret = add_subprog(env, i + insn->imm + 1); 1752 + } else { 1753 + ret = add_kfunc_call(env, insn->imm); 1754 + } 1755 + 1604 1756 if (ret < 0) 1605 1757 return ret; 1606 1758 } ··· 1771 1607 if (env->log.level & BPF_LOG_LEVEL2) 1772 1608 for (i = 0; i 
< env->subprog_cnt; i++) 1773 1609 verbose(env, "func#%d @%d\n", i, subprog[i].start); 1610 + 1611 + return 0; 1612 + } 1613 + 1614 + static int check_subprogs(struct bpf_verifier_env *env) 1615 + { 1616 + int i, subprog_start, subprog_end, off, cur_subprog = 0; 1617 + struct bpf_subprog_info *subprog = env->subprog_info; 1618 + struct bpf_insn *insn = env->prog->insnsi; 1619 + int insn_cnt = env->prog->len; 1774 1620 1775 1621 /* now check that all jumps are within the same subprog */ 1776 1622 subprog_start = subprog[cur_subprog].start; ··· 2090 1916 return i; 2091 1917 } 2092 1918 1919 + static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) 1920 + { 1921 + const struct btf_type *func; 1922 + 1923 + if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) 1924 + return NULL; 1925 + 1926 + func = btf_type_by_id(btf_vmlinux, insn->imm); 1927 + return btf_name_by_offset(btf_vmlinux, func->name_off); 1928 + } 1929 + 2093 1930 /* For given verifier state backtrack_insn() is called from the last insn to 2094 1931 * the first insn. Its purpose is to compute a bitmask of registers and 2095 1932 * stack slots that needs precision in the parent verifier state. ··· 2109 1924 u32 *reg_mask, u64 *stack_mask) 2110 1925 { 2111 1926 const struct bpf_insn_cbs cbs = { 1927 + .cb_call = disasm_kfunc_name, 2112 1928 .cb_print = verbose, 2113 1929 .private_data = env, 2114 1930 }; ··· 5551 5365 func_info_aux = env->prog->aux->func_info_aux; 5552 5366 if (func_info_aux) 5553 5367 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; 5554 - err = btf_check_func_arg_match(env, subprog, caller->regs); 5368 + err = btf_check_subprog_arg_match(env, subprog, caller->regs); 5555 5369 if (err == -EFAULT) 5556 5370 return err; 5557 5371 if (is_global) { ··· 6146 5960 return 0; 6147 5961 } 6148 5962 5963 + /* mark_btf_func_reg_size() is used when the reg size is determined by 5964 + * the BTF func_proto's return value size and argument. 
5965 + */ 5966 + static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno, 5967 + size_t reg_size) 5968 + { 5969 + struct bpf_reg_state *reg = &cur_regs(env)[regno]; 5970 + 5971 + if (regno == BPF_REG_0) { 5972 + /* Function return value */ 5973 + reg->live |= REG_LIVE_WRITTEN; 5974 + reg->subreg_def = reg_size == sizeof(u64) ? 5975 + DEF_NOT_SUBREG : env->insn_idx + 1; 5976 + } else { 5977 + /* Function argument */ 5978 + if (reg_size == sizeof(u64)) { 5979 + mark_insn_zext(env, reg); 5980 + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); 5981 + } else { 5982 + mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32); 5983 + } 5984 + } 5985 + } 5986 + 5987 + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) 5988 + { 5989 + const struct btf_type *t, *func, *func_proto, *ptr_type; 5990 + struct bpf_reg_state *regs = cur_regs(env); 5991 + const char *func_name, *ptr_type_name; 5992 + u32 i, nargs, func_id, ptr_type_id; 5993 + const struct btf_param *args; 5994 + int err; 5995 + 5996 + func_id = insn->imm; 5997 + func = btf_type_by_id(btf_vmlinux, func_id); 5998 + func_name = btf_name_by_offset(btf_vmlinux, func->name_off); 5999 + func_proto = btf_type_by_id(btf_vmlinux, func->type); 6000 + 6001 + if (!env->ops->check_kfunc_call || 6002 + !env->ops->check_kfunc_call(func_id)) { 6003 + verbose(env, "calling kernel function %s is not allowed\n", 6004 + func_name); 6005 + return -EACCES; 6006 + } 6007 + 6008 + /* Check the arguments */ 6009 + err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); 6010 + if (err) 6011 + return err; 6012 + 6013 + for (i = 0; i < CALLER_SAVED_REGS; i++) 6014 + mark_reg_not_init(env, regs, caller_saved[i]); 6015 + 6016 + /* Check return type */ 6017 + t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); 6018 + if (btf_type_is_scalar(t)) { 6019 + mark_reg_unknown(env, regs, BPF_REG_0); 6020 + mark_btf_func_reg_size(env, BPF_REG_0, t->size); 6021 + } else if 
(btf_type_is_ptr(t)) { 6022 + ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, 6023 + &ptr_type_id); 6024 + if (!btf_type_is_struct(ptr_type)) { 6025 + ptr_type_name = btf_name_by_offset(btf_vmlinux, 6026 + ptr_type->name_off); 6027 + verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", 6028 + func_name, btf_type_str(ptr_type), 6029 + ptr_type_name); 6030 + return -EINVAL; 6031 + } 6032 + mark_reg_known_zero(env, regs, BPF_REG_0); 6033 + regs[BPF_REG_0].btf = btf_vmlinux; 6034 + regs[BPF_REG_0].type = PTR_TO_BTF_ID; 6035 + regs[BPF_REG_0].btf_id = ptr_type_id; 6036 + mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); 6037 + } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ 6038 + 6039 + nargs = btf_type_vlen(func_proto); 6040 + args = (const struct btf_param *)(func_proto + 1); 6041 + for (i = 0; i < nargs; i++) { 6042 + u32 regno = i + 1; 6043 + 6044 + t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); 6045 + if (btf_type_is_ptr(t)) 6046 + mark_btf_func_reg_size(env, regno, sizeof(void *)); 6047 + else 6048 + /* scalar. ensured by btf_check_kfunc_arg_match() */ 6049 + mark_btf_func_reg_size(env, regno, t->size); 6050 + } 6051 + 6052 + return 0; 6053 + } 6054 + 6149 6055 static bool signed_add_overflows(s64 a, s64 b) 6150 6056 { 6151 6057 /* Do the add in u64, where overflow is well-defined */ ··· 6340 6062 else 6341 6063 *ptr_limit = -off - 1; 6342 6064 return *ptr_limit >= max ? -ERANGE : 0; 6343 - case PTR_TO_MAP_KEY: 6344 - /* Currently, this code is not exercised as the only use 6345 - * is bpf_for_each_map_elem() helper which requires 6346 - * bpf_capble. The code has been tested manually for 6347 - * future use. 
6348 - */ 6349 - if (mask_to_left) { 6350 - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; 6351 - } else { 6352 - off = ptr_reg->smin_value + ptr_reg->off; 6353 - *ptr_limit = ptr_reg->map_ptr->key_size - off; 6354 - } 6355 - return 0; 6356 6065 case PTR_TO_MAP_VALUE: 6357 6066 max = ptr_reg->map_ptr->value_size; 6358 6067 if (mask_to_left) { ··· 6546 6281 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 6547 6282 dst, reg_type_str[ptr_reg->type]); 6548 6283 return -EACCES; 6549 - case PTR_TO_MAP_KEY: 6550 6284 case PTR_TO_MAP_VALUE: 6551 6285 if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { 6552 6286 verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", ··· 10440 10176 10441 10177 if (env->log.level & BPF_LOG_LEVEL) { 10442 10178 const struct bpf_insn_cbs cbs = { 10179 + .cb_call = disasm_kfunc_name, 10443 10180 .cb_print = verbose, 10444 10181 .private_data = env, 10445 10182 }; ··· 10588 10323 if (BPF_SRC(insn->code) != BPF_K || 10589 10324 insn->off != 0 || 10590 10325 (insn->src_reg != BPF_REG_0 && 10591 - insn->src_reg != BPF_PSEUDO_CALL) || 10326 + insn->src_reg != BPF_PSEUDO_CALL && 10327 + insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || 10592 10328 insn->dst_reg != BPF_REG_0 || 10593 10329 class == BPF_JMP32) { 10594 10330 verbose(env, "BPF_CALL uses reserved fields\n"); ··· 10604 10338 } 10605 10339 if (insn->src_reg == BPF_PSEUDO_CALL) 10606 10340 err = check_func_call(env, insn, &env->insn_idx); 10341 + else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) 10342 + err = check_kfunc_call(env, insn); 10607 10343 else 10608 10344 err = check_helper_call(env, insn, &env->insn_idx); 10609 10345 if (err) ··· 11916 11648 func[i]->aux->name[0] = 'F'; 11917 11649 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; 11918 11650 func[i]->jit_requested = 1; 11651 + func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; 11919 11652 func[i]->aux->linfo = prog->aux->linfo; 11920 
11653 func[i]->aux->nr_linfo = prog->aux->nr_linfo; 11921 11654 func[i]->aux->jited_linfo = prog->aux->jited_linfo; ··· 12024 11755 prog->bpf_func = func[0]->bpf_func; 12025 11756 prog->aux->func = func; 12026 11757 prog->aux->func_cnt = env->subprog_cnt; 12027 - bpf_prog_free_unused_jited_linfo(prog); 11758 + bpf_prog_jit_attempt_done(prog); 12028 11759 return 0; 12029 11760 out_free: 12030 11761 for (i = 0; i < env->subprog_cnt; i++) { ··· 12047 11778 insn->off = 0; 12048 11779 insn->imm = env->insn_aux_data[i].call_imm; 12049 11780 } 12050 - bpf_prog_free_jited_linfo(prog); 11781 + bpf_prog_jit_attempt_done(prog); 12051 11782 return err; 12052 11783 } 12053 11784 ··· 12056 11787 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 12057 11788 struct bpf_prog *prog = env->prog; 12058 11789 struct bpf_insn *insn = prog->insnsi; 11790 + bool has_kfunc_call = bpf_prog_has_kfunc_call(prog); 12059 11791 int i, depth; 12060 11792 #endif 12061 11793 int err = 0; ··· 12070 11800 return err; 12071 11801 } 12072 11802 #ifndef CONFIG_BPF_JIT_ALWAYS_ON 11803 + if (has_kfunc_call) { 11804 + verbose(env, "calling kernel functions are not allowed in non-JITed programs\n"); 11805 + return -EINVAL; 11806 + } 12073 11807 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { 12074 11808 /* When JIT fails the progs with bpf2bpf calls and tail_calls 12075 11809 * have to be rejected, since interpreter doesn't support them yet. ··· 12100 11826 err = 0; 12101 11827 #endif 12102 11828 return err; 11829 + } 11830 + 11831 + static int fixup_kfunc_call(struct bpf_verifier_env *env, 11832 + struct bpf_insn *insn) 11833 + { 11834 + const struct bpf_kfunc_desc *desc; 11835 + 11836 + /* insn->imm has the btf func_id. Replace it with 11837 + * an address (relative to __bpf_base_call). 
11838 + */ 11839 + desc = find_kfunc_desc(env->prog, insn->imm); 11840 + if (!desc) { 11841 + verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", 11842 + insn->imm); 11843 + return -EFAULT; 11844 + } 11845 + 11846 + insn->imm = desc->imm; 11847 + 11848 + return 0; 12103 11849 } 12104 11850 12105 11851 /* Do various post-verification rewrites in a single program pass. ··· 12257 11963 continue; 12258 11964 if (insn->src_reg == BPF_PSEUDO_CALL) 12259 11965 continue; 11966 + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { 11967 + ret = fixup_kfunc_call(env, insn); 11968 + if (ret) 11969 + return ret; 11970 + continue; 11971 + } 12260 11972 12261 11973 if (insn->imm == BPF_FUNC_get_route_realm) 12262 11974 prog->dst_needed = 1; ··· 12492 12192 } 12493 12193 } 12494 12194 12195 + sort_kfunc_descs_by_imm(env->prog); 12196 + 12495 12197 return 0; 12496 12198 } 12497 12199 ··· 12604 12302 /* 1st arg to a function */ 12605 12303 regs[BPF_REG_1].type = PTR_TO_CTX; 12606 12304 mark_reg_known_zero(env, regs, BPF_REG_1); 12607 - ret = btf_check_func_arg_match(env, subprog, regs); 12305 + ret = btf_check_subprog_arg_match(env, subprog, regs); 12608 12306 if (ret == -EFAULT) 12609 12307 /* unlikely verifier bug. abort. 12610 12308 * ret == 0 and ret < 0 are sadly acceptable for ··· 13197 12895 GFP_USER); 13198 12896 ret = -ENOMEM; 13199 12897 if (!env->explored_states) 12898 + goto skip_full_check; 12899 + 12900 + ret = add_subprog_and_kfunc(env); 12901 + if (ret < 0) 13200 12902 goto skip_full_check; 13201 12903 13202 12904 ret = check_subprogs(env);
+33 -1
net/bpf/test_run.c
··· 2 2 /* Copyright (c) 2017 Facebook 3 3 */ 4 4 #include <linux/bpf.h> 5 + #include <linux/btf_ids.h> 5 6 #include <linux/slab.h> 6 7 #include <linux/vmalloc.h> 7 8 #include <linux/etherdevice.h> ··· 107 106 108 107 bpf_test_timer_enter(&t); 109 108 do { 110 - bpf_cgroup_storage_set(storage); 109 + ret = bpf_cgroup_storage_set(storage); 110 + if (ret) 111 + break; 111 112 112 113 if (xdp) 113 114 *retval = bpf_prog_run_xdp(prog, ctx); 114 115 else 115 116 *retval = BPF_PROG_RUN(prog, ctx); 117 + 118 + bpf_cgroup_storage_unset(); 116 119 } while (bpf_test_timer_continue(&t, repeat, &ret, time)); 117 120 bpf_test_timer_leave(&t); 118 121 ··· 214 209 *b += 1; 215 210 return a + *b; 216 211 } 212 + 213 + u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) 214 + { 215 + return a + b + c + d; 216 + } 217 + 218 + int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) 219 + { 220 + return a + b; 221 + } 222 + 223 + struct sock * noinline bpf_kfunc_call_test3(struct sock *sk) 224 + { 225 + return sk; 226 + } 227 + 217 228 __diag_pop(); 218 229 219 230 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); 231 + 232 + BTF_SET_START(test_sk_kfunc_ids) 233 + BTF_ID(func, bpf_kfunc_call_test1) 234 + BTF_ID(func, bpf_kfunc_call_test2) 235 + BTF_ID(func, bpf_kfunc_call_test3) 236 + BTF_SET_END(test_sk_kfunc_ids) 237 + 238 + bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) 239 + { 240 + return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id); 241 + } 220 242 221 243 static void *bpf_test_init(const union bpf_attr *kattr, u32 size, 222 244 u32 headroom, u32 tailroom)
+1
net/core/filter.c
··· 9813 9813 .convert_ctx_access = tc_cls_act_convert_ctx_access, 9814 9814 .gen_prologue = tc_cls_act_prologue, 9815 9815 .gen_ld_abs = bpf_gen_ld_abs, 9816 + .check_kfunc_call = bpf_prog_test_check_kfunc_call, 9816 9817 }; 9817 9818 9818 9819 const struct bpf_prog_ops tc_cls_act_prog_ops = {
+48 -7
net/core/skbuff.c
··· 2500 2500 } 2501 2501 EXPORT_SYMBOL_GPL(skb_splice_bits); 2502 2502 2503 - /* Send skb data on a socket. Socket must be locked. */ 2504 - int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, 2505 - int len) 2503 + static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg, 2504 + struct kvec *vec, size_t num, size_t size) 2505 + { 2506 + struct socket *sock = sk->sk_socket; 2507 + 2508 + if (!sock) 2509 + return -EINVAL; 2510 + return kernel_sendmsg(sock, msg, vec, num, size); 2511 + } 2512 + 2513 + static int sendpage_unlocked(struct sock *sk, struct page *page, int offset, 2514 + size_t size, int flags) 2515 + { 2516 + struct socket *sock = sk->sk_socket; 2517 + 2518 + if (!sock) 2519 + return -EINVAL; 2520 + return kernel_sendpage(sock, page, offset, size, flags); 2521 + } 2522 + 2523 + typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg, 2524 + struct kvec *vec, size_t num, size_t size); 2525 + typedef int (*sendpage_func)(struct sock *sk, struct page *page, int offset, 2526 + size_t size, int flags); 2527 + static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, 2528 + int len, sendmsg_func sendmsg, sendpage_func sendpage) 2506 2529 { 2507 2530 unsigned int orig_len = len; 2508 2531 struct sk_buff *head = skb; ··· 2545 2522 memset(&msg, 0, sizeof(msg)); 2546 2523 msg.msg_flags = MSG_DONTWAIT; 2547 2524 2548 - ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen); 2525 + ret = INDIRECT_CALL_2(sendmsg, kernel_sendmsg_locked, 2526 + sendmsg_unlocked, sk, &msg, &kv, 1, slen); 2549 2527 if (ret <= 0) 2550 2528 goto error; 2551 2529 ··· 2577 2553 slen = min_t(size_t, len, skb_frag_size(frag) - offset); 2578 2554 2579 2555 while (slen) { 2580 - ret = kernel_sendpage_locked(sk, skb_frag_page(frag), 2581 - skb_frag_off(frag) + offset, 2582 - slen, MSG_DONTWAIT); 2556 + ret = INDIRECT_CALL_2(sendpage, kernel_sendpage_locked, 2557 + sendpage_unlocked, sk, 2558 + skb_frag_page(frag), 2559 + skb_frag_off(frag) + 
offset, 2560 + slen, MSG_DONTWAIT); 2583 2561 if (ret <= 0) 2584 2562 goto error; 2585 2563 ··· 2613 2587 error: 2614 2588 return orig_len == len ? ret : orig_len - len; 2615 2589 } 2590 + 2591 + /* Send skb data on a socket. Socket must be locked. */ 2592 + int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, 2593 + int len) 2594 + { 2595 + return __skb_send_sock(sk, skb, offset, len, kernel_sendmsg_locked, 2596 + kernel_sendpage_locked); 2597 + } 2616 2598 EXPORT_SYMBOL_GPL(skb_send_sock_locked); 2599 + 2600 + /* Send skb data on a socket. Socket must be unlocked. */ 2601 + int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) 2602 + { 2603 + return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, 2604 + sendpage_unlocked); 2605 + } 2617 2606 2618 2607 /** 2619 2608 * skb_store_bits - store bits from kernel buffer to skb
+143 -34
net/core/skmsg.c
··· 399 399 } 400 400 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); 401 401 402 + int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, 403 + long timeo, int *err) 404 + { 405 + DEFINE_WAIT_FUNC(wait, woken_wake_function); 406 + int ret = 0; 407 + 408 + if (sk->sk_shutdown & RCV_SHUTDOWN) 409 + return 1; 410 + 411 + if (!timeo) 412 + return ret; 413 + 414 + add_wait_queue(sk_sleep(sk), &wait); 415 + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 416 + ret = sk_wait_event(sk, &timeo, 417 + !list_empty(&psock->ingress_msg) || 418 + !skb_queue_empty(&sk->sk_receive_queue), &wait); 419 + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 420 + remove_wait_queue(sk_sleep(sk), &wait); 421 + return ret; 422 + } 423 + EXPORT_SYMBOL_GPL(sk_msg_wait_data); 424 + 425 + /* Receive sk_msg from psock->ingress_msg to @msg. */ 426 + int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, 427 + int len, int flags) 428 + { 429 + struct iov_iter *iter = &msg->msg_iter; 430 + int peek = flags & MSG_PEEK; 431 + struct sk_msg *msg_rx; 432 + int i, copied = 0; 433 + 434 + msg_rx = sk_psock_peek_msg(psock); 435 + while (copied != len) { 436 + struct scatterlist *sge; 437 + 438 + if (unlikely(!msg_rx)) 439 + break; 440 + 441 + i = msg_rx->sg.start; 442 + do { 443 + struct page *page; 444 + int copy; 445 + 446 + sge = sk_msg_elem(msg_rx, i); 447 + copy = sge->length; 448 + page = sg_page(sge); 449 + if (copied + copy > len) 450 + copy = len - copied; 451 + copy = copy_page_to_iter(page, sge->offset, copy, iter); 452 + if (!copy) 453 + return copied ? 
copied : -EFAULT; 454 + 455 + copied += copy; 456 + if (likely(!peek)) { 457 + sge->offset += copy; 458 + sge->length -= copy; 459 + if (!msg_rx->skb) 460 + sk_mem_uncharge(sk, copy); 461 + msg_rx->sg.size -= copy; 462 + 463 + if (!sge->length) { 464 + sk_msg_iter_var_next(i); 465 + if (!msg_rx->skb) 466 + put_page(page); 467 + } 468 + } else { 469 + /* Lets not optimize peek case if copy_page_to_iter 470 + * didn't copy the entire length lets just break. 471 + */ 472 + if (copy != sge->length) 473 + return copied; 474 + sk_msg_iter_var_next(i); 475 + } 476 + 477 + if (copied == len) 478 + break; 479 + } while (i != msg_rx->sg.end); 480 + 481 + if (unlikely(peek)) { 482 + msg_rx = sk_psock_next_msg(psock, msg_rx); 483 + if (!msg_rx) 484 + break; 485 + continue; 486 + } 487 + 488 + msg_rx->sg.start = i; 489 + if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { 490 + msg_rx = sk_psock_dequeue_msg(psock); 491 + kfree_sk_msg(msg_rx); 492 + } 493 + msg_rx = sk_psock_peek_msg(psock); 494 + } 495 + 496 + return copied; 497 + } 498 + EXPORT_SYMBOL_GPL(sk_msg_recvmsg); 499 + 402 500 static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, 403 501 struct sk_buff *skb) 404 502 { ··· 508 410 if (!sk_rmem_schedule(sk, skb, skb->truesize)) 509 411 return NULL; 510 412 511 - msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); 413 + msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL); 512 414 if (unlikely(!msg)) 513 415 return NULL; 514 416 ··· 595 497 if (!ingress) { 596 498 if (!sock_writeable(psock->sk)) 597 499 return -EAGAIN; 598 - return skb_send_sock_locked(psock->sk, skb, off, len); 500 + return skb_send_sock(psock->sk, skb, off, len); 599 501 } 600 502 return sk_psock_skb_ingress(psock, skb); 601 503 } ··· 609 511 u32 len, off; 610 512 int ret; 611 513 612 - /* Lock sock to avoid losing sk_socket during loop. 
*/ 613 - lock_sock(psock->sk); 514 + mutex_lock(&psock->work_mutex); 614 515 if (state->skb) { 615 516 skb = state->skb; 616 517 len = state->len; ··· 626 529 skb_bpf_redirect_clear(skb); 627 530 do { 628 531 ret = -EIO; 629 - if (likely(psock->sk->sk_socket)) 532 + if (!sock_flag(psock->sk, SOCK_DEAD)) 630 533 ret = sk_psock_handle_skb(psock, skb, off, 631 534 len, ingress); 632 535 if (ret <= 0) { ··· 650 553 kfree_skb(skb); 651 554 } 652 555 end: 653 - release_sock(psock->sk); 556 + mutex_unlock(&psock->work_mutex); 654 557 } 655 558 656 559 struct sk_psock *sk_psock_init(struct sock *sk, int node) ··· 659 562 struct proto *prot; 660 563 661 564 write_lock_bh(&sk->sk_callback_lock); 662 - 663 - if (inet_csk_has_ulp(sk)) { 664 - psock = ERR_PTR(-EINVAL); 665 - goto out; 666 - } 667 565 668 566 if (sk->sk_user_data) { 669 567 psock = ERR_PTR(-EBUSY); ··· 683 591 spin_lock_init(&psock->link_lock); 684 592 685 593 INIT_WORK(&psock->work, sk_psock_backlog); 594 + mutex_init(&psock->work_mutex); 686 595 INIT_LIST_HEAD(&psock->ingress_msg); 596 + spin_lock_init(&psock->ingress_lock); 687 597 skb_queue_head_init(&psock->ingress_skb); 688 598 689 599 sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED); ··· 724 630 } 725 631 } 726 632 727 - static void sk_psock_zap_ingress(struct sk_psock *psock) 633 + static void __sk_psock_zap_ingress(struct sk_psock *psock) 728 634 { 729 635 struct sk_buff *skb; 730 636 731 - while ((skb = __skb_dequeue(&psock->ingress_skb)) != NULL) { 637 + while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) { 732 638 skb_bpf_redirect_clear(skb); 733 639 kfree_skb(skb); 734 640 } ··· 745 651 } 746 652 } 747 653 654 + void sk_psock_stop(struct sk_psock *psock, bool wait) 655 + { 656 + spin_lock_bh(&psock->ingress_lock); 657 + sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); 658 + sk_psock_cork_free(psock); 659 + __sk_psock_zap_ingress(psock); 660 + spin_unlock_bh(&psock->ingress_lock); 661 + 662 + if (wait) 663 + cancel_work_sync(&psock->work); 664 + 
} 665 + 748 666 static void sk_psock_done_strp(struct sk_psock *psock); 749 667 750 - static void sk_psock_destroy_deferred(struct work_struct *gc) 668 + static void sk_psock_destroy(struct work_struct *work) 751 669 { 752 - struct sk_psock *psock = container_of(gc, struct sk_psock, gc); 753 - 670 + struct sk_psock *psock = container_of(to_rcu_work(work), 671 + struct sk_psock, rwork); 754 672 /* No sk_callback_lock since already detached. */ 755 673 756 674 sk_psock_done_strp(psock); 757 675 758 676 cancel_work_sync(&psock->work); 677 + mutex_destroy(&psock->work_mutex); 759 678 760 679 psock_progs_drop(&psock->progs); 761 680 762 681 sk_psock_link_destroy(psock); 763 682 sk_psock_cork_free(psock); 764 - sk_psock_zap_ingress(psock); 765 683 766 684 if (psock->sk_redir) 767 685 sock_put(psock->sk_redir); ··· 781 675 kfree(psock); 782 676 } 783 677 784 - static void sk_psock_destroy(struct rcu_head *rcu) 785 - { 786 - struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu); 787 - 788 - INIT_WORK(&psock->gc, sk_psock_destroy_deferred); 789 - schedule_work(&psock->gc); 790 - } 791 - 792 678 void sk_psock_drop(struct sock *sk, struct sk_psock *psock) 793 679 { 794 - sk_psock_cork_free(psock); 795 - sk_psock_zap_ingress(psock); 680 + sk_psock_stop(psock, false); 796 681 797 682 write_lock_bh(&sk->sk_callback_lock); 798 683 sk_psock_restore_proto(sk, psock); 799 684 rcu_assign_sk_user_data(sk, NULL); 800 685 if (psock->progs.stream_parser) 801 686 sk_psock_stop_strp(sk, psock); 802 - else if (psock->progs.stream_verdict) 687 + else if (psock->progs.stream_verdict || psock->progs.skb_verdict) 803 688 sk_psock_stop_verdict(sk, psock); 804 689 write_unlock_bh(&sk->sk_callback_lock); 805 - sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); 806 690 807 - call_rcu(&psock->rcu, sk_psock_destroy); 691 + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); 692 + queue_rcu_work(system_wq, &psock->rwork); 808 693 } 809 694 EXPORT_SYMBOL_GPL(sk_psock_drop); 810 695 ··· 864 767 * 
error that caused the pipe to break. We can't send a packet on 865 768 * a socket that is in this state so we drop the skb. 866 769 */ 867 - if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || 868 - !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { 770 + if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) { 771 + kfree_skb(skb); 772 + return; 773 + } 774 + spin_lock_bh(&psock_other->ingress_lock); 775 + if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { 776 + spin_unlock_bh(&psock_other->ingress_lock); 869 777 kfree_skb(skb); 870 778 return; 871 779 } 872 780 873 781 skb_queue_tail(&psock_other->ingress_skb, skb); 874 782 schedule_work(&psock_other->work); 783 + spin_unlock_bh(&psock_other->ingress_lock); 875 784 } 876 785 877 786 static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict) ··· 945 842 err = sk_psock_skb_ingress_self(psock, skb); 946 843 } 947 844 if (err < 0) { 948 - skb_queue_tail(&psock->ingress_skb, skb); 949 - schedule_work(&psock->work); 845 + spin_lock_bh(&psock->ingress_lock); 846 + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { 847 + skb_queue_tail(&psock->ingress_skb, skb); 848 + schedule_work(&psock->work); 849 + } 850 + spin_unlock_bh(&psock->ingress_lock); 950 851 } 951 852 break; 952 853 case __SK_REDIRECT: ··· 1117 1010 } 1118 1011 skb_set_owner_r(skb, sk); 1119 1012 prog = READ_ONCE(psock->progs.stream_verdict); 1013 + if (!prog) 1014 + prog = READ_ONCE(psock->progs.skb_verdict); 1120 1015 if (likely(prog)) { 1121 1016 skb_dst_drop(skb); 1122 1017 skb_bpf_redirect_clear(skb);
+55 -63
net/core/sock_map.c
··· 26 26 27 27 static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 28 28 struct bpf_prog *old, u32 which); 29 + static struct sk_psock_progs *sock_map_progs(struct bpf_map *map); 29 30 30 31 static struct bpf_map *sock_map_alloc(union bpf_attr *attr) 31 32 { ··· 156 155 strp_stop = true; 157 156 if (psock->saved_data_ready && stab->progs.stream_verdict) 158 157 verdict_stop = true; 158 + if (psock->saved_data_ready && stab->progs.skb_verdict) 159 + verdict_stop = true; 159 160 list_del(&link->list); 160 161 sk_psock_free_link(link); 161 162 } ··· 185 182 186 183 static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) 187 184 { 188 - struct proto *prot; 189 - 190 - switch (sk->sk_type) { 191 - case SOCK_STREAM: 192 - prot = tcp_bpf_get_proto(sk, psock); 193 - break; 194 - 195 - case SOCK_DGRAM: 196 - prot = udp_bpf_get_proto(sk, psock); 197 - break; 198 - 199 - default: 185 + if (!sk->sk_prot->psock_update_sk_prot) 200 186 return -EINVAL; 201 - } 202 - 203 - if (IS_ERR(prot)) 204 - return PTR_ERR(prot); 205 - 206 - sk_psock_update_proto(sk, psock, prot); 207 - return 0; 187 + psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot; 188 + return sk->sk_prot->psock_update_sk_prot(sk, false); 208 189 } 209 190 210 191 static struct sk_psock *sock_map_psock_get_checked(struct sock *sk) ··· 211 224 return psock; 212 225 } 213 226 214 - static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, 215 - struct sock *sk) 227 + static bool sock_map_redirect_allowed(const struct sock *sk); 228 + 229 + static int sock_map_link(struct bpf_map *map, struct sock *sk) 216 230 { 217 - struct bpf_prog *msg_parser, *stream_parser, *stream_verdict; 231 + struct sk_psock_progs *progs = sock_map_progs(map); 232 + struct bpf_prog *stream_verdict = NULL; 233 + struct bpf_prog *stream_parser = NULL; 234 + struct bpf_prog *skb_verdict = NULL; 235 + struct bpf_prog *msg_parser = NULL; 218 236 struct sk_psock *psock; 219 237 int 
ret; 238 + 239 + /* Only sockets we can redirect into/from in BPF need to hold 240 + * refs to parser/verdict progs and have their sk_data_ready 241 + * and sk_write_space callbacks overridden. 242 + */ 243 + if (!sock_map_redirect_allowed(sk)) 244 + goto no_progs; 220 245 221 246 stream_verdict = READ_ONCE(progs->stream_verdict); 222 247 if (stream_verdict) { ··· 255 256 } 256 257 } 257 258 259 + skb_verdict = READ_ONCE(progs->skb_verdict); 260 + if (skb_verdict) { 261 + skb_verdict = bpf_prog_inc_not_zero(skb_verdict); 262 + if (IS_ERR(skb_verdict)) { 263 + ret = PTR_ERR(skb_verdict); 264 + goto out_put_msg_parser; 265 + } 266 + } 267 + 268 + no_progs: 258 269 psock = sock_map_psock_get_checked(sk); 259 270 if (IS_ERR(psock)) { 260 271 ret = PTR_ERR(psock); ··· 274 265 if (psock) { 275 266 if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || 276 267 (stream_parser && READ_ONCE(psock->progs.stream_parser)) || 268 + (skb_verdict && READ_ONCE(psock->progs.skb_verdict)) || 269 + (skb_verdict && READ_ONCE(psock->progs.stream_verdict)) || 270 + (stream_verdict && READ_ONCE(psock->progs.skb_verdict)) || 277 271 (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) { 278 272 sk_psock_put(sk, psock); 279 273 ret = -EBUSY; ··· 308 296 } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { 309 297 psock_set_prog(&psock->progs.stream_verdict, stream_verdict); 310 298 sk_psock_start_verdict(sk,psock); 299 + } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) { 300 + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); 301 + sk_psock_start_verdict(sk, psock); 311 302 } 312 303 write_unlock_bh(&sk->sk_callback_lock); 313 304 return 0; ··· 319 304 out_drop: 320 305 sk_psock_put(sk, psock); 321 306 out_progs: 307 + if (skb_verdict) 308 + bpf_prog_put(skb_verdict); 309 + out_put_msg_parser: 322 310 if (msg_parser) 323 311 bpf_prog_put(msg_parser); 324 312 out_put_stream_parser: ··· 330 312 out_put_stream_verdict: 331 313 if 
(stream_verdict) 332 314 bpf_prog_put(stream_verdict); 333 - return ret; 334 - } 335 - 336 - static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) 337 - { 338 - struct sk_psock *psock; 339 - int ret; 340 - 341 - psock = sock_map_psock_get_checked(sk); 342 - if (IS_ERR(psock)) 343 - return PTR_ERR(psock); 344 - 345 - if (!psock) { 346 - psock = sk_psock_init(sk, map->numa_node); 347 - if (IS_ERR(psock)) 348 - return PTR_ERR(psock); 349 - } 350 - 351 - ret = sock_map_init_proto(sk, psock); 352 - if (ret < 0) 353 - sk_psock_put(sk, psock); 354 315 return ret; 355 316 } 356 317 ··· 463 466 return 0; 464 467 } 465 468 466 - static bool sock_map_redirect_allowed(const struct sock *sk); 467 - 468 469 static int sock_map_update_common(struct bpf_map *map, u32 idx, 469 470 struct sock *sk, u64 flags) 470 471 { ··· 482 487 if (!link) 483 488 return -ENOMEM; 484 489 485 - /* Only sockets we can redirect into/from in BPF need to hold 486 - * refs to parser/verdict progs and have their sk_data_ready 487 - * and sk_write_space callbacks overridden. 
488 - */ 489 - if (sock_map_redirect_allowed(sk)) 490 - ret = sock_map_link(map, &stab->progs, sk); 491 - else 492 - ret = sock_map_link_no_progs(map, sk); 490 + ret = sock_map_link(map, sk); 493 491 if (ret < 0) 494 492 goto out_free; 495 493 ··· 535 547 536 548 static bool sock_map_redirect_allowed(const struct sock *sk) 537 549 { 538 - return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN; 550 + if (sk_is_tcp(sk)) 551 + return sk->sk_state != TCP_LISTEN; 552 + else 553 + return sk->sk_state == TCP_ESTABLISHED; 539 554 } 540 555 541 556 static bool sock_map_sk_is_suitable(const struct sock *sk) 542 557 { 543 - return sk_is_tcp(sk) || sk_is_udp(sk); 558 + return !!sk->sk_prot->psock_update_sk_prot; 544 559 } 545 560 546 561 static bool sock_map_sk_state_allowed(const struct sock *sk) ··· 990 999 if (!link) 991 1000 return -ENOMEM; 992 1001 993 - /* Only sockets we can redirect into/from in BPF need to hold 994 - * refs to parser/verdict progs and have their sk_data_ready 995 - * and sk_write_space callbacks overridden. 996 - */ 997 - if (sock_map_redirect_allowed(sk)) 998 - ret = sock_map_link(map, &htab->progs, sk); 999 - else 1000 - ret = sock_map_link_no_progs(map, sk); 1002 + ret = sock_map_link(map, sk); 1001 1003 if (ret < 0) 1002 1004 goto out_free; 1003 1005 ··· 1450 1466 break; 1451 1467 #endif 1452 1468 case BPF_SK_SKB_STREAM_VERDICT: 1469 + if (progs->skb_verdict) 1470 + return -EBUSY; 1453 1471 pprog = &progs->stream_verdict; 1472 + break; 1473 + case BPF_SK_SKB_VERDICT: 1474 + if (progs->stream_verdict) 1475 + return -EBUSY; 1476 + pprog = &progs->skb_verdict; 1454 1477 break; 1455 1478 default: 1456 1479 return -EOPNOTSUPP; ··· 1531 1540 saved_close = psock->saved_close; 1532 1541 sock_map_remove_links(sk, psock); 1533 1542 rcu_read_unlock(); 1543 + sk_psock_stop(psock, true); 1534 1544 release_sock(sk); 1535 1545 saved_close(sk, timeout); 1536 1546 }
+1
net/ipv4/af_inet.c
··· 1070 1070 .setsockopt = sock_common_setsockopt, 1071 1071 .getsockopt = sock_common_getsockopt, 1072 1072 .sendmsg = inet_sendmsg, 1073 + .read_sock = udp_read_sock, 1073 1074 .recvmsg = inet_recvmsg, 1074 1075 .mmap = sock_no_mmap, 1075 1076 .sendpage = inet_sendpage,
+43
net/ipv4/bpf_tcp_ca.c
··· 5 5 #include <linux/bpf_verifier.h> 6 6 #include <linux/bpf.h> 7 7 #include <linux/btf.h> 8 + #include <linux/btf_ids.h> 8 9 #include <linux/filter.h> 9 10 #include <net/tcp.h> 10 11 #include <net/bpf_sk_storage.h> ··· 179 178 } 180 179 } 181 180 181 + BTF_SET_START(bpf_tcp_ca_kfunc_ids) 182 + BTF_ID(func, tcp_reno_ssthresh) 183 + BTF_ID(func, tcp_reno_cong_avoid) 184 + BTF_ID(func, tcp_reno_undo_cwnd) 185 + BTF_ID(func, tcp_slow_start) 186 + BTF_ID(func, tcp_cong_avoid_ai) 187 + #ifdef CONFIG_DYNAMIC_FTRACE 188 + #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) 189 + BTF_ID(func, cubictcp_init) 190 + BTF_ID(func, cubictcp_recalc_ssthresh) 191 + BTF_ID(func, cubictcp_cong_avoid) 192 + BTF_ID(func, cubictcp_state) 193 + BTF_ID(func, cubictcp_cwnd_event) 194 + BTF_ID(func, cubictcp_acked) 195 + #endif 196 + #if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP) 197 + BTF_ID(func, dctcp_init) 198 + BTF_ID(func, dctcp_update_alpha) 199 + BTF_ID(func, dctcp_cwnd_event) 200 + BTF_ID(func, dctcp_ssthresh) 201 + BTF_ID(func, dctcp_cwnd_undo) 202 + BTF_ID(func, dctcp_state) 203 + #endif 204 + #if IS_BUILTIN(CONFIG_TCP_CONG_BBR) 205 + BTF_ID(func, bbr_init) 206 + BTF_ID(func, bbr_main) 207 + BTF_ID(func, bbr_sndbuf_expand) 208 + BTF_ID(func, bbr_undo_cwnd) 209 + BTF_ID(func, bbr_cwnd_event) 210 + BTF_ID(func, bbr_ssthresh) 211 + BTF_ID(func, bbr_min_tso_segs) 212 + BTF_ID(func, bbr_set_state) 213 + #endif 214 + #endif /* CONFIG_DYNAMIC_FTRACE */ 215 + BTF_SET_END(bpf_tcp_ca_kfunc_ids) 216 + 217 + static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) 218 + { 219 + return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id); 220 + } 221 + 182 222 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { 183 223 .get_func_proto = bpf_tcp_ca_get_func_proto, 184 224 .is_valid_access = bpf_tcp_ca_is_valid_access, 185 225 .btf_struct_access = bpf_tcp_ca_btf_struct_access, 226 + .check_kfunc_call = bpf_tcp_ca_check_kfunc_call, 186 227 }; 187 228 188 229 static int 
bpf_tcp_ca_init_member(const struct btf_type *t,
+23 -107
net/ipv4/tcp_bpf.c
··· 10 10 #include <net/inet_common.h> 11 11 #include <net/tls.h> 12 12 13 - int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, 14 - struct msghdr *msg, int len, int flags) 15 - { 16 - struct iov_iter *iter = &msg->msg_iter; 17 - int peek = flags & MSG_PEEK; 18 - struct sk_msg *msg_rx; 19 - int i, copied = 0; 20 - 21 - msg_rx = list_first_entry_or_null(&psock->ingress_msg, 22 - struct sk_msg, list); 23 - 24 - while (copied != len) { 25 - struct scatterlist *sge; 26 - 27 - if (unlikely(!msg_rx)) 28 - break; 29 - 30 - i = msg_rx->sg.start; 31 - do { 32 - struct page *page; 33 - int copy; 34 - 35 - sge = sk_msg_elem(msg_rx, i); 36 - copy = sge->length; 37 - page = sg_page(sge); 38 - if (copied + copy > len) 39 - copy = len - copied; 40 - copy = copy_page_to_iter(page, sge->offset, copy, iter); 41 - if (!copy) 42 - return copied ? copied : -EFAULT; 43 - 44 - copied += copy; 45 - if (likely(!peek)) { 46 - sge->offset += copy; 47 - sge->length -= copy; 48 - if (!msg_rx->skb) 49 - sk_mem_uncharge(sk, copy); 50 - msg_rx->sg.size -= copy; 51 - 52 - if (!sge->length) { 53 - sk_msg_iter_var_next(i); 54 - if (!msg_rx->skb) 55 - put_page(page); 56 - } 57 - } else { 58 - /* Lets not optimize peek case if copy_page_to_iter 59 - * didn't copy the entire length lets just break. 
60 - */ 61 - if (copy != sge->length) 62 - return copied; 63 - sk_msg_iter_var_next(i); 64 - } 65 - 66 - if (copied == len) 67 - break; 68 - } while (i != msg_rx->sg.end); 69 - 70 - if (unlikely(peek)) { 71 - if (msg_rx == list_last_entry(&psock->ingress_msg, 72 - struct sk_msg, list)) 73 - break; 74 - msg_rx = list_next_entry(msg_rx, list); 75 - continue; 76 - } 77 - 78 - msg_rx->sg.start = i; 79 - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { 80 - list_del(&msg_rx->list); 81 - if (msg_rx->skb) 82 - consume_skb(msg_rx->skb); 83 - kfree(msg_rx); 84 - } 85 - msg_rx = list_first_entry_or_null(&psock->ingress_msg, 86 - struct sk_msg, list); 87 - } 88 - 89 - return copied; 90 - } 91 - EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg); 92 - 93 13 static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, 94 14 struct sk_msg *msg, u32 apply_bytes, int flags) 95 15 { ··· 163 243 return !empty; 164 244 } 165 245 166 - static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, 167 - int flags, long timeo, int *err) 168 - { 169 - DEFINE_WAIT_FUNC(wait, woken_wake_function); 170 - int ret = 0; 171 - 172 - if (sk->sk_shutdown & RCV_SHUTDOWN) 173 - return 1; 174 - 175 - if (!timeo) 176 - return ret; 177 - 178 - add_wait_queue(sk_sleep(sk), &wait); 179 - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); 180 - ret = sk_wait_event(sk, &timeo, 181 - !list_empty(&psock->ingress_msg) || 182 - !skb_queue_empty(&sk->sk_receive_queue), &wait); 183 - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); 184 - remove_wait_queue(sk_sleep(sk), &wait); 185 - return ret; 186 - } 187 - 188 246 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 189 247 int nonblock, int flags, int *addr_len) 190 248 { ··· 182 284 } 183 285 lock_sock(sk); 184 286 msg_bytes_ready: 185 - copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); 287 + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); 186 288 if (!copied) { 187 289 int data, err = 0; 188 290 long timeo; 189 291 190 292 timeo = 
sock_rcvtimeo(sk, nonblock); 191 - data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); 293 + data = sk_msg_wait_data(sk, psock, flags, timeo, &err); 192 294 if (data) { 193 295 if (!sk_psock_queue_empty(psock)) 194 296 goto msg_bytes_ready; ··· 499 601 ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; 500 602 } 501 603 502 - struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) 604 + int tcp_bpf_update_proto(struct sock *sk, bool restore) 503 605 { 606 + struct sk_psock *psock = sk_psock(sk); 504 607 int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; 505 608 int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; 506 609 610 + if (restore) { 611 + if (inet_csk_has_ulp(sk)) { 612 + tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space); 613 + } else { 614 + sk->sk_write_space = psock->saved_write_space; 615 + /* Pairs with lockless read in sk_clone_lock() */ 616 + WRITE_ONCE(sk->sk_prot, psock->sk_proto); 617 + } 618 + return 0; 619 + } 620 + 621 + if (inet_csk_has_ulp(sk)) 622 + return -EINVAL; 623 + 507 624 if (sk->sk_family == AF_INET6) { 508 625 if (tcp_bpf_assert_proto_ops(psock->sk_proto)) 509 - return ERR_PTR(-EINVAL); 626 + return -EINVAL; 510 627 511 628 tcp_bpf_check_v6_needs_rebuild(psock->sk_proto); 512 629 } 513 630 514 - return &tcp_bpf_prots[family][config]; 631 + /* Pairs with lockless read in sk_clone_lock() */ 632 + WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]); 633 + return 0; 515 634 } 635 + EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); 516 636 517 637 /* If a child got cloned from a listening socket that had tcp_bpf 518 638 * protocol callbacks installed, we need to restore the callbacks to
+12 -12
net/ipv4/tcp_cubic.c
··· 124 124 ca->sample_cnt = 0; 125 125 } 126 126 127 - static void bictcp_init(struct sock *sk) 127 + static void cubictcp_init(struct sock *sk) 128 128 { 129 129 struct bictcp *ca = inet_csk_ca(sk); 130 130 ··· 137 137 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 138 138 } 139 139 140 - static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) 140 + static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) 141 141 { 142 142 if (event == CA_EVENT_TX_START) { 143 143 struct bictcp *ca = inet_csk_ca(sk); ··· 319 319 ca->cnt = max(ca->cnt, 2U); 320 320 } 321 321 322 - static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) 322 + static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) 323 323 { 324 324 struct tcp_sock *tp = tcp_sk(sk); 325 325 struct bictcp *ca = inet_csk_ca(sk); ··· 338 338 tcp_cong_avoid_ai(tp, ca->cnt, acked); 339 339 } 340 340 341 - static u32 bictcp_recalc_ssthresh(struct sock *sk) 341 + static u32 cubictcp_recalc_ssthresh(struct sock *sk) 342 342 { 343 343 const struct tcp_sock *tp = tcp_sk(sk); 344 344 struct bictcp *ca = inet_csk_ca(sk); ··· 355 355 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); 356 356 } 357 357 358 - static void bictcp_state(struct sock *sk, u8 new_state) 358 + static void cubictcp_state(struct sock *sk, u8 new_state) 359 359 { 360 360 if (new_state == TCP_CA_Loss) { 361 361 bictcp_reset(inet_csk_ca(sk)); ··· 442 442 } 443 443 } 444 444 445 - static void bictcp_acked(struct sock *sk, const struct ack_sample *sample) 445 + static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) 446 446 { 447 447 const struct tcp_sock *tp = tcp_sk(sk); 448 448 struct bictcp *ca = inet_csk_ca(sk); ··· 471 471 } 472 472 473 473 static struct tcp_congestion_ops cubictcp __read_mostly = { 474 - .init = bictcp_init, 475 - .ssthresh = bictcp_recalc_ssthresh, 476 - .cong_avoid = bictcp_cong_avoid, 477 - .set_state = bictcp_state, 474 + .init = cubictcp_init, 475 + 
.ssthresh = cubictcp_recalc_ssthresh, 476 + .cong_avoid = cubictcp_cong_avoid, 477 + .set_state = cubictcp_state, 478 478 .undo_cwnd = tcp_reno_undo_cwnd, 479 - .cwnd_event = bictcp_cwnd_event, 480 - .pkts_acked = bictcp_acked, 479 + .cwnd_event = cubictcp_cwnd_event, 480 + .pkts_acked = cubictcp_acked, 481 481 .owner = THIS_MODULE, 482 482 .name = "cubic", 483 483 };
+3
net/ipv4/tcp_ipv4.c
··· 2806 2806 .hash = inet_hash, 2807 2807 .unhash = inet_unhash, 2808 2808 .get_port = inet_csk_get_port, 2809 + #ifdef CONFIG_BPF_SYSCALL 2810 + .psock_update_sk_prot = tcp_bpf_update_proto, 2811 + #endif 2809 2812 .enter_memory_pressure = tcp_enter_memory_pressure, 2810 2813 .leave_memory_pressure = tcp_leave_memory_pressure, 2811 2814 .stream_memory_free = tcp_stream_memory_free,
+32
net/ipv4/udp.c
··· 1782 1782 } 1783 1783 EXPORT_SYMBOL(__skb_recv_udp); 1784 1784 1785 + int udp_read_sock(struct sock *sk, read_descriptor_t *desc, 1786 + sk_read_actor_t recv_actor) 1787 + { 1788 + int copied = 0; 1789 + 1790 + while (1) { 1791 + struct sk_buff *skb; 1792 + int err, used; 1793 + 1794 + skb = skb_recv_udp(sk, 0, 1, &err); 1795 + if (!skb) 1796 + return err; 1797 + used = recv_actor(desc, skb, 0, skb->len); 1798 + if (used <= 0) { 1799 + if (!copied) 1800 + copied = used; 1801 + break; 1802 + } else if (used <= skb->len) { 1803 + copied += used; 1804 + } 1805 + 1806 + if (!desc->count) 1807 + break; 1808 + } 1809 + 1810 + return copied; 1811 + } 1812 + EXPORT_SYMBOL(udp_read_sock); 1813 + 1785 1814 /* 1786 1815 * This should be easy, if there is something there we 1787 1816 * return it, otherwise we block. ··· 2883 2854 .unhash = udp_lib_unhash, 2884 2855 .rehash = udp_v4_rehash, 2885 2856 .get_port = udp_v4_get_port, 2857 + #ifdef CONFIG_BPF_SYSCALL 2858 + .psock_update_sk_prot = udp_bpf_update_proto, 2859 + #endif 2886 2860 .memory_allocated = &udp_memory_allocated, 2887 2861 .sysctl_mem = sysctl_udp_mem, 2888 2862 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+76 -3
net/ipv4/udp_bpf.c
··· 4 4 #include <linux/skmsg.h> 5 5 #include <net/sock.h> 6 6 #include <net/udp.h> 7 + #include <net/inet_common.h> 8 + 9 + #include "udp_impl.h" 10 + 11 + static struct proto *udpv6_prot_saved __read_mostly; 12 + 13 + static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 14 + int noblock, int flags, int *addr_len) 15 + { 16 + #if IS_ENABLED(CONFIG_IPV6) 17 + if (sk->sk_family == AF_INET6) 18 + return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags, 19 + addr_len); 20 + #endif 21 + return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len); 22 + } 23 + 24 + static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 25 + int nonblock, int flags, int *addr_len) 26 + { 27 + struct sk_psock *psock; 28 + int copied, ret; 29 + 30 + if (unlikely(flags & MSG_ERRQUEUE)) 31 + return inet_recv_error(sk, msg, len, addr_len); 32 + 33 + psock = sk_psock_get(sk); 34 + if (unlikely(!psock)) 35 + return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 36 + 37 + lock_sock(sk); 38 + if (sk_psock_queue_empty(psock)) { 39 + ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 40 + goto out; 41 + } 42 + 43 + msg_bytes_ready: 44 + copied = sk_msg_recvmsg(sk, psock, msg, len, flags); 45 + if (!copied) { 46 + int data, err = 0; 47 + long timeo; 48 + 49 + timeo = sock_rcvtimeo(sk, nonblock); 50 + data = sk_msg_wait_data(sk, psock, flags, timeo, &err); 51 + if (data) { 52 + if (!sk_psock_queue_empty(psock)) 53 + goto msg_bytes_ready; 54 + ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len); 55 + goto out; 56 + } 57 + if (err) { 58 + ret = err; 59 + goto out; 60 + } 61 + copied = -EAGAIN; 62 + } 63 + ret = copied; 64 + out: 65 + release_sock(sk); 66 + sk_psock_put(sk, psock); 67 + return ret; 68 + } 7 69 8 70 enum { 9 71 UDP_BPF_IPV4, ··· 73 11 UDP_BPF_NUM_PROTS, 74 12 }; 75 13 76 - static struct proto *udpv6_prot_saved __read_mostly; 77 14 static DEFINE_SPINLOCK(udpv6_prot_lock); 78 15 static struct proto 
udp_bpf_prots[UDP_BPF_NUM_PROTS]; 79 16 ··· 81 20 *prot = *base; 82 21 prot->unhash = sock_map_unhash; 83 22 prot->close = sock_map_close; 23 + prot->recvmsg = udp_bpf_recvmsg; 84 24 } 85 25 86 26 static void udp_bpf_check_v6_needs_rebuild(struct proto *ops) ··· 103 41 } 104 42 core_initcall(udp_bpf_v4_build_proto); 105 43 106 - struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) 44 + int udp_bpf_update_proto(struct sock *sk, bool restore) 107 45 { 108 46 int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6; 47 + struct sk_psock *psock = sk_psock(sk); 48 + 49 + if (restore) { 50 + sk->sk_write_space = psock->saved_write_space; 51 + /* Pairs with lockless read in sk_clone_lock() */ 52 + WRITE_ONCE(sk->sk_prot, psock->sk_proto); 53 + return 0; 54 + } 109 55 110 56 if (sk->sk_family == AF_INET6) 111 57 udp_bpf_check_v6_needs_rebuild(psock->sk_proto); 112 58 113 - return &udp_bpf_prots[family]; 59 + /* Pairs with lockless read in sk_clone_lock() */ 60 + WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]); 61 + return 0; 114 62 } 63 + EXPORT_SYMBOL_GPL(udp_bpf_update_proto);
+1
net/ipv6/af_inet6.c
··· 714 714 .getsockopt = sock_common_getsockopt, /* ok */ 715 715 .sendmsg = inet6_sendmsg, /* retpoline's sake */ 716 716 .recvmsg = inet6_recvmsg, /* retpoline's sake */ 717 + .read_sock = udp_read_sock, 717 718 .mmap = sock_no_mmap, 718 719 .sendpage = sock_no_sendpage, 719 720 .set_peek_off = sk_set_peek_off,
+3
net/ipv6/tcp_ipv6.c
··· 2139 2139 .hash = inet6_hash, 2140 2140 .unhash = inet_unhash, 2141 2141 .get_port = inet_csk_get_port, 2142 + #ifdef CONFIG_BPF_SYSCALL 2143 + .psock_update_sk_prot = tcp_bpf_update_proto, 2144 + #endif 2142 2145 .enter_memory_pressure = tcp_enter_memory_pressure, 2143 2146 .leave_memory_pressure = tcp_leave_memory_pressure, 2144 2147 .stream_memory_free = tcp_stream_memory_free,
+3
net/ipv6/udp.c
··· 1714 1714 .unhash = udp_lib_unhash, 1715 1715 .rehash = udp_v6_rehash, 1716 1716 .get_port = udp_v6_get_port, 1717 + #ifdef CONFIG_BPF_SYSCALL 1718 + .psock_update_sk_prot = udp_bpf_update_proto, 1719 + #endif 1717 1720 .memory_allocated = &udp_memory_allocated, 1718 1721 .sysctl_mem = sysctl_udp_mem, 1719 1722 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+2 -2
net/tls/tls_sw.c
··· 1789 1789 skb = tls_wait_data(sk, psock, flags, timeo, &err); 1790 1790 if (!skb) { 1791 1791 if (psock) { 1792 - int ret = __tcp_bpf_recvmsg(sk, psock, 1793 - msg, len, flags); 1792 + int ret = sk_msg_recvmsg(sk, psock, msg, len, 1793 + flags); 1794 1794 1795 1795 if (ret > 0) { 1796 1796 decrypted += ret;
-1
samples/bpf/sampleip_kern.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/version.h> 8 7 #include <linux/ptrace.h> 9 8 #include <uapi/linux/bpf.h> 10 9 #include <uapi/linux/bpf_perf_event.h>
-1
samples/bpf/trace_event_kern.c
··· 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 7 #include <linux/ptrace.h> 8 - #include <linux/version.h> 9 8 #include <uapi/linux/bpf.h> 10 9 #include <uapi/linux/bpf_perf_event.h> 11 10 #include <uapi/linux/perf_event.h>
+18 -45
samples/bpf/xdpsock_user.c
··· 96 96 static int opt_timeout = 1000; 97 97 static bool opt_need_wakeup = true; 98 98 static u32 opt_num_xsks = 1; 99 - static u32 prog_id; 100 99 static bool opt_busy_poll; 101 100 static bool opt_reduced_cap; 102 101 ··· 461 462 return NULL; 462 463 } 463 464 464 - static void remove_xdp_program(void) 465 - { 466 - u32 curr_prog_id = 0; 467 - int cmd = CLOSE_CONN; 468 - 469 - if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { 470 - printf("bpf_get_link_xdp_id failed\n"); 471 - exit(EXIT_FAILURE); 472 - } 473 - if (prog_id == curr_prog_id) 474 - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); 475 - else if (!curr_prog_id) 476 - printf("couldn't find a prog id on a given interface\n"); 477 - else 478 - printf("program on interface changed, not removing\n"); 479 - 480 - if (opt_reduced_cap) { 481 - if (write(sock, &cmd, sizeof(int)) < 0) { 482 - fprintf(stderr, "Error writing into stream socket: %s", strerror(errno)); 483 - exit(EXIT_FAILURE); 484 - } 485 - } 486 - } 487 - 488 465 static void int_exit(int sig) 489 466 { 490 467 benchmark_done = true; 491 - } 492 - 493 - static void xdpsock_cleanup(void) 494 - { 495 - struct xsk_umem *umem = xsks[0]->umem->umem; 496 - int i; 497 - 498 - dump_stats(); 499 - for (i = 0; i < num_socks; i++) 500 - xsk_socket__delete(xsks[i]->xsk); 501 - (void)xsk_umem__delete(umem); 502 - remove_xdp_program(); 503 468 } 504 469 505 470 static void __exit_with_error(int error, const char *file, const char *func, ··· 471 508 { 472 509 fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, 473 510 line, error, strerror(error)); 474 - dump_stats(); 475 - remove_xdp_program(); 476 511 exit(EXIT_FAILURE); 477 512 } 478 513 479 - #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ 480 - __LINE__) 514 + #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) 515 + 516 + static void xdpsock_cleanup(void) 517 + { 518 + struct xsk_umem *umem = 
xsks[0]->umem->umem; 519 + int i, cmd = CLOSE_CONN; 520 + 521 + dump_stats(); 522 + for (i = 0; i < num_socks; i++) 523 + xsk_socket__delete(xsks[i]->xsk); 524 + (void)xsk_umem__delete(umem); 525 + 526 + if (opt_reduced_cap) { 527 + if (write(sock, &cmd, sizeof(int)) < 0) 528 + exit_with_error(errno); 529 + } 530 + } 531 + 481 532 static void swap_mac_addresses(void *data) 482 533 { 483 534 struct ether_header *eth = (struct ether_header *)data; ··· 854 877 txr = tx ? &xsk->tx : NULL; 855 878 ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, 856 879 rxr, txr, &cfg); 857 - if (ret) 858 - exit_with_error(-ret); 859 - 860 - ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); 861 880 if (ret) 862 881 exit_with_error(-ret); 863 882
+1
tools/bpf/bpftool/common.c
··· 57 57 58 58 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", 59 59 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", 60 + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", 60 61 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", 61 62 [BPF_LIRC_MODE2] = "lirc_mode2", 62 63 [BPF_FLOW_DISSECTOR] = "flow_dissector",
+1
tools/bpf/bpftool/prog.c
··· 76 76 static const char * const attach_type_strings[] = { 77 77 [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", 78 78 [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", 79 + [BPF_SK_SKB_VERDICT] = "skb_verdict", 79 80 [BPF_SK_MSG_VERDICT] = "msg_verdict", 80 81 [BPF_FLOW_DISSECTOR] = "flow_dissector", 81 82 [__MAX_BPF_ATTACH_TYPE] = NULL,
+5 -6
tools/bpf/resolve_btfids/main.c
··· 115 115 116 116 static int verbose; 117 117 118 - int eprintf(int level, int var, const char *fmt, ...) 118 + static int eprintf(int level, int var, const char *fmt, ...) 119 119 { 120 120 va_list args; 121 - int ret; 121 + int ret = 0; 122 122 123 123 if (var >= level) { 124 124 va_start(args, fmt); ··· 385 385 static int symbols_collect(struct object *obj) 386 386 { 387 387 Elf_Scn *scn = NULL; 388 - int n, i, err = 0; 388 + int n, i; 389 389 GElf_Shdr sh; 390 390 char *name; 391 391 ··· 402 402 * Scan symbols and look for the ones starting with 403 403 * __BTF_ID__* over .BTF_ids section. 404 404 */ 405 - for (i = 0; !err && i < n; i++) { 406 - char *tmp, *prefix; 405 + for (i = 0; i < n; i++) { 406 + char *prefix; 407 407 struct btf_id *id; 408 408 GElf_Sym sym; 409 - int err = -1; 410 409 411 410 if (!gelf_getsym(obj->efile.symbols, i, &sym)) 412 411 return -1;
+5
tools/include/uapi/linux/bpf.h
··· 957 957 BPF_XDP_CPUMAP, 958 958 BPF_SK_LOOKUP, 959 959 BPF_XDP, 960 + BPF_SK_SKB_VERDICT, 960 961 __MAX_BPF_ATTACH_TYPE 961 962 }; 962 963 ··· 1118 1117 * offset to another bpf function 1119 1118 */ 1120 1119 #define BPF_PSEUDO_CALL 1 1120 + /* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL, 1121 + * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel 1122 + */ 1123 + #define BPF_PSEUDO_KFUNC_CALL 2 1121 1124 1122 1125 /* flags for BPF_MAP_UPDATE_ELEM command */ 1123 1126 enum {
+303 -106
tools/lib/bpf/libbpf.c
··· 185 185 RELO_LD64, 186 186 RELO_CALL, 187 187 RELO_DATA, 188 - RELO_EXTERN, 188 + RELO_EXTERN_VAR, 189 + RELO_EXTERN_FUNC, 189 190 RELO_SUBPROG_ADDR, 190 191 }; 191 192 ··· 574 573 insn->off == 0; 575 574 } 576 575 577 - static bool is_ldimm64(struct bpf_insn *insn) 576 + static bool is_ldimm64_insn(struct bpf_insn *insn) 578 577 { 579 578 return insn->code == (BPF_LD | BPF_IMM | BPF_DW); 580 579 } 581 580 581 + static bool is_call_insn(const struct bpf_insn *insn) 582 + { 583 + return insn->code == (BPF_JMP | BPF_CALL); 584 + } 585 + 582 586 static bool insn_is_pseudo_func(struct bpf_insn *insn) 583 587 { 584 - return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 588 + return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC; 585 589 } 586 590 587 591 static int ··· 1927 1921 return btf_is_func_proto(t) ? t : NULL; 1928 1922 } 1929 1923 1930 - static const char *btf_kind_str(const struct btf_type *t) 1924 + static const char *__btf_kind_str(__u16 kind) 1931 1925 { 1932 - switch (btf_kind(t)) { 1926 + switch (kind) { 1933 1927 case BTF_KIND_UNKN: return "void"; 1934 1928 case BTF_KIND_INT: return "int"; 1935 1929 case BTF_KIND_PTR: return "ptr"; ··· 1949 1943 case BTF_KIND_FLOAT: return "float"; 1950 1944 default: return "unknown"; 1951 1945 } 1946 + } 1947 + 1948 + static const char *btf_kind_str(const struct btf_type *t) 1949 + { 1950 + return __btf_kind_str(btf_kind(t)); 1951 + } 1952 + 1953 + static enum btf_func_linkage btf_func_linkage(const struct btf_type *t) 1954 + { 1955 + return (enum btf_func_linkage)BTF_INFO_VLEN(t->info); 1952 1956 } 1953 1957 1954 1958 /* ··· 3025 3009 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 3026 3010 { 3027 3011 const struct btf_type *t; 3028 - const char *var_name; 3012 + const char *tname; 3029 3013 int i, n; 3030 3014 3031 3015 if (!btf) ··· 3035 3019 for (i = 1; i <= n; i++) { 3036 3020 t = btf__type_by_id(btf, i); 3037 3021 3038 - if (!btf_is_var(t)) 3022 + if 
(!btf_is_var(t) && !btf_is_func(t)) 3039 3023 continue; 3040 3024 3041 - var_name = btf__name_by_offset(btf, t->name_off); 3042 - if (strcmp(var_name, ext_name)) 3025 + tname = btf__name_by_offset(btf, t->name_off); 3026 + if (strcmp(tname, ext_name)) 3043 3027 continue; 3044 3028 3045 - if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 3029 + if (btf_is_var(t) && 3030 + btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 3031 + return -EINVAL; 3032 + 3033 + if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 3046 3034 return -EINVAL; 3047 3035 3048 3036 return i; ··· 3159 3139 return 0; 3160 3140 } 3161 3141 3142 + static int add_dummy_ksym_var(struct btf *btf) 3143 + { 3144 + int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 3145 + const struct btf_var_secinfo *vs; 3146 + const struct btf_type *sec; 3147 + 3148 + sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 3149 + BTF_KIND_DATASEC); 3150 + if (sec_btf_id < 0) 3151 + return 0; 3152 + 3153 + sec = btf__type_by_id(btf, sec_btf_id); 3154 + vs = btf_var_secinfos(sec); 3155 + for (i = 0; i < btf_vlen(sec); i++, vs++) { 3156 + const struct btf_type *vt; 3157 + 3158 + vt = btf__type_by_id(btf, vs->type); 3159 + if (btf_is_func(vt)) 3160 + break; 3161 + } 3162 + 3163 + /* No func in ksyms sec. No need to add dummy var. 
*/ 3164 + if (i == btf_vlen(sec)) 3165 + return 0; 3166 + 3167 + int_btf_id = find_int_btf_id(btf); 3168 + dummy_var_btf_id = btf__add_var(btf, 3169 + "dummy_ksym", 3170 + BTF_VAR_GLOBAL_ALLOCATED, 3171 + int_btf_id); 3172 + if (dummy_var_btf_id < 0) 3173 + pr_warn("cannot create a dummy_ksym var\n"); 3174 + 3175 + return dummy_var_btf_id; 3176 + } 3177 + 3162 3178 static int bpf_object__collect_externs(struct bpf_object *obj) 3163 3179 { 3164 3180 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 3165 3181 const struct btf_type *t; 3166 3182 struct extern_desc *ext; 3167 - int i, n, off; 3183 + int i, n, off, dummy_var_btf_id; 3168 3184 const char *ext_name, *sec_name; 3169 3185 Elf_Scn *scn; 3170 3186 GElf_Shdr sh; ··· 3211 3155 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 3212 3156 if (elf_sec_hdr(obj, scn, &sh)) 3213 3157 return -LIBBPF_ERRNO__FORMAT; 3158 + 3159 + dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 3160 + if (dummy_var_btf_id < 0) 3161 + return dummy_var_btf_id; 3214 3162 3215 3163 n = sh.sh_size / sh.sh_entsize; 3216 3164 pr_debug("looking for externs among %d symbols...\n", n); ··· 3260 3200 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 3261 3201 3262 3202 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 3203 + if (btf_is_func(t)) { 3204 + pr_warn("extern function %s is unsupported under %s section\n", 3205 + ext->name, KCONFIG_SEC); 3206 + return -ENOTSUP; 3207 + } 3263 3208 kcfg_sec = sec; 3264 3209 ext->type = EXT_KCFG; 3265 3210 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); ··· 3286 3221 return -ENOTSUP; 3287 3222 } 3288 3223 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 3224 + if (btf_is_func(t) && ext->is_weak) { 3225 + pr_warn("extern weak function %s is unsupported\n", 3226 + ext->name); 3227 + return -ENOTSUP; 3228 + } 3289 3229 ksym_sec = sec; 3290 3230 ext->type = EXT_KSYM; 3291 3231 skip_mods_and_typedefs(obj->btf, t->type, ··· 3317 3247 * extern variables in DATASEC 3318 3248 */ 3319 3249 int int_btf_id 
= find_int_btf_id(obj->btf); 3250 + /* For extern function, a dummy_var added earlier 3251 + * will be used to replace the vs->type and 3252 + * its name string will be used to refill 3253 + * the missing param's name. 3254 + */ 3255 + const struct btf_type *dummy_var; 3320 3256 3257 + dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 3321 3258 for (i = 0; i < obj->nr_extern; i++) { 3322 3259 ext = &obj->externs[i]; 3323 3260 if (ext->type != EXT_KSYM) ··· 3343 3266 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 3344 3267 ext = find_extern_by_name(obj, ext_name); 3345 3268 if (!ext) { 3346 - pr_warn("failed to find extern definition for BTF var '%s'\n", 3347 - ext_name); 3269 + pr_warn("failed to find extern definition for BTF %s '%s'\n", 3270 + btf_kind_str(vt), ext_name); 3348 3271 return -ESRCH; 3349 3272 } 3350 - btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 3351 - vt->type = int_btf_id; 3273 + if (btf_is_func(vt)) { 3274 + const struct btf_type *func_proto; 3275 + struct btf_param *param; 3276 + int j; 3277 + 3278 + func_proto = btf__type_by_id(obj->btf, 3279 + vt->type); 3280 + param = btf_params(func_proto); 3281 + /* Reuse the dummy_var string if the 3282 + * func proto does not have param name. 
3283 + */ 3284 + for (j = 0; j < btf_vlen(func_proto); j++) 3285 + if (param[j].type && !param[j].name_off) 3286 + param[j].name_off = 3287 + dummy_var->name_off; 3288 + vs->type = dummy_var_btf_id; 3289 + vt->info &= ~0xffff; 3290 + vt->info |= BTF_FUNC_GLOBAL; 3291 + } else { 3292 + btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 3293 + vt->type = int_btf_id; 3294 + } 3352 3295 vs->offset = off; 3353 3296 vs->size = sizeof(int); 3354 3297 } ··· 3500 3403 3501 3404 reloc_desc->processed = false; 3502 3405 3503 - /* sub-program call relocation */ 3504 - if (insn->code == (BPF_JMP | BPF_CALL)) { 3505 - if (insn->src_reg != BPF_PSEUDO_CALL) { 3506 - pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 3507 - return -LIBBPF_ERRNO__RELOC; 3508 - } 3509 - /* text_shndx can be 0, if no default "main" program exists */ 3510 - if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 3511 - sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 3512 - pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 3513 - prog->name, sym_name, sym_sec_name); 3514 - return -LIBBPF_ERRNO__RELOC; 3515 - } 3516 - if (sym->st_value % BPF_INSN_SZ) { 3517 - pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 3518 - prog->name, sym_name, (size_t)sym->st_value); 3519 - return -LIBBPF_ERRNO__RELOC; 3520 - } 3521 - reloc_desc->type = RELO_CALL; 3522 - reloc_desc->insn_idx = insn_idx; 3523 - reloc_desc->sym_off = sym->st_value; 3524 - return 0; 3525 - } 3526 - 3527 - if (!is_ldimm64(insn)) { 3406 + if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 3528 3407 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 3529 3408 prog->name, sym_name, insn_idx, insn->code); 3530 3409 return -LIBBPF_ERRNO__RELOC; ··· 3523 3450 } 3524 3451 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 3525 3452 prog->name, i, ext->name, ext->sym_idx, insn_idx); 3526 - reloc_desc->type = RELO_EXTERN; 3453 + if (insn->code == (BPF_JMP | 
BPF_CALL)) 3454 + reloc_desc->type = RELO_EXTERN_FUNC; 3455 + else 3456 + reloc_desc->type = RELO_EXTERN_VAR; 3527 3457 reloc_desc->insn_idx = insn_idx; 3528 3458 reloc_desc->sym_off = i; /* sym_off stores extern index */ 3459 + return 0; 3460 + } 3461 + 3462 + /* sub-program call relocation */ 3463 + if (is_call_insn(insn)) { 3464 + if (insn->src_reg != BPF_PSEUDO_CALL) { 3465 + pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 3466 + return -LIBBPF_ERRNO__RELOC; 3467 + } 3468 + /* text_shndx can be 0, if no default "main" program exists */ 3469 + if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 3470 + sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 3471 + pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 3472 + prog->name, sym_name, sym_sec_name); 3473 + return -LIBBPF_ERRNO__RELOC; 3474 + } 3475 + if (sym->st_value % BPF_INSN_SZ) { 3476 + pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 3477 + prog->name, sym_name, (size_t)sym->st_value); 3478 + return -LIBBPF_ERRNO__RELOC; 3479 + } 3480 + reloc_desc->type = RELO_CALL; 3481 + reloc_desc->insn_idx = insn_idx; 3482 + reloc_desc->sym_off = sym->st_value; 3529 3483 return 0; 3530 3484 } 3531 3485 ··· 5795 5695 /* poison second part of ldimm64 to avoid confusing error from 5796 5696 * verifier about "unknown opcode 00" 5797 5697 */ 5798 - if (is_ldimm64(insn)) 5698 + if (is_ldimm64_insn(insn)) 5799 5699 bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); 5800 5700 bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); 5801 5701 return 0; ··· 5871 5771 case BPF_LD: { 5872 5772 __u64 imm; 5873 5773 5874 - if (!is_ldimm64(insn) || 5774 + if (!is_ldimm64_insn(insn) || 5875 5775 insn[0].src_reg != 0 || insn[0].off != 0 || 5876 5776 insn_idx + 1 >= prog->insns_cnt || 5877 5777 insn[1].code != 0 || insn[1].dst_reg != 0 || ··· 6313 6213 insn[0].imm = obj->maps[relo->map_idx].fd; 6314 6214 relo->processed = true; 6315 6215 break; 6316 - case 
RELO_EXTERN: 6216 + case RELO_EXTERN_VAR: 6317 6217 ext = &obj->externs[relo->sym_off]; 6318 6218 if (ext->type == EXT_KCFG) { 6319 6219 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; ··· 6329 6229 insn[1].imm = ext->ksym.addr >> 32; 6330 6230 } 6331 6231 } 6232 + relo->processed = true; 6233 + break; 6234 + case RELO_EXTERN_FUNC: 6235 + ext = &obj->externs[relo->sym_off]; 6236 + insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 6237 + insn[0].imm = ext->ksym.kernel_btf_id; 6332 6238 relo->processed = true; 6333 6239 break; 6334 6240 case RELO_SUBPROG_ADDR: ··· 7457 7351 { 7458 7352 char sym_type, sym_name[500]; 7459 7353 unsigned long long sym_addr; 7354 + const struct btf_type *t; 7460 7355 struct extern_desc *ext; 7461 7356 int ret, err = 0; 7462 7357 FILE *f; ··· 7484 7377 if (!ext || ext->type != EXT_KSYM) 7485 7378 continue; 7486 7379 7380 + t = btf__type_by_id(obj->btf, ext->btf_id); 7381 + if (!btf_is_var(t)) 7382 + continue; 7383 + 7487 7384 if (ext->is_set && ext->ksym.addr != sym_addr) { 7488 7385 pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", 7489 7386 sym_name, ext->ksym.addr, sym_addr); ··· 7506 7395 return err; 7507 7396 } 7508 7397 7398 + static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 7399 + __u16 kind, struct btf **res_btf, 7400 + int *res_btf_fd) 7401 + { 7402 + int i, id, btf_fd, err; 7403 + struct btf *btf; 7404 + 7405 + btf = obj->btf_vmlinux; 7406 + btf_fd = 0; 7407 + id = btf__find_by_name_kind(btf, ksym_name, kind); 7408 + 7409 + if (id == -ENOENT) { 7410 + err = load_module_btfs(obj); 7411 + if (err) 7412 + return err; 7413 + 7414 + for (i = 0; i < obj->btf_module_cnt; i++) { 7415 + btf = obj->btf_modules[i].btf; 7416 + /* we assume module BTF FD is always >0 */ 7417 + btf_fd = obj->btf_modules[i].fd; 7418 + id = btf__find_by_name_kind(btf, ksym_name, kind); 7419 + if (id != -ENOENT) 7420 + break; 7421 + } 7422 + } 7423 + if (id <= 0) { 7424 + pr_warn("extern (%s ksym) '%s': failed to find BTF ID in 
kernel BTF(s).\n", 7425 + __btf_kind_str(kind), ksym_name); 7426 + return -ESRCH; 7427 + } 7428 + 7429 + *res_btf = btf; 7430 + *res_btf_fd = btf_fd; 7431 + return id; 7432 + } 7433 + 7434 + static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 7435 + struct extern_desc *ext) 7436 + { 7437 + const struct btf_type *targ_var, *targ_type; 7438 + __u32 targ_type_id, local_type_id; 7439 + const char *targ_var_name; 7440 + int id, btf_fd = 0, err; 7441 + struct btf *btf = NULL; 7442 + 7443 + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); 7444 + if (id < 0) 7445 + return id; 7446 + 7447 + /* find local type_id */ 7448 + local_type_id = ext->ksym.type_id; 7449 + 7450 + /* find target type_id */ 7451 + targ_var = btf__type_by_id(btf, id); 7452 + targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 7453 + targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 7454 + 7455 + err = bpf_core_types_are_compat(obj->btf, local_type_id, 7456 + btf, targ_type_id); 7457 + if (err <= 0) { 7458 + const struct btf_type *local_type; 7459 + const char *targ_name, *local_name; 7460 + 7461 + local_type = btf__type_by_id(obj->btf, local_type_id); 7462 + local_name = btf__name_by_offset(obj->btf, local_type->name_off); 7463 + targ_name = btf__name_by_offset(btf, targ_type->name_off); 7464 + 7465 + pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 7466 + ext->name, local_type_id, 7467 + btf_kind_str(local_type), local_name, targ_type_id, 7468 + btf_kind_str(targ_type), targ_name); 7469 + return -EINVAL; 7470 + } 7471 + 7472 + ext->is_set = true; 7473 + ext->ksym.kernel_btf_obj_fd = btf_fd; 7474 + ext->ksym.kernel_btf_id = id; 7475 + pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 7476 + ext->name, id, btf_kind_str(targ_var), targ_var_name); 7477 + 7478 + return 0; 7479 + } 7480 + 7481 + static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 7482 + 
struct extern_desc *ext) 7483 + { 7484 + int local_func_proto_id, kfunc_proto_id, kfunc_id; 7485 + const struct btf_type *kern_func; 7486 + struct btf *kern_btf = NULL; 7487 + int ret, kern_btf_fd = 0; 7488 + 7489 + local_func_proto_id = ext->ksym.type_id; 7490 + 7491 + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, 7492 + &kern_btf, &kern_btf_fd); 7493 + if (kfunc_id < 0) { 7494 + pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", 7495 + ext->name); 7496 + return kfunc_id; 7497 + } 7498 + 7499 + if (kern_btf != obj->btf_vmlinux) { 7500 + pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", 7501 + ext->name); 7502 + return -ENOTSUP; 7503 + } 7504 + 7505 + kern_func = btf__type_by_id(kern_btf, kfunc_id); 7506 + kfunc_proto_id = kern_func->type; 7507 + 7508 + ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 7509 + kern_btf, kfunc_proto_id); 7510 + if (ret <= 0) { 7511 + pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n", 7512 + ext->name, local_func_proto_id, kfunc_proto_id); 7513 + return -EINVAL; 7514 + } 7515 + 7516 + ext->is_set = true; 7517 + ext->ksym.kernel_btf_obj_fd = kern_btf_fd; 7518 + ext->ksym.kernel_btf_id = kfunc_id; 7519 + pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", 7520 + ext->name, kfunc_id); 7521 + 7522 + return 0; 7523 + } 7524 + 7509 7525 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 7510 7526 { 7527 + const struct btf_type *t; 7511 7528 struct extern_desc *ext; 7512 - struct btf *btf; 7513 - int i, j, id, btf_fd, err; 7529 + int i, err; 7514 7530 7515 7531 for (i = 0; i < obj->nr_extern; i++) { 7516 - const struct btf_type *targ_var, *targ_type; 7517 - __u32 targ_type_id, local_type_id; 7518 - const char *targ_var_name; 7519 - int ret; 7520 - 7521 7532 ext = &obj->externs[i]; 7522 7533 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 7523 7534 continue; 7524 7535 7525 - btf = obj->btf_vmlinux; 7526 - btf_fd = 
0; 7527 - id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); 7528 - if (id == -ENOENT) { 7529 - err = load_module_btfs(obj); 7530 - if (err) 7531 - return err; 7532 - 7533 - for (j = 0; j < obj->btf_module_cnt; j++) { 7534 - btf = obj->btf_modules[j].btf; 7535 - /* we assume module BTF FD is always >0 */ 7536 - btf_fd = obj->btf_modules[j].fd; 7537 - id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); 7538 - if (id != -ENOENT) 7539 - break; 7540 - } 7541 - } 7542 - if (id <= 0) { 7543 - pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n", 7544 - ext->name); 7545 - return -ESRCH; 7546 - } 7547 - 7548 - /* find local type_id */ 7549 - local_type_id = ext->ksym.type_id; 7550 - 7551 - /* find target type_id */ 7552 - targ_var = btf__type_by_id(btf, id); 7553 - targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 7554 - targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 7555 - 7556 - ret = bpf_core_types_are_compat(obj->btf, local_type_id, 7557 - btf, targ_type_id); 7558 - if (ret <= 0) { 7559 - const struct btf_type *local_type; 7560 - const char *targ_name, *local_name; 7561 - 7562 - local_type = btf__type_by_id(obj->btf, local_type_id); 7563 - local_name = btf__name_by_offset(obj->btf, local_type->name_off); 7564 - targ_name = btf__name_by_offset(btf, targ_type->name_off); 7565 - 7566 - pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 7567 - ext->name, local_type_id, 7568 - btf_kind_str(local_type), local_name, targ_type_id, 7569 - btf_kind_str(targ_type), targ_name); 7570 - return -EINVAL; 7571 - } 7572 - 7573 - ext->is_set = true; 7574 - ext->ksym.kernel_btf_obj_fd = btf_fd; 7575 - ext->ksym.kernel_btf_id = id; 7576 - pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", 7577 - ext->name, id, btf_kind_str(targ_var), targ_var_name); 7536 + t = btf__type_by_id(obj->btf, ext->btf_id); 7537 + if (btf_is_var(t)) 7538 + err = 
bpf_object__resolve_ksym_var_btf_id(obj, ext); 7539 + else 7540 + err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 7541 + if (err) 7542 + return err; 7578 7543 } 7579 7544 return 0; 7580 7545 } ··· 8457 8270 return obj->btf ? btf__fd(obj->btf) : -1; 8458 8271 } 8459 8272 8273 + int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 8274 + { 8275 + if (obj->loaded) 8276 + return -EINVAL; 8277 + 8278 + obj->kern_version = kern_version; 8279 + 8280 + return 0; 8281 + } 8282 + 8460 8283 int bpf_object__set_priv(struct bpf_object *obj, void *priv, 8461 8284 bpf_object_clear_priv_t clear_priv) 8462 8285 { ··· 8655 8458 return fd; 8656 8459 } 8657 8460 8658 - enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) 8461 + enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) 8659 8462 { 8660 8463 return prog->type; 8661 8464 } ··· 8700 8503 BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); 8701 8504 8702 8505 enum bpf_attach_type 8703 - bpf_program__get_expected_attach_type(struct bpf_program *prog) 8506 + bpf_program__get_expected_attach_type(const struct bpf_program *prog) 8704 8507 { 8705 8508 return prog->expected_attach_type; 8706 8509 }
+3 -2
tools/lib/bpf/libbpf.h
··· 143 143 144 144 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); 145 145 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); 146 + LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version); 146 147 147 148 struct btf; 148 149 LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); ··· 362 361 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); 363 362 LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); 364 363 365 - LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); 364 + LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 366 365 LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, 367 366 enum bpf_prog_type type); 368 367 369 368 LIBBPF_API enum bpf_attach_type 370 - bpf_program__get_expected_attach_type(struct bpf_program *prog); 369 + bpf_program__get_expected_attach_type(const struct bpf_program *prog); 371 370 LIBBPF_API void 372 371 bpf_program__set_expected_attach_type(struct bpf_program *prog, 373 372 enum bpf_attach_type type);
+1
tools/lib/bpf/libbpf.map
··· 359 359 bpf_linker__finalize; 360 360 bpf_linker__free; 361 361 bpf_linker__new; 362 + bpf_object__set_kversion; 362 363 } LIBBPF_0.3.0;
+28 -9
tools/lib/bpf/linker.c
··· 94 94 int sec_sym_idx; 95 95 96 96 /* section's DATASEC variable info, emitted on BTF finalization */ 97 + bool has_btf; 97 98 int sec_var_cnt; 98 99 struct btf_var_secinfo *sec_vars; 99 100 ··· 1437 1436 continue; 1438 1437 dst_sec = &linker->secs[src_sec->dst_id]; 1439 1438 1439 + /* Mark section as having BTF regardless of the presence of 1440 + * variables. In some cases compiler might generate empty BTF 1441 + * with no variables information. E.g., when promoting local 1442 + * array/structure variable initial values and BPF object 1443 + * file otherwise has no read-only static variables in 1444 + * .rodata. We need to preserve such empty BTF and just set 1445 + * correct section size. 1446 + */ 1447 + dst_sec->has_btf = true; 1448 + 1440 1449 t = btf__type_by_id(obj->btf, src_sec->sec_type_id); 1441 1450 src_var = btf_var_secinfos(t); 1442 1451 n = btf_vlen(t); ··· 1728 1717 for (i = 1; i < linker->sec_cnt; i++) { 1729 1718 struct dst_sec *sec = &linker->secs[i]; 1730 1719 1731 - if (!sec->sec_var_cnt) 1720 + if (!sec->has_btf) 1732 1721 continue; 1733 1722 1734 1723 id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz); ··· 1906 1895 struct dst_sec *sec = &linker->secs[i]; 1907 1896 1908 1897 sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info); 1909 - if (sz < 0) 1910 - return sz; 1898 + if (sz < 0) { 1899 + err = sz; 1900 + goto out; 1901 + } 1911 1902 1912 1903 cur += sz; 1913 1904 } ··· 1923 1910 struct dst_sec *sec = &linker->secs[i]; 1924 1911 1925 1912 sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info); 1926 - if (sz < 0) 1927 - return sz; 1913 + if (sz < 0) { 1914 + err = sz; 1915 + goto out; 1916 + } 1928 1917 1929 1918 cur += sz; 1930 1919 } ··· 1940 1925 struct dst_sec *sec = &linker->secs[i]; 1941 1926 1942 1927 sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info); 1943 - if (sz < 0) 1944 - return sz; 1928 + if (sz < 0) { 1929 + err = sz; 1930 + goto out; 1931 + } 1945 1932 1946 1933 cur 
+= sz; 1947 1934 } ··· 1954 1937 if (err) { 1955 1938 linker->btf_ext = NULL; 1956 1939 pr_warn("failed to parse final .BTF.ext data: %d\n", err); 1957 - return err; 1940 + goto out; 1958 1941 } 1959 1942 1960 - return 0; 1943 + out: 1944 + free(data); 1945 + return err; 1961 1946 }
+217 -49
tools/lib/bpf/xsk.c
··· 28 28 #include <sys/mman.h> 29 29 #include <sys/socket.h> 30 30 #include <sys/types.h> 31 + #include <linux/if_link.h> 31 32 32 33 #include "bpf.h" 33 34 #include "libbpf.h" ··· 71 70 int ifindex; 72 71 struct list_head list; 73 72 int prog_fd; 73 + int link_fd; 74 74 int xsks_map_fd; 75 75 char ifname[IFNAMSIZ]; 76 + bool has_bpf_link; 76 77 }; 77 78 78 79 struct xsk_socket { ··· 412 409 static const int log_buf_size = 16 * 1024; 413 410 struct xsk_ctx *ctx = xsk->ctx; 414 411 char log_buf[log_buf_size]; 415 - int err, prog_fd; 412 + int prog_fd; 416 413 417 414 /* This is the fallback C-program: 418 415 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) ··· 502 499 return prog_fd; 503 500 } 504 501 505 - err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, 506 - xsk->config.xdp_flags); 502 + ctx->prog_fd = prog_fd; 503 + return 0; 504 + } 505 + 506 + static int xsk_create_bpf_link(struct xsk_socket *xsk) 507 + { 508 + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); 509 + struct xsk_ctx *ctx = xsk->ctx; 510 + __u32 prog_id = 0; 511 + int link_fd; 512 + int err; 513 + 514 + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); 507 515 if (err) { 508 - close(prog_fd); 516 + pr_warn("getting XDP prog id failed\n"); 509 517 return err; 510 518 } 511 519 512 - ctx->prog_fd = prog_fd; 520 + /* if there's a netlink-based XDP prog loaded on interface, bail out 521 + * and ask user to do the removal by himself 522 + */ 523 + if (prog_id) { 524 + pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); 525 + return -EINVAL; 526 + } 527 + 528 + opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); 529 + 530 + link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); 531 + if (link_fd < 0) { 532 + pr_warn("bpf_link_create failed: %s\n", strerror(errno)); 533 + return link_fd; 534 + } 535 + 536 + ctx->link_fd = link_fd; 513 537 return 0; 514 538 } 515 539 ··· 
655 625 close(fd); 656 626 } 657 627 658 - err = 0; 659 628 if (ctx->xsks_map_fd == -1) 660 629 err = -ENOENT; 661 630 ··· 669 640 670 641 return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, 671 642 &xsk->fd, 0); 643 + } 644 + 645 + static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) 646 + { 647 + struct bpf_link_info link_info; 648 + __u32 link_len; 649 + __u32 id = 0; 650 + int err; 651 + int fd; 652 + 653 + while (true) { 654 + err = bpf_link_get_next_id(id, &id); 655 + if (err) { 656 + if (errno == ENOENT) { 657 + err = 0; 658 + break; 659 + } 660 + pr_warn("can't get next link: %s\n", strerror(errno)); 661 + break; 662 + } 663 + 664 + fd = bpf_link_get_fd_by_id(id); 665 + if (fd < 0) { 666 + if (errno == ENOENT) 667 + continue; 668 + pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); 669 + err = -errno; 670 + break; 671 + } 672 + 673 + link_len = sizeof(struct bpf_link_info); 674 + memset(&link_info, 0, link_len); 675 + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); 676 + if (err) { 677 + pr_warn("can't get link info: %s\n", strerror(errno)); 678 + close(fd); 679 + break; 680 + } 681 + if (link_info.type == BPF_LINK_TYPE_XDP) { 682 + if (link_info.xdp.ifindex == ifindex) { 683 + *link_fd = fd; 684 + if (prog_id) 685 + *prog_id = link_info.prog_id; 686 + break; 687 + } 688 + } 689 + close(fd); 690 + } 691 + 692 + return err; 693 + } 694 + 695 + static bool xsk_probe_bpf_link(void) 696 + { 697 + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, 698 + .flags = XDP_FLAGS_SKB_MODE); 699 + struct bpf_load_program_attr prog_attr; 700 + struct bpf_insn insns[2] = { 701 + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), 702 + BPF_EXIT_INSN() 703 + }; 704 + int prog_fd, link_fd = -1; 705 + int ifindex_lo = 1; 706 + bool ret = false; 707 + int err; 708 + 709 + err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); 710 + if (err) 711 + return ret; 712 + 713 + if (link_fd >= 0) 714 + return true; 715 + 716 + memset(&prog_attr, 0, 
sizeof(prog_attr)); 717 + prog_attr.prog_type = BPF_PROG_TYPE_XDP; 718 + prog_attr.insns = insns; 719 + prog_attr.insns_cnt = ARRAY_SIZE(insns); 720 + prog_attr.license = "GPL"; 721 + 722 + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); 723 + if (prog_fd < 0) 724 + return ret; 725 + 726 + link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); 727 + close(prog_fd); 728 + 729 + if (link_fd >= 0) { 730 + ret = true; 731 + close(link_fd); 732 + } 733 + 734 + return ret; 672 735 } 673 736 674 737 static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) ··· 784 663 ctx->ifname[IFNAMSIZ - 1] = 0; 785 664 786 665 xsk->ctx = ctx; 666 + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); 787 667 788 668 return 0; 789 669 } 790 670 791 - static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, 792 - int *xsks_map_fd) 671 + static int xsk_init_xdp_res(struct xsk_socket *xsk, 672 + int *xsks_map_fd) 673 + { 674 + struct xsk_ctx *ctx = xsk->ctx; 675 + int err; 676 + 677 + err = xsk_create_bpf_maps(xsk); 678 + if (err) 679 + return err; 680 + 681 + err = xsk_load_xdp_prog(xsk); 682 + if (err) 683 + goto err_load_xdp_prog; 684 + 685 + if (ctx->has_bpf_link) 686 + err = xsk_create_bpf_link(xsk); 687 + else 688 + err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, 689 + xsk->config.xdp_flags); 690 + 691 + if (err) 692 + goto err_attach_xdp_prog; 693 + 694 + if (!xsk->rx) 695 + return err; 696 + 697 + err = xsk_set_bpf_maps(xsk); 698 + if (err) 699 + goto err_set_bpf_maps; 700 + 701 + return err; 702 + 703 + err_set_bpf_maps: 704 + if (ctx->has_bpf_link) 705 + close(ctx->link_fd); 706 + else 707 + bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); 708 + err_attach_xdp_prog: 709 + close(ctx->prog_fd); 710 + err_load_xdp_prog: 711 + xsk_delete_bpf_maps(xsk); 712 + return err; 713 + } 714 + 715 + static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) 716 + { 717 + struct xsk_ctx *ctx = xsk->ctx; 718 + int err; 719 + 720 + ctx->prog_fd 
= bpf_prog_get_fd_by_id(prog_id); 721 + if (ctx->prog_fd < 0) { 722 + err = -errno; 723 + goto err_prog_fd; 724 + } 725 + err = xsk_lookup_bpf_maps(xsk); 726 + if (err) 727 + goto err_lookup_maps; 728 + 729 + if (!xsk->rx) 730 + return err; 731 + 732 + err = xsk_set_bpf_maps(xsk); 733 + if (err) 734 + goto err_set_maps; 735 + 736 + return err; 737 + 738 + err_set_maps: 739 + close(ctx->xsks_map_fd); 740 + err_lookup_maps: 741 + close(ctx->prog_fd); 742 + err_prog_fd: 743 + if (ctx->has_bpf_link) 744 + close(ctx->link_fd); 745 + return err; 746 + } 747 + 748 + static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) 793 749 { 794 750 struct xsk_socket *xsk = _xdp; 795 751 struct xsk_ctx *ctx = xsk->ctx; 796 752 __u32 prog_id = 0; 797 753 int err; 798 754 799 - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, 800 - xsk->config.xdp_flags); 755 + if (ctx->has_bpf_link) 756 + err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); 757 + else 758 + err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); 759 + 801 760 if (err) 802 761 return err; 803 762 804 - if (!prog_id) { 805 - err = xsk_create_bpf_maps(xsk); 806 - if (err) 807 - return err; 763 + err = !prog_id ? 
xsk_init_xdp_res(xsk, xsks_map_fd) : 764 + xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); 808 765 809 - err = xsk_load_xdp_prog(xsk); 810 - if (err) { 811 - goto err_load_xdp_prog; 812 - } 813 - } else { 814 - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); 815 - if (ctx->prog_fd < 0) 816 - return -errno; 817 - err = xsk_lookup_bpf_maps(xsk); 818 - if (err) { 819 - close(ctx->prog_fd); 820 - return err; 821 - } 822 - } 823 - 824 - if (xsk->rx) { 825 - err = xsk_set_bpf_maps(xsk); 826 - if (err) { 827 - if (!prog_id) { 828 - goto err_set_bpf_maps; 829 - } else { 830 - close(ctx->prog_fd); 831 - return err; 832 - } 833 - } 834 - } 835 - if (xsks_map_fd) 766 + if (!err && xsks_map_fd) 836 767 *xsks_map_fd = ctx->xsks_map_fd; 837 - 838 - return 0; 839 - 840 - err_set_bpf_maps: 841 - close(ctx->prog_fd); 842 - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); 843 - err_load_xdp_prog: 844 - xsk_delete_bpf_maps(xsk); 845 768 846 769 return err; 847 770 } ··· 1063 898 } 1064 899 } 1065 900 xsk->ctx = ctx; 901 + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); 1066 902 1067 903 if (rx) { 1068 904 err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, ··· 1220 1054 if (ctx->prog_fd != -1) { 1221 1055 xsk_delete_bpf_maps(xsk); 1222 1056 close(ctx->prog_fd); 1057 + if (ctx->has_bpf_link) 1058 + close(ctx->link_fd); 1223 1059 } 1224 1060 1225 1061 err = xsk_get_mmap_offsets(xsk->fd, &off);
+14
tools/testing/selftests/bpf/README.rst
··· 179 179 either crash when compiling these tests, or generate an incorrect BTF. 180 180 181 181 __ https://reviews.llvm.org/D83289 182 + 183 + Kernel function call test and Clang version 184 + =========================================== 185 + 186 + Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support 187 + to generate extern function in BTF. It was introduced in `Clang 13`__. 188 + 189 + Without it, the error from compiling bpf selftests looks like: 190 + 191 + .. code-block:: console 192 + 193 + libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2 194 + 195 + __ https://reviews.llvm.org/D93563
+2 -27
tools/testing/selftests/bpf/bpf_tcp_helpers.h
··· 187 187 typeof(y) __y = (y); \ 188 188 __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) 189 189 190 - static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) 191 - { 192 - __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); 193 - 194 - acked -= cwnd - tp->snd_cwnd; 195 - tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); 196 - 197 - return acked; 198 - } 199 - 200 190 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) 201 191 { 202 192 return tp->snd_cwnd < tp->snd_ssthresh; ··· 203 213 return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); 204 214 } 205 215 206 - static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) 207 - { 208 - /* If credits accumulated at a higher w, apply them gently now. */ 209 - if (tp->snd_cwnd_cnt >= w) { 210 - tp->snd_cwnd_cnt = 0; 211 - tp->snd_cwnd++; 212 - } 213 - 214 - tp->snd_cwnd_cnt += acked; 215 - if (tp->snd_cwnd_cnt >= w) { 216 - __u32 delta = tp->snd_cwnd_cnt / w; 217 - 218 - tp->snd_cwnd_cnt -= delta * w; 219 - tp->snd_cwnd += delta; 220 - } 221 - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); 222 - } 216 + extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; 217 + extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; 223 218 224 219 #endif
+158
tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <arpa/inet.h> 4 + #include <linux/bpf.h> 5 + #include <netinet/in.h> 6 + #include <stdio.h> 7 + #include <errno.h> 8 + #include <string.h> 9 + #include <stdlib.h> 10 + 11 + #include <bpf/bpf.h> 12 + #include <bpf/libbpf.h> 13 + 14 + #include <test_maps.h> 15 + 16 + struct test_lpm_key { 17 + __u32 prefix; 18 + struct in_addr ipv4; 19 + }; 20 + 21 + static void map_batch_update(int map_fd, __u32 max_entries, 22 + struct test_lpm_key *keys, int *values) 23 + { 24 + __u32 i; 25 + int err; 26 + char buff[16] = { 0 }; 27 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 28 + .elem_flags = 0, 29 + .flags = 0, 30 + ); 31 + 32 + for (i = 0; i < max_entries; i++) { 33 + keys[i].prefix = 32; 34 + snprintf(buff, 16, "192.168.1.%d", i + 1); 35 + inet_pton(AF_INET, buff, &keys[i].ipv4); 36 + values[i] = i + 1; 37 + } 38 + 39 + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); 40 + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); 41 + } 42 + 43 + static void map_batch_verify(int *visited, __u32 max_entries, 44 + struct test_lpm_key *keys, int *values) 45 + { 46 + char buff[16] = { 0 }; 47 + int lower_byte = 0; 48 + __u32 i; 49 + 50 + memset(visited, 0, max_entries * sizeof(*visited)); 51 + for (i = 0; i < max_entries; i++) { 52 + inet_ntop(AF_INET, &keys[i].ipv4, buff, 32); 53 + CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF, 54 + "sscanf()", "error: i %d\n", i); 55 + CHECK(lower_byte != values[i], "key/value checking", 56 + "error: i %d key %s value %d\n", i, buff, values[i]); 57 + visited[i] = 1; 58 + } 59 + for (i = 0; i < max_entries; i++) { 60 + CHECK(visited[i] != 1, "visited checking", 61 + "error: keys array at index %d missing\n", i); 62 + } 63 + } 64 + 65 + void test_lpm_trie_map_batch_ops(void) 66 + { 67 + struct bpf_create_map_attr xattr = { 68 + .name = "lpm_trie_map", 69 + .map_type = BPF_MAP_TYPE_LPM_TRIE, 70 + .key_size = sizeof(struct test_lpm_key), 71 + 
.value_size = sizeof(int), 72 + .map_flags = BPF_F_NO_PREALLOC, 73 + }; 74 + struct test_lpm_key *keys, key; 75 + int map_fd, *values, *visited; 76 + __u32 step, count, total, total_success; 77 + const __u32 max_entries = 10; 78 + __u64 batch = 0; 79 + int err; 80 + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, 81 + .elem_flags = 0, 82 + .flags = 0, 83 + ); 84 + 85 + xattr.max_entries = max_entries; 86 + map_fd = bpf_create_map_xattr(&xattr); 87 + CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", 88 + strerror(errno)); 89 + 90 + keys = malloc(max_entries * sizeof(struct test_lpm_key)); 91 + values = malloc(max_entries * sizeof(int)); 92 + visited = malloc(max_entries * sizeof(int)); 93 + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", 94 + strerror(errno)); 95 + 96 + total_success = 0; 97 + for (step = 1; step < max_entries; step++) { 98 + map_batch_update(map_fd, max_entries, keys, values); 99 + map_batch_verify(visited, max_entries, keys, values); 100 + memset(keys, 0, max_entries * sizeof(*keys)); 101 + memset(values, 0, max_entries * sizeof(*values)); 102 + batch = 0; 103 + total = 0; 104 + /* iteratively lookup/delete elements with 'step' 105 + * elements each. 106 + */ 107 + count = step; 108 + while (true) { 109 + err = bpf_map_lookup_batch(map_fd, 110 + total ? 
&batch : NULL, &batch, 111 + keys + total, values + total, &count, &opts); 112 + 113 + CHECK((err && errno != ENOENT), "lookup with steps", 114 + "error: %s\n", strerror(errno)); 115 + 116 + total += count; 117 + if (err) 118 + break; 119 + } 120 + 121 + CHECK(total != max_entries, "lookup with steps", 122 + "total = %u, max_entries = %u\n", total, max_entries); 123 + 124 + map_batch_verify(visited, max_entries, keys, values); 125 + 126 + total = 0; 127 + count = step; 128 + while (total < max_entries) { 129 + if (max_entries - total < step) 130 + count = max_entries - total; 131 + err = bpf_map_delete_batch(map_fd, keys + total, &count, 132 + &opts); 133 + CHECK((err && errno != ENOENT), "delete batch", 134 + "error: %s\n", strerror(errno)); 135 + total += count; 136 + if (err) 137 + break; 138 + } 139 + CHECK(total != max_entries, "delete with steps", 140 + "total = %u, max_entries = %u\n", total, max_entries); 141 + 142 + /* check map is empty, errono == ENOENT */ 143 + err = bpf_map_get_next_key(map_fd, NULL, &key); 144 + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", 145 + "error: %s\n", strerror(errno)); 146 + 147 + total_success++; 148 + } 149 + 150 + CHECK(total_success == 0, "check total_success", 151 + "unexpected failure\n"); 152 + 153 + printf("%s:PASS\n", __func__); 154 + 155 + free(keys); 156 + free(values); 157 + free(visited); 158 + }
+59
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #include <test_progs.h> 4 + #include <network_helpers.h> 5 + #include "kfunc_call_test.skel.h" 6 + #include "kfunc_call_test_subprog.skel.h" 7 + 8 + static void test_main(void) 9 + { 10 + struct kfunc_call_test *skel; 11 + int prog_fd, retval, err; 12 + 13 + skel = kfunc_call_test__open_and_load(); 14 + if (!ASSERT_OK_PTR(skel, "skel")) 15 + return; 16 + 17 + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); 18 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 19 + NULL, NULL, (__u32 *)&retval, NULL); 20 + ASSERT_OK(err, "bpf_prog_test_run(test1)"); 21 + ASSERT_EQ(retval, 12, "test1-retval"); 22 + 23 + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2); 24 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 25 + NULL, NULL, (__u32 *)&retval, NULL); 26 + ASSERT_OK(err, "bpf_prog_test_run(test2)"); 27 + ASSERT_EQ(retval, 3, "test2-retval"); 28 + 29 + kfunc_call_test__destroy(skel); 30 + } 31 + 32 + static void test_subprog(void) 33 + { 34 + struct kfunc_call_test_subprog *skel; 35 + int prog_fd, retval, err; 36 + 37 + skel = kfunc_call_test_subprog__open_and_load(); 38 + if (!ASSERT_OK_PTR(skel, "skel")) 39 + return; 40 + 41 + prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); 42 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 43 + NULL, NULL, (__u32 *)&retval, NULL); 44 + ASSERT_OK(err, "bpf_prog_test_run(test1)"); 45 + ASSERT_EQ(retval, 10, "test1-retval"); 46 + ASSERT_NEQ(skel->data->active_res, -1, "active_res"); 47 + ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state"); 48 + 49 + kfunc_call_test_subprog__destroy(skel); 50 + } 51 + 52 + void test_kfunc_call(void) 53 + { 54 + if (test__start_subtest("main")) 55 + test_main(); 56 + 57 + if (test__start_subtest("subprog")) 58 + test_subprog(); 59 + }
+40
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
··· 7 7 #include "test_skmsg_load_helpers.skel.h" 8 8 #include "test_sockmap_update.skel.h" 9 9 #include "test_sockmap_invalid_update.skel.h" 10 + #include "test_sockmap_skb_verdict_attach.skel.h" 10 11 #include "bpf_iter_sockmap.skel.h" 11 12 12 13 #define TCP_REPAIR 19 /* TCP sock is under repair right now */ ··· 282 281 bpf_iter_sockmap__destroy(skel); 283 282 } 284 283 284 + static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first, 285 + enum bpf_attach_type second) 286 + { 287 + struct test_sockmap_skb_verdict_attach *skel; 288 + int err, map, verdict; 289 + 290 + skel = test_sockmap_skb_verdict_attach__open_and_load(); 291 + if (CHECK_FAIL(!skel)) { 292 + perror("test_sockmap_skb_verdict_attach__open_and_load"); 293 + return; 294 + } 295 + 296 + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 297 + map = bpf_map__fd(skel->maps.sock_map); 298 + 299 + err = bpf_prog_attach(verdict, map, first, 0); 300 + if (CHECK_FAIL(err)) { 301 + perror("bpf_prog_attach"); 302 + goto out; 303 + } 304 + 305 + err = bpf_prog_attach(verdict, map, second, 0); 306 + assert(err == -1 && errno == EBUSY); 307 + 308 + err = bpf_prog_detach2(verdict, map, first); 309 + if (CHECK_FAIL(err)) { 310 + perror("bpf_prog_detach2"); 311 + goto out; 312 + } 313 + out: 314 + test_sockmap_skb_verdict_attach__destroy(skel); 315 + } 316 + 285 317 void test_sockmap_basic(void) 286 318 { 287 319 if (test__start_subtest("sockmap create_update_free")) ··· 335 301 test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP); 336 302 if (test__start_subtest("sockhash copy")) 337 303 test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH); 304 + if (test__start_subtest("sockmap skb_verdict attach")) { 305 + test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT, 306 + BPF_SK_SKB_STREAM_VERDICT); 307 + test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, 308 + BPF_SK_SKB_VERDICT); 309 + } 338 310 }
+136
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
··· 1603 1603 } 1604 1604 } 1605 1605 1606 + static void udp_redir_to_connected(int family, int sotype, int sock_mapfd, 1607 + int verd_mapfd, enum redir_mode mode) 1608 + { 1609 + const char *log_prefix = redir_mode_str(mode); 1610 + struct sockaddr_storage addr; 1611 + int c0, c1, p0, p1; 1612 + unsigned int pass; 1613 + socklen_t len; 1614 + int err, n; 1615 + u64 value; 1616 + u32 key; 1617 + char b; 1618 + 1619 + zero_verdict_count(verd_mapfd); 1620 + 1621 + p0 = socket_loopback(family, sotype | SOCK_NONBLOCK); 1622 + if (p0 < 0) 1623 + return; 1624 + len = sizeof(addr); 1625 + err = xgetsockname(p0, sockaddr(&addr), &len); 1626 + if (err) 1627 + goto close_peer0; 1628 + 1629 + c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0); 1630 + if (c0 < 0) 1631 + goto close_peer0; 1632 + err = xconnect(c0, sockaddr(&addr), len); 1633 + if (err) 1634 + goto close_cli0; 1635 + err = xgetsockname(c0, sockaddr(&addr), &len); 1636 + if (err) 1637 + goto close_cli0; 1638 + err = xconnect(p0, sockaddr(&addr), len); 1639 + if (err) 1640 + goto close_cli0; 1641 + 1642 + p1 = socket_loopback(family, sotype | SOCK_NONBLOCK); 1643 + if (p1 < 0) 1644 + goto close_cli0; 1645 + err = xgetsockname(p1, sockaddr(&addr), &len); 1646 + if (err) 1647 + goto close_cli0; 1648 + 1649 + c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0); 1650 + if (c1 < 0) 1651 + goto close_peer1; 1652 + err = xconnect(c1, sockaddr(&addr), len); 1653 + if (err) 1654 + goto close_cli1; 1655 + err = xgetsockname(c1, sockaddr(&addr), &len); 1656 + if (err) 1657 + goto close_cli1; 1658 + err = xconnect(p1, sockaddr(&addr), len); 1659 + if (err) 1660 + goto close_cli1; 1661 + 1662 + key = 0; 1663 + value = p0; 1664 + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); 1665 + if (err) 1666 + goto close_cli1; 1667 + 1668 + key = 1; 1669 + value = p1; 1670 + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); 1671 + if (err) 1672 + goto close_cli1; 1673 + 1674 + n = write(c1, "a", 1); 1675 + if 
(n < 0) 1676 + FAIL_ERRNO("%s: write", log_prefix); 1677 + if (n == 0) 1678 + FAIL("%s: incomplete write", log_prefix); 1679 + if (n < 1) 1680 + goto close_cli1; 1681 + 1682 + key = SK_PASS; 1683 + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 1684 + if (err) 1685 + goto close_cli1; 1686 + if (pass != 1) 1687 + FAIL("%s: want pass count 1, have %d", log_prefix, pass); 1688 + 1689 + n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); 1690 + if (n < 0) 1691 + FAIL_ERRNO("%s: read", log_prefix); 1692 + if (n == 0) 1693 + FAIL("%s: incomplete read", log_prefix); 1694 + 1695 + close_cli1: 1696 + xclose(c1); 1697 + close_peer1: 1698 + xclose(p1); 1699 + close_cli0: 1700 + xclose(c0); 1701 + close_peer0: 1702 + xclose(p0); 1703 + } 1704 + 1705 + static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, 1706 + struct bpf_map *inner_map, int family) 1707 + { 1708 + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 1709 + int verdict_map = bpf_map__fd(skel->maps.verdict_map); 1710 + int sock_map = bpf_map__fd(inner_map); 1711 + int err; 1712 + 1713 + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); 1714 + if (err) 1715 + return; 1716 + 1717 + skel->bss->test_ingress = false; 1718 + udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1719 + REDIR_EGRESS); 1720 + skel->bss->test_ingress = true; 1721 + udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, 1722 + REDIR_INGRESS); 1723 + 1724 + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); 1725 + } 1726 + 1727 + static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1728 + int family) 1729 + { 1730 + const char *family_name, *map_name; 1731 + char s[MAX_TEST_NAME]; 1732 + 1733 + family_name = family_str(family); 1734 + map_name = map_type_str(map); 1735 + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); 1736 + if (!test__start_subtest(s)) 1737 + return; 1738 + udp_skb_redir_to_connected(skel, map, 
family); 1739 + } 1740 + 1606 1741 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, 1607 1742 int family) 1608 1743 { ··· 1746 1611 test_redir(skel, map, family, SOCK_STREAM); 1747 1612 test_reuseport(skel, map, family, SOCK_STREAM); 1748 1613 test_reuseport(skel, map, family, SOCK_DGRAM); 1614 + test_udp_redir(skel, map, family); 1749 1615 } 1750 1616 1751 1617 void test_sockmap_listen(void)
+4 -2
tools/testing/selftests/bpf/prog_tests/test_ima.c
··· 68 68 goto close_prog; 69 69 70 70 snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir); 71 - if (CHECK_FAIL(system(cmd))) 71 + err = system(cmd); 72 + if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno)) 72 73 goto close_clean; 73 74 74 75 err = run_measured_process(measured_dir, &skel->bss->monitored_pid); ··· 82 81 83 82 close_clean: 84 83 snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir); 85 - CHECK_FAIL(system(cmd)); 84 + err = system(cmd); 85 + CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno); 86 86 close_prog: 87 87 ima__destroy(skel); 88 88 }
+18 -18
tools/testing/selftests/bpf/progs/bpf_cubic.c
··· 174 174 * as long as it is used in one of the func ptr 175 175 * under SEC(".struct_ops"). 176 176 */ 177 - SEC("struct_ops/bictcp_init") 178 - void BPF_PROG(bictcp_init, struct sock *sk) 177 + SEC("struct_ops/bpf_cubic_init") 178 + void BPF_PROG(bpf_cubic_init, struct sock *sk) 179 179 { 180 180 struct bictcp *ca = inet_csk_ca(sk); 181 181 ··· 192 192 * The remaining tcp-cubic functions have an easier way. 193 193 */ 194 194 SEC("no-sec-prefix-bictcp_cwnd_event") 195 - void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event) 195 + void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) 196 196 { 197 197 if (event == CA_EVENT_TX_START) { 198 198 struct bictcp *ca = inet_csk_ca(sk); ··· 384 384 } 385 385 386 386 /* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ 387 - void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 387 + void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 388 388 { 389 389 struct tcp_sock *tp = tcp_sk(sk); 390 390 struct bictcp *ca = inet_csk_ca(sk); ··· 403 403 tcp_cong_avoid_ai(tp, ca->cnt, acked); 404 404 } 405 405 406 - __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) 406 + __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) 407 407 { 408 408 const struct tcp_sock *tp = tcp_sk(sk); 409 409 struct bictcp *ca = inet_csk_ca(sk); ··· 420 420 return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); 421 421 } 422 422 423 - void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state) 423 + void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) 424 424 { 425 425 if (new_state == TCP_CA_Loss) { 426 426 bictcp_reset(inet_csk_ca(sk)); ··· 496 496 } 497 497 } 498 498 499 - void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, 499 + void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, 500 500 const struct ack_sample *sample) 501 501 { 502 502 const struct tcp_sock *tp = tcp_sk(sk); ··· 
525 525 hystart_update(sk, delay); 526 526 } 527 527 528 - __u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk) 529 - { 530 - const struct tcp_sock *tp = tcp_sk(sk); 528 + extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; 531 529 532 - return max(tp->snd_cwnd, tp->prior_cwnd); 530 + __u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk) 531 + { 532 + return tcp_reno_undo_cwnd(sk); 533 533 } 534 534 535 535 SEC(".struct_ops") 536 536 struct tcp_congestion_ops cubic = { 537 - .init = (void *)bictcp_init, 538 - .ssthresh = (void *)bictcp_recalc_ssthresh, 539 - .cong_avoid = (void *)bictcp_cong_avoid, 540 - .set_state = (void *)bictcp_state, 541 - .undo_cwnd = (void *)tcp_reno_undo_cwnd, 542 - .cwnd_event = (void *)bictcp_cwnd_event, 543 - .pkts_acked = (void *)bictcp_acked, 537 + .init = (void *)bpf_cubic_init, 538 + .ssthresh = (void *)bpf_cubic_recalc_ssthresh, 539 + .cong_avoid = (void *)bpf_cubic_cong_avoid, 540 + .set_state = (void *)bpf_cubic_state, 541 + .undo_cwnd = (void *)bpf_cubic_undo_cwnd, 542 + .cwnd_event = (void *)bpf_cubic_cwnd_event, 543 + .pkts_acked = (void *)bpf_cubic_acked, 544 544 .name = "bpf_cubic", 545 545 };
+6 -16
tools/testing/selftests/bpf/progs/bpf_dctcp.c
··· 194 194 return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); 195 195 } 196 196 197 - SEC("struct_ops/tcp_reno_cong_avoid") 198 - void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 197 + extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; 198 + 199 + SEC("struct_ops/dctcp_reno_cong_avoid") 200 + void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) 199 201 { 200 - struct tcp_sock *tp = tcp_sk(sk); 201 - 202 - if (!tcp_is_cwnd_limited(sk)) 203 - return; 204 - 205 - /* In "safe" area, increase. */ 206 - if (tcp_in_slow_start(tp)) { 207 - acked = tcp_slow_start(tp, acked); 208 - if (!acked) 209 - return; 210 - } 211 - /* In dangerous area, increase slowly. */ 212 - tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); 202 + tcp_reno_cong_avoid(sk, ack, acked); 213 203 } 214 204 215 205 SEC(".struct_ops") ··· 216 226 .in_ack_event = (void *)dctcp_update_alpha, 217 227 .cwnd_event = (void *)dctcp_cwnd_event, 218 228 .ssthresh = (void *)dctcp_ssthresh, 219 - .cong_avoid = (void *)tcp_reno_cong_avoid, 229 + .cong_avoid = (void *)dctcp_cong_avoid, 220 230 .undo_cwnd = (void *)dctcp_cwnd_undo, 221 231 .set_state = (void *)dctcp_state, 222 232 .flags = TCP_CONG_NEEDS_ECN,
+47
tools/testing/selftests/bpf/progs/kfunc_call_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include "bpf_tcp_helpers.h" 6 + 7 + extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 8 + extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 + __u32 c, __u64 d) __ksym; 10 + 11 + SEC("classifier") 12 + int kfunc_call_test2(struct __sk_buff *skb) 13 + { 14 + struct bpf_sock *sk = skb->sk; 15 + 16 + if (!sk) 17 + return -1; 18 + 19 + sk = bpf_sk_fullsock(sk); 20 + if (!sk) 21 + return -1; 22 + 23 + return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); 24 + } 25 + 26 + SEC("classifier") 27 + int kfunc_call_test1(struct __sk_buff *skb) 28 + { 29 + struct bpf_sock *sk = skb->sk; 30 + __u64 a = 1ULL << 32; 31 + __u32 ret; 32 + 33 + if (!sk) 34 + return -1; 35 + 36 + sk = bpf_sk_fullsock(sk); 37 + if (!sk) 38 + return -1; 39 + 40 + a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4); 41 + ret = a >> 32; /* ret should be 2 */ 42 + ret += (__u32)a; /* ret should be 12 */ 43 + 44 + return ret; 45 + } 46 + 47 + char _license[] SEC("license") = "GPL";
+42
tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2021 Facebook */ 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include "bpf_tcp_helpers.h" 6 + 7 + extern const int bpf_prog_active __ksym; 8 + extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 + __u32 c, __u64 d) __ksym; 10 + extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; 11 + int active_res = -1; 12 + int sk_state = -1; 13 + 14 + int __noinline f1(struct __sk_buff *skb) 15 + { 16 + struct bpf_sock *sk = skb->sk; 17 + int *active; 18 + 19 + if (!sk) 20 + return -1; 21 + 22 + sk = bpf_sk_fullsock(sk); 23 + if (!sk) 24 + return -1; 25 + 26 + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, 27 + bpf_get_smp_processor_id()); 28 + if (active) 29 + active_res = *active; 30 + 31 + sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; 32 + 33 + return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); 34 + } 35 + 36 + SEC("classifier") 37 + int kfunc_call_test1(struct __sk_buff *skb) 38 + { 39 + return f1(skb); 40 + } 41 + 42 + char _license[] SEC("license") = "GPL";
+22
tools/testing/selftests/bpf/progs/test_sockmap_listen.c
··· 29 29 } verdict_map SEC(".maps"); 30 30 31 31 static volatile bool test_sockmap; /* toggled by user-space */ 32 + static volatile bool test_ingress; /* toggled by user-space */ 32 33 33 34 SEC("sk_skb/stream_parser") 34 35 int prog_stream_parser(struct __sk_buff *skb) ··· 48 47 verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0); 49 48 else 50 49 verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0); 50 + 51 + count = bpf_map_lookup_elem(&verdict_map, &verdict); 52 + if (count) 53 + (*count)++; 54 + 55 + return verdict; 56 + } 57 + 58 + SEC("sk_skb/skb_verdict") 59 + int prog_skb_verdict(struct __sk_buff *skb) 60 + { 61 + unsigned int *count; 62 + __u32 zero = 0; 63 + int verdict; 64 + 65 + if (test_sockmap) 66 + verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 67 + test_ingress ? BPF_F_INGRESS : 0); 68 + else 69 + verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 70 + test_ingress ? BPF_F_INGRESS : 0); 51 71 52 72 count = bpf_map_lookup_elem(&verdict_map, &verdict); 53 73 if (count)
+18
tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include "vmlinux.h" 3 + #include <bpf/bpf_helpers.h> 4 + 5 + struct { 6 + __uint(type, BPF_MAP_TYPE_SOCKMAP); 7 + __uint(max_entries, 2); 8 + __type(key, __u32); 9 + __type(value, __u64); 10 + } sock_map SEC(".maps"); 11 + 12 + SEC("sk_skb/skb_verdict") 13 + int prog_skb_verdict(struct __sk_buff *skb) 14 + { 15 + return SK_DROP; 16 + } 17 + 18 + char _license[] SEC("license") = "GPL";
+2 -1
tools/testing/selftests/bpf/test_xsk.sh
··· 107 107 echo "setting up ${VETH0}: namespace: ${NS0}" 108 108 fi 109 109 ip netns add ${NS1} 110 - ip link add ${VETH0} type veth peer name ${VETH1} 110 + ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4 111 111 if [ -f /proc/net/if_inet6 ]; then 112 112 echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6 113 113 fi ··· 118 118 ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} 119 119 ip link set ${VETH0} mtu ${MTU} 120 120 ip netns exec ${NS1} ip link set ${VETH1} up 121 + ip netns exec ${NS1} ip link set dev lo up 121 122 ip link set ${VETH0} up 122 123 } 123 124
+6 -6
tools/testing/selftests/bpf/verifier/calls.c
··· 19 19 BPF_MOV64_IMM(BPF_REG_0, 2), 20 20 BPF_EXIT_INSN(), 21 21 }, 22 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 22 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 23 23 .result_unpriv = REJECT, 24 24 .result = ACCEPT, 25 25 .retval = 1, ··· 136 136 { 137 137 "calls: wrong src reg", 138 138 .insns = { 139 - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), 139 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0), 140 140 BPF_MOV64_IMM(BPF_REG_0, 1), 141 141 BPF_EXIT_INSN(), 142 142 }, ··· 397 397 BPF_MOV64_IMM(BPF_REG_0, 1), 398 398 BPF_EXIT_INSN(), 399 399 }, 400 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 400 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 401 401 .fixup_map_hash_48b = { 3 }, 402 402 .result_unpriv = REJECT, 403 403 .result = ACCEPT, ··· 1977 1977 BPF_EXIT_INSN(), 1978 1978 }, 1979 1979 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 1980 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 1980 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 1981 1981 .result_unpriv = REJECT, 1982 1982 .result = ACCEPT, 1983 1983 }, ··· 2003 2003 BPF_EXIT_INSN(), 2004 2004 }, 2005 2005 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 2006 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 2006 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 2007 2007 .errstr = "!read_ok", 2008 2008 .result = REJECT, 2009 2009 }, ··· 2028 2028 BPF_EXIT_INSN(), 2029 2029 }, 2030 2030 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 2031 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 2031 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 2032 2032 .errstr = "!read_ok", 2033 2033 .result = REJECT, 2034 2034 },
+5 -5
tools/testing/selftests/bpf/verifier/dead_code.c
··· 85 85 BPF_MOV64_IMM(BPF_REG_0, 12), 86 86 BPF_EXIT_INSN(), 87 87 }, 88 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 88 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 89 89 .result_unpriv = REJECT, 90 90 .result = ACCEPT, 91 91 .retval = 7, ··· 103 103 BPF_MOV64_IMM(BPF_REG_0, 12), 104 104 BPF_EXIT_INSN(), 105 105 }, 106 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 106 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 107 107 .result_unpriv = REJECT, 108 108 .result = ACCEPT, 109 109 .retval = 7, ··· 121 121 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5), 122 122 BPF_EXIT_INSN(), 123 123 }, 124 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 124 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 125 125 .result_unpriv = REJECT, 126 126 .result = ACCEPT, 127 127 .retval = 7, ··· 137 137 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), 138 138 BPF_EXIT_INSN(), 139 139 }, 140 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 140 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 141 141 .result_unpriv = REJECT, 142 142 .result = ACCEPT, 143 143 .retval = 2, ··· 152 152 BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), 153 153 BPF_EXIT_INSN(), 154 154 }, 155 - .errstr_unpriv = "function calls to other bpf functions are allowed for", 155 + .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for", 156 156 .result_unpriv = REJECT, 157 157 .result = ACCEPT, 158 158 .retval = 2,
+28 -11
tools/testing/selftests/bpf/vmtest.sh
··· 24 24 usage() 25 25 { 26 26 cat <<EOF 27 - Usage: $0 [-i] [-d <output_dir>] -- [<command>] 27 + Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>] 28 28 29 29 <command> is the command you would normally run when you are in 30 30 tools/testing/selftests/bpf. e.g: 31 31 32 32 $0 -- ./test_progs -t test_lsm 33 33 34 - If no command is specified, "${DEFAULT_COMMAND}" will be run by 35 - default. 34 + If no command is specified and a debug shell (-s) is not requested, 35 + "${DEFAULT_COMMAND}" will be run by default. 36 36 37 37 If you build your kernel using KBUILD_OUTPUT= or O= options, these 38 38 can be passed as environment variables to the script: ··· 49 49 -d) Update the output directory (default: ${OUTPUT_DIR}) 50 50 -j) Number of jobs for compilation, similar to -j in make 51 51 (default: ${NUM_COMPILE_JOBS}) 52 + -s) Instead of powering off the VM, start an interactive 53 + shell. If <command> is specified, the shell runs after 54 + the command finishes executing 52 55 EOF 53 56 } 54 57 ··· 152 149 local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d" 153 150 local init_script="${init_script_dir}/S50-startup" 154 151 local command="$1" 152 + local exit_command="$2" 155 153 156 154 mount_image 157 155 ··· 166 162 167 163 fi 168 164 169 - sudo bash -c "cat >${init_script}" <<EOF 170 - #!/bin/bash 165 + sudo bash -c "echo '#!/bin/bash' > ${init_script}" 171 166 167 + if [[ "${command}" != "" ]]; then 168 + sudo bash -c "cat >>${init_script}" <<EOF 172 169 # Have a default value in the exit status file 173 170 # incase the VM is forcefully stopped. 174 171 echo "130" > "/root/${EXIT_STATUS_FILE}" ··· 180 175 stdbuf -oL -eL ${command} 181 176 echo "\$?" 
> "/root/${EXIT_STATUS_FILE}" 182 177 } 2>&1 | tee "/root/${LOG_FILE}" 183 - poweroff -f 178 + # Ensure that the logs are written to disk 179 + sync 184 180 EOF 181 + fi 185 182 183 + sudo bash -c "echo ${exit_command} >> ${init_script}" 186 184 sudo chmod a+x "${init_script}" 187 185 unmount_image 188 186 } ··· 285 277 local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" 286 278 local command="${DEFAULT_COMMAND}" 287 279 local update_image="no" 280 + local exit_command="poweroff -f" 281 + local debug_shell="no" 288 282 289 - while getopts 'hkid:j:' opt; do 283 + while getopts 'hskid:j:' opt; do 290 284 case ${opt} in 291 285 i) 292 286 update_image="yes" ··· 298 288 ;; 299 289 j) 300 290 NUM_COMPILE_JOBS="$OPTARG" 291 + ;; 292 + s) 293 + command="" 294 + debug_shell="yes" 295 + exit_command="bash" 301 296 ;; 302 297 h) 303 298 usage ··· 322 307 done 323 308 shift $((OPTIND -1)) 324 309 325 - if [[ $# -eq 0 ]]; then 310 + if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then 326 311 echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" 327 312 else 328 313 command="$@" ··· 370 355 fi 371 356 372 357 update_selftests "${kernel_checkout}" "${make_command}" 373 - update_init_script "${command}" 358 + update_init_script "${command}" "${exit_command}" 374 359 run_vm "${kernel_bzimage}" 375 - copy_logs 376 - echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" 360 + if [[ "${command}" != "" ]]; then 361 + copy_logs 362 + echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" 363 + fi 377 364 } 378 365 379 366 catch()
+322 -388
tools/testing/selftests/bpf/xdpxceiver.c
··· 41 41 * Reduce the size of the RX ring to a fraction of the fill ring size. 42 42 * iv. fill queue empty 43 43 * Do not populate the fill queue and then try to receive pkts. 44 + * f. bpf_link resource persistence 45 + * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0, 46 + * then remove xsk sockets from queue 0 on both veth interfaces and 47 + * finally run a traffic on queues ids 1 44 48 * 45 - * Total tests: 10 49 + * Total tests: 12 46 50 * 47 51 * Flow: 48 52 * ----- ··· 97 93 #include "xdpxceiver.h" 98 94 #include "../kselftest.h" 99 95 96 + static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; 97 + static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; 98 + static const char *IP1 = "192.168.100.162"; 99 + static const char *IP2 = "192.168.100.161"; 100 + static const u16 UDP_PORT1 = 2020; 101 + static const u16 UDP_PORT2 = 2121; 102 + 100 103 static void __exit_with_error(int error, const char *file, const char *func, int line) 101 104 { 102 105 if (configured_mode == TEST_MODE_UNCONFIGURED) { ··· 119 108 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) 120 109 121 110 #define print_ksft_result(void)\ 122 - (ksft_test_result_pass("PASS: %s %s %s%s%s\n", configured_mode ? "DRV" : "SKB",\ 111 + (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\ 123 112 test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\ 124 113 test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\ 125 114 test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\ 126 - test_type == TEST_TYPE_STATS ? 
"Stats" : "")) 127 - 128 - static void pthread_init_mutex(void) 129 - { 130 - pthread_mutex_init(&sync_mutex, NULL); 131 - pthread_mutex_init(&sync_mutex_tx, NULL); 132 - pthread_cond_init(&signal_rx_condition, NULL); 133 - pthread_cond_init(&signal_tx_condition, NULL); 134 - } 135 - 136 - static void pthread_destroy_mutex(void) 137 - { 138 - pthread_mutex_destroy(&sync_mutex); 139 - pthread_mutex_destroy(&sync_mutex_tx); 140 - pthread_cond_destroy(&signal_rx_condition); 141 - pthread_cond_destroy(&signal_tx_condition); 142 - } 115 + test_type == TEST_TYPE_STATS ? "Stats" : "",\ 116 + test_type == TEST_TYPE_BPF_RES ? "BPF RES" : "")) 143 117 144 118 static void *memset32_htonl(void *dest, u32 val, u32 size) 145 119 { ··· 143 147 } 144 148 145 149 /* 146 - * This function code has been taken from 147 - * Linux kernel lib/checksum.c 148 - */ 149 - static inline unsigned short from32to16(unsigned int x) 150 - { 151 - /* add up 16-bit and 16-bit for 16+c bit */ 152 - x = (x & 0xffff) + (x >> 16); 153 - /* add up carry.. 
*/ 154 - x = (x & 0xffff) + (x >> 16); 155 - return x; 156 - } 157 - 158 - /* 159 150 * Fold a partial checksum 160 151 * This function code has been taken from 161 152 * Linux kernel include/asm-generic/checksum.h 162 153 */ 163 - static inline __u16 csum_fold(__u32 csum) 154 + static __u16 csum_fold(__u32 csum) 164 155 { 165 156 u32 sum = (__force u32)csum; 166 157 ··· 160 177 * This function code has been taken from 161 178 * Linux kernel lib/checksum.c 162 179 */ 163 - static inline u32 from64to32(u64 x) 180 + static u32 from64to32(u64 x) 164 181 { 165 182 /* add up 32-bit and 32-bit for 32+c bit */ 166 183 x = (x & 0xffffffff) + (x >> 32); ··· 169 186 return (u32)x; 170 187 } 171 188 172 - __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum); 173 - 174 189 /* 175 190 * This function code has been taken from 176 191 * Linux kernel lib/checksum.c 177 192 */ 178 - __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 193 + static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 179 194 { 180 195 unsigned long long s = (__force u32)sum; 181 196 ··· 191 210 * This function has been taken from 192 211 * Linux kernel include/asm-generic/checksum.h 193 212 */ 194 - static inline __u16 195 - csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 213 + static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 196 214 { 197 215 return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); 198 216 } 199 217 200 - static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) 218 + static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) 201 219 { 202 220 u32 csum = 0; 203 221 u32 cnt = 0; ··· 251 271 memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); 252 272 } 253 273 254 - static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size) 
274 + static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx) 255 275 { 256 - int ret; 257 276 struct xsk_umem_config cfg = { 258 277 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 259 278 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, ··· 260 281 .frame_headroom = frame_headroom, 261 282 .flags = XSK_UMEM__DEFAULT_FLAGS 262 283 }; 284 + int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; 285 + struct xsk_umem_info *umem; 286 + int ret; 263 287 264 - data->umem = calloc(1, sizeof(struct xsk_umem_info)); 265 - if (!data->umem) 288 + umem = calloc(1, sizeof(struct xsk_umem_info)); 289 + if (!umem) 266 290 exit_with_error(errno); 267 291 268 - ret = xsk_umem__create(&data->umem->umem, buffer, size, 269 - &data->umem->fq, &data->umem->cq, &cfg); 292 + ret = xsk_umem__create(&umem->umem, buffer, size, 293 + &umem->fq, &umem->cq, &cfg); 270 294 if (ret) 271 295 exit_with_error(ret); 272 296 273 - data->umem->buffer = buffer; 297 + umem->buffer = buffer; 298 + 299 + data->umem_arr[idx] = umem; 274 300 } 275 301 276 302 static void xsk_populate_fill_ring(struct xsk_umem_info *umem) ··· 291 307 xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); 292 308 } 293 309 294 - static int xsk_configure_socket(struct ifobject *ifobject) 310 + static int xsk_configure_socket(struct ifobject *ifobject, int idx) 295 311 { 296 312 struct xsk_socket_config cfg; 313 + struct xsk_socket_info *xsk; 297 314 struct xsk_ring_cons *rxr; 298 315 struct xsk_ring_prod *txr; 299 316 int ret; 300 317 301 - ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info)); 302 - if (!ifobject->xsk) 318 + xsk = calloc(1, sizeof(struct xsk_socket_info)); 319 + if (!xsk) 303 320 exit_with_error(errno); 304 321 305 - ifobject->xsk->umem = ifobject->umem; 322 + xsk->umem = ifobject->umem; 306 323 cfg.rx_size = rxqsize; 307 324 cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; 308 325 cfg.libbpf_flags = 0; ··· 311 326 cfg.bind_flags = xdp_bind_flags; 312 327 313 328 if 
(test_type != TEST_TYPE_BIDI) { 314 - rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL; 315 - txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL; 329 + rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL; 330 + txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL; 316 331 } else { 317 - rxr = &ifobject->xsk->rx; 318 - txr = &ifobject->xsk->tx; 332 + rxr = &xsk->rx; 333 + txr = &xsk->tx; 319 334 } 320 335 321 - ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname, 322 - opt_queue, ifobject->umem->umem, rxr, txr, &cfg); 323 - 336 + ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx, 337 + ifobject->umem->umem, rxr, txr, &cfg); 324 338 if (ret) 325 339 return 1; 340 + 341 + ifobject->xsk_arr[idx] = xsk; 326 342 327 343 return 0; 328 344 } ··· 350 364 ksft_print_msg(str, prog); 351 365 } 352 366 353 - static bool switch_namespace(int idx) 367 + static int switch_namespace(const char *nsname) 354 368 { 355 369 char fqns[26] = "/var/run/netns/"; 356 370 int nsfd; 357 371 358 - strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1); 372 + if (!nsname || strlen(nsname) == 0) 373 + return -1; 374 + 375 + strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1); 359 376 nsfd = open(fqns, O_RDONLY); 360 377 361 378 if (nsfd == -1) ··· 367 378 if (setns(nsfd, 0) == -1) 368 379 exit_with_error(errno); 369 380 370 - return true; 371 - } 381 + print_verbose("NS switched: %s\n", nsname); 372 382 373 - static void *nsswitchthread(void *args) 374 - { 375 - struct targs *targs = args; 376 - 377 - targs->retptr = false; 378 - 379 - if (switch_namespace(targs->idx)) { 380 - ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname); 381 - if (!ifdict[targs->idx]->ifindex) { 382 - ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n", 383 - __func__, ifdict[targs->idx]->ifname); 384 - } else { 385 - print_verbose("Interface found: %s\n", ifdict[targs->idx]->ifname); 386 - targs->retptr = true; 387 - } 388 - 
} 389 - pthread_exit(NULL); 383 + return nsfd; 390 384 } 391 385 392 386 static int validate_interfaces(void) ··· 380 408 if (!strcmp(ifdict[i]->ifname, "")) { 381 409 ret = false; 382 410 ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>."); 383 - } 384 - if (strcmp(ifdict[i]->nsname, "")) { 385 - struct targs *targs; 386 - 387 - targs = malloc(sizeof(*targs)); 388 - if (!targs) 389 - exit_with_error(errno); 390 - 391 - targs->idx = i; 392 - if (pthread_create(&ns_thread, NULL, nsswitchthread, targs)) 393 - exit_with_error(errno); 394 - 395 - pthread_join(ns_thread, NULL); 396 - 397 - if (targs->retptr) 398 - print_verbose("NS switched: %s\n", ifdict[i]->nsname); 399 - 400 - free(targs); 401 - } else { 402 - ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname); 403 - if (!ifdict[i]->ifindex) { 404 - ksft_test_result_fail 405 - ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname); 406 - ret = false; 407 - } else { 408 - print_verbose("Interface found: %s\n", ifdict[i]->ifname); 409 - } 410 411 } 411 412 } 412 413 return ret; ··· 392 447 opterr = 0; 393 448 394 449 for (;;) { 395 - c = getopt_long(argc, argv, "i:q:DC:v", long_options, &option_index); 450 + c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index); 396 451 397 452 if (c == -1) 398 453 break; ··· 411 466 memcpy(ifdict[interface_index]->nsname, token, 412 467 MAX_INTERFACES_NAMESPACE_CHARS); 413 468 interface_index++; 414 - break; 415 - case 'q': 416 - opt_queue = atoi(optarg); 417 469 break; 418 470 case 'D': 419 471 debug_pkt_dump = 1; ··· 448 506 exit_with_error(errno); 449 507 } 450 508 451 - static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) 509 + static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) 452 510 { 453 511 unsigned int rcvd; 454 512 u32 idx; ··· 456 514 if (!xsk->outstanding_tx) 457 515 return; 458 516 459 - if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) 517 + if 
(xsk_ring_prod__needs_wakeup(&xsk->tx)) 460 518 kick_tx(xsk); 461 519 462 520 rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); ··· 544 602 xsk_ring_prod__submit(&xsk->tx, batch_size); 545 603 if (!tx_invalid_test) { 546 604 xsk->outstanding_tx += batch_size; 547 - } else { 548 - if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx)) 549 - kick_tx(xsk); 605 + } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { 606 + kick_tx(xsk); 550 607 } 551 608 *frameptr += batch_size; 552 609 *frameptr %= num_frames; 553 610 complete_tx_only(xsk, batch_size); 554 611 } 555 612 556 - static inline int get_batch_size(int pkt_cnt) 613 + static int get_batch_size(int pkt_cnt) 557 614 { 558 615 if (!opt_pkt_count) 559 616 return BATCH_SIZE; ··· 608 667 609 668 static void worker_pkt_dump(void) 610 669 { 611 - struct in_addr ipaddr; 670 + struct ethhdr *ethhdr; 671 + struct iphdr *iphdr; 672 + struct udphdr *udphdr; 673 + char s[128]; 674 + int payload; 675 + void *ptr; 612 676 613 677 fprintf(stdout, "---------------------------------------\n"); 614 678 for (int iter = 0; iter < num_frames - 1; iter++) { 679 + ptr = pkt_buf[iter]->payload; 680 + ethhdr = ptr; 681 + iphdr = ptr + sizeof(*ethhdr); 682 + udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr); 683 + 615 684 /*extract L2 frame */ 616 685 fprintf(stdout, "DEBUG>> L2: dst mac: "); 617 686 for (int i = 0; i < ETH_ALEN; i++) 618 - fprintf(stdout, "%02X", ((struct ethhdr *) 619 - pkt_buf[iter]->payload)->h_dest[i]); 687 + fprintf(stdout, "%02X", ethhdr->h_dest[i]); 620 688 621 689 fprintf(stdout, "\nDEBUG>> L2: src mac: "); 622 690 for (int i = 0; i < ETH_ALEN; i++) 623 - fprintf(stdout, "%02X", ((struct ethhdr *) 624 - pkt_buf[iter]->payload)->h_source[i]); 691 + fprintf(stdout, "%02X", ethhdr->h_source[i]); 625 692 626 693 /*extract L3 frame */ 627 - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", 628 - ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl); 629 - 630 - ipaddr.s_addr = 631 - 
((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr; 632 - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr)); 633 - 634 - ipaddr.s_addr = 635 - ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr; 636 - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr)); 637 - 694 + fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); 695 + fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", 696 + inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); 697 + fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", 698 + inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); 638 699 /*extract L4 frame */ 639 - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", 640 - ntohs(((struct udphdr *)(pkt_buf[iter]->payload + 641 - sizeof(struct ethhdr) + 642 - sizeof(struct iphdr)))->source)); 643 - 644 - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", 645 - ntohs(((struct udphdr *)(pkt_buf[iter]->payload + 646 - sizeof(struct ethhdr) + 647 - sizeof(struct iphdr)))->dest)); 700 + fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); 701 + fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); 648 702 /*extract L5 frame */ 649 - int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); 703 + payload = *((uint32_t *)(ptr + PKT_HDR_SIZE)); 650 704 651 705 if (payload == EOT) { 652 706 print_verbose("End-of-transmission frame received\n"); ··· 745 809 } 746 810 } 747 811 748 - static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr, 749 - atomic_int *spinningptr) 812 + static void thread_common_ops(struct ifobject *ifobject, void *bufs) 750 813 { 814 + int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE; 751 815 int ctr = 0; 752 816 int ret; 753 817 754 - xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); 755 - ret = xsk_configure_socket(ifobject); 818 + ifobject->ns_fd = 
switch_namespace(ifobject->nsname); 819 + 820 + if (test_type == TEST_TYPE_BPF_RES) 821 + umem_sz *= 2; 822 + 823 + bufs = mmap(NULL, umem_sz, 824 + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 825 + if (bufs == MAP_FAILED) 826 + exit_with_error(errno); 827 + 828 + xsk_configure_umem(ifobject, bufs, 0); 829 + ifobject->umem = ifobject->umem_arr[0]; 830 + ret = xsk_configure_socket(ifobject, 0); 756 831 757 832 /* Retry Create Socket if it fails as xsk_socket__create() 758 833 * is asynchronous 759 - * 760 - * Essential to lock Mutex here to prevent Tx thread from 761 - * entering before Rx and causing a deadlock 762 834 */ 763 - pthread_mutex_lock(mutexptr); 764 835 while (ret && ctr < SOCK_RECONF_CTR) { 765 - atomic_store(spinningptr, 1); 766 - xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE); 767 - ret = xsk_configure_socket(ifobject); 836 + xsk_configure_umem(ifobject, bufs, 0); 837 + ifobject->umem = ifobject->umem_arr[0]; 838 + ret = xsk_configure_socket(ifobject, 0); 768 839 usleep(USLEEP_MAX); 769 840 ctr++; 770 841 } 771 - atomic_store(spinningptr, 0); 772 - pthread_mutex_unlock(mutexptr); 773 842 774 843 if (ctr >= SOCK_RECONF_CTR) 775 844 exit_with_error(ret); 845 + 846 + ifobject->umem = ifobject->umem_arr[0]; 847 + ifobject->xsk = ifobject->xsk_arr[0]; 848 + 849 + if (test_type == TEST_TYPE_BPF_RES) { 850 + xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1); 851 + ifobject->umem = ifobject->umem_arr[1]; 852 + ret = xsk_configure_socket(ifobject, 1); 853 + } 854 + 855 + ifobject->umem = ifobject->umem_arr[0]; 856 + ifobject->xsk = ifobject->xsk_arr[0]; 857 + print_verbose("Interface [%s] vector [%s]\n", 858 + ifobject->ifname, ifobject->fv.vector == tx ? 
"Tx" : "Rx"); 776 859 } 777 860 778 - static void *worker_testapp_validate(void *arg) 861 + static bool testapp_is_test_two_stepped(void) 862 + { 863 + return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step; 864 + } 865 + 866 + static void testapp_cleanup_xsk_res(struct ifobject *ifobj) 867 + { 868 + if (testapp_is_test_two_stepped()) { 869 + xsk_socket__delete(ifobj->xsk->xsk); 870 + (void)xsk_umem__delete(ifobj->umem->umem); 871 + } 872 + } 873 + 874 + static void *worker_testapp_validate_tx(void *arg) 779 875 { 780 876 struct udphdr *udp_hdr = 781 877 (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); ··· 817 849 struct generic_data data; 818 850 void *bufs = NULL; 819 851 820 - pthread_attr_setstacksize(&attr, THREAD_STACK); 852 + if (!second_step) 853 + thread_common_ops(ifobject, bufs); 821 854 822 - if (!bidi_pass) { 823 - bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE, 824 - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 825 - if (bufs == MAP_FAILED) 855 + for (int i = 0; i < num_frames; i++) { 856 + /*send EOT frame */ 857 + if (i == (num_frames - 1)) 858 + data.seqnum = -1; 859 + else 860 + data.seqnum = i; 861 + gen_udp_hdr(&data, ifobject, udp_hdr); 862 + gen_ip_hdr(ifobject, ip_hdr); 863 + gen_udp_csum(udp_hdr, ip_hdr); 864 + gen_eth_hdr(ifobject, eth_hdr); 865 + gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); 866 + } 867 + 868 + print_verbose("Sending %d packets on interface %s\n", 869 + (opt_pkt_count - 1), ifobject->ifname); 870 + tx_only_all(ifobject); 871 + 872 + testapp_cleanup_xsk_res(ifobject); 873 + pthread_exit(NULL); 874 + } 875 + 876 + static void *worker_testapp_validate_rx(void *arg) 877 + { 878 + struct ifobject *ifobject = (struct ifobject *)arg; 879 + struct pollfd fds[MAX_SOCKS] = { }; 880 + void *bufs = NULL; 881 + 882 + if (!second_step) 883 + thread_common_ops(ifobject, bufs); 884 + 885 + if (stat_test_type != 
STAT_TEST_RX_FILL_EMPTY) 886 + xsk_populate_fill_ring(ifobject->umem); 887 + 888 + TAILQ_INIT(&head); 889 + if (debug_pkt_dump) { 890 + pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); 891 + if (!pkt_buf) 826 892 exit_with_error(errno); 827 - 828 - if (strcmp(ifobject->nsname, "")) 829 - switch_namespace(ifobject->ifdict_index); 830 893 } 831 894 832 - if (ifobject->fv.vector == tx) { 833 - int spinningrxctr = 0; 895 + fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); 896 + fds[0].events = POLLIN; 834 897 835 - if (!bidi_pass) 836 - thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx); 898 + pthread_barrier_wait(&barr); 837 899 838 - while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) { 839 - spinningrxctr++; 840 - usleep(USLEEP_MAX); 900 + while (1) { 901 + if (test_type != TEST_TYPE_STATS) { 902 + rx_pkt(ifobject->xsk, fds); 903 + worker_pkt_validate(); 904 + } else { 905 + worker_stats_validate(ifobject); 841 906 } 842 - 843 - print_verbose("Interface [%s] vector [Tx]\n", ifobject->ifname); 844 - for (int i = 0; i < num_frames; i++) { 845 - /*send EOT frame */ 846 - if (i == (num_frames - 1)) 847 - data.seqnum = -1; 848 - else 849 - data.seqnum = i; 850 - gen_udp_hdr(&data, ifobject, udp_hdr); 851 - gen_ip_hdr(ifobject, ip_hdr); 852 - gen_udp_csum(udp_hdr, ip_hdr); 853 - gen_eth_hdr(ifobject, eth_hdr); 854 - gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE); 855 - } 856 - 857 - print_verbose("Sending %d packets on interface %s\n", 858 - (opt_pkt_count - 1), ifobject->ifname); 859 - tx_only_all(ifobject); 860 - } else if (ifobject->fv.vector == rx) { 861 - struct pollfd fds[MAX_SOCKS] = { }; 862 - int ret; 863 - 864 - if (!bidi_pass) 865 - thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx); 866 - 867 - print_verbose("Interface [%s] vector [Rx]\n", ifobject->ifname); 868 - if (stat_test_type != STAT_TEST_RX_FILL_EMPTY) 869 - xsk_populate_fill_ring(ifobject->umem); 870 - 871 - TAILQ_INIT(&head); 872 - if 
(debug_pkt_dump) { 873 - pkt_buf = calloc(num_frames, sizeof(*pkt_buf)); 874 - if (!pkt_buf) 875 - exit_with_error(errno); 876 - } 877 - 878 - fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk); 879 - fds[0].events = POLLIN; 880 - 881 - pthread_mutex_lock(&sync_mutex); 882 - pthread_cond_signal(&signal_rx_condition); 883 - pthread_mutex_unlock(&sync_mutex); 884 - 885 - while (1) { 886 - if (test_type == TEST_TYPE_POLL) { 887 - ret = poll(fds, 1, POLL_TMOUT); 888 - if (ret <= 0) 889 - continue; 890 - } 891 - 892 - if (test_type != TEST_TYPE_STATS) { 893 - rx_pkt(ifobject->xsk, fds); 894 - worker_pkt_validate(); 895 - } else { 896 - worker_stats_validate(ifobject); 897 - } 898 - 899 - if (sigvar) 900 - break; 901 - } 902 - 903 - if (test_type != TEST_TYPE_STATS) 904 - print_verbose("Received %d packets on interface %s\n", 905 - pkt_counter, ifobject->ifname); 906 - 907 - if (test_type == TEST_TYPE_TEARDOWN) 908 - print_verbose("Destroying socket\n"); 907 + if (sigvar) 908 + break; 909 909 } 910 910 911 - if ((test_type != TEST_TYPE_BIDI) || bidi_pass) { 912 - xsk_socket__delete(ifobject->xsk->xsk); 913 - (void)xsk_umem__delete(ifobject->umem->umem); 914 - } 911 + print_verbose("Received %d packets on interface %s\n", 912 + pkt_counter, ifobject->ifname); 913 + 914 + if (test_type == TEST_TYPE_TEARDOWN) 915 + print_verbose("Destroying socket\n"); 916 + 917 + testapp_cleanup_xsk_res(ifobject); 915 918 pthread_exit(NULL); 916 919 } 917 920 918 921 static void testapp_validate(void) 919 922 { 920 - struct timespec max_wait = { 0, 0 }; 921 923 bool bidi = test_type == TEST_TYPE_BIDI; 924 + bool bpf = test_type == TEST_TYPE_BPF_RES; 922 925 923 - pthread_attr_init(&attr); 924 - pthread_attr_setstacksize(&attr, THREAD_STACK); 925 - 926 - if ((test_type == TEST_TYPE_BIDI) && bidi_pass) { 927 - pthread_init_mutex(); 928 - if (!switching_notify) { 929 - print_verbose("Switching Tx/Rx vectors\n"); 930 - switching_notify++; 931 - } 932 - } 933 - 934 - 
pthread_mutex_lock(&sync_mutex); 926 + if (pthread_barrier_init(&barr, NULL, 2)) 927 + exit_with_error(errno); 935 928 936 929 /*Spawn RX thread */ 937 - if (!bidi || !bidi_pass) { 938 - if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1])) 939 - exit_with_error(errno); 940 - } else if (bidi && bidi_pass) { 941 - /*switch Tx/Rx vectors */ 942 - ifdict[0]->fv.vector = rx; 943 - if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0])) 944 - exit_with_error(errno); 945 - } 930 + pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx); 946 931 947 - if (clock_gettime(CLOCK_REALTIME, &max_wait)) 932 + pthread_barrier_wait(&barr); 933 + if (pthread_barrier_destroy(&barr)) 948 934 exit_with_error(errno); 949 - max_wait.tv_sec += TMOUT_SEC; 950 - 951 - if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT) 952 - exit_with_error(errno); 953 - 954 - pthread_mutex_unlock(&sync_mutex); 955 935 956 936 /*Spawn TX thread */ 957 - if (!bidi || !bidi_pass) { 958 - if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0])) 959 - exit_with_error(errno); 960 - } else if (bidi && bidi_pass) { 961 - /*switch Tx/Rx vectors */ 962 - ifdict[1]->fv.vector = tx; 963 - if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1])) 964 - exit_with_error(errno); 965 - } 937 + pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx); 966 938 967 939 pthread_join(t1, NULL); 968 940 pthread_join(t0, NULL); 969 941 970 - if (debug_pkt_dump) { 942 + if (debug_pkt_dump && test_type != TEST_TYPE_STATS) { 971 943 worker_pkt_dump(); 972 944 for (int iter = 0; iter < num_frames - 1; iter++) { 973 945 free(pkt_buf[iter]->payload); ··· 916 1008 free(pkt_buf); 917 1009 } 918 1010 919 - if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !(test_type == TEST_TYPE_STATS)) 1011 + if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS)) 920 1012 print_ksft_result(); 921 1013 } 922 1014 923 - static void 
testapp_sockets(void) 1015 + static void testapp_teardown(void) 924 1016 { 925 - for (int i = 0; i < ((test_type == TEST_TYPE_TEARDOWN) ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); 926 - i++) { 1017 + int i; 1018 + 1019 + for (i = 0; i < MAX_TEARDOWN_ITER; i++) { 927 1020 pkt_counter = 0; 928 1021 prev_pkt = -1; 929 1022 sigvar = 0; 930 1023 print_verbose("Creating socket\n"); 931 1024 testapp_validate(); 932 - test_type == TEST_TYPE_BIDI ? bidi_pass++ : bidi_pass; 1025 + } 1026 + 1027 + print_ksft_result(); 1028 + } 1029 + 1030 + static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2) 1031 + { 1032 + void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr; 1033 + enum fvector tmp_vector = ifobj1->fv.vector; 1034 + 1035 + ifobj1->func_ptr = ifobj2->func_ptr; 1036 + ifobj1->fv.vector = ifobj2->fv.vector; 1037 + 1038 + ifobj2->func_ptr = tmp_func_ptr; 1039 + ifobj2->fv.vector = tmp_vector; 1040 + 1041 + ifdict_tx = ifobj1; 1042 + ifdict_rx = ifobj2; 1043 + } 1044 + 1045 + static void testapp_bidi(void) 1046 + { 1047 + for (int i = 0; i < MAX_BIDI_ITER; i++) { 1048 + pkt_counter = 0; 1049 + prev_pkt = -1; 1050 + sigvar = 0; 1051 + print_verbose("Creating socket\n"); 1052 + testapp_validate(); 1053 + if (!second_step) { 1054 + print_verbose("Switching Tx/Rx vectors\n"); 1055 + swap_vectors(ifdict[1], ifdict[0]); 1056 + } 1057 + second_step = true; 1058 + } 1059 + 1060 + swap_vectors(ifdict[0], ifdict[1]); 1061 + 1062 + print_ksft_result(); 1063 + } 1064 + 1065 + static void swap_xsk_res(void) 1066 + { 1067 + xsk_socket__delete(ifdict_tx->xsk->xsk); 1068 + xsk_umem__delete(ifdict_tx->umem->umem); 1069 + xsk_socket__delete(ifdict_rx->xsk->xsk); 1070 + xsk_umem__delete(ifdict_rx->umem->umem); 1071 + ifdict_tx->umem = ifdict_tx->umem_arr[1]; 1072 + ifdict_tx->xsk = ifdict_tx->xsk_arr[1]; 1073 + ifdict_rx->umem = ifdict_rx->umem_arr[1]; 1074 + ifdict_rx->xsk = ifdict_rx->xsk_arr[1]; 1075 + } 1076 + 1077 + static void testapp_bpf_res(void) 1078 + { 1079 + int i; 1080 + 
1081 + for (i = 0; i < MAX_BPF_ITER; i++) { 1082 + pkt_counter = 0; 1083 + prev_pkt = -1; 1084 + sigvar = 0; 1085 + print_verbose("Creating socket\n"); 1086 + testapp_validate(); 1087 + if (!second_step) 1088 + swap_xsk_res(); 1089 + second_step = true; 933 1090 } 934 1091 935 1092 print_ksft_result(); ··· 1026 1053 print_ksft_result(); 1027 1054 } 1028 1055 1029 - static void init_iface_config(struct ifaceconfigobj *ifaceconfig) 1056 + static void init_iface(struct ifobject *ifobj, const char *dst_mac, 1057 + const char *src_mac, const char *dst_ip, 1058 + const char *src_ip, const u16 dst_port, 1059 + const u16 src_port, enum fvector vector) 1030 1060 { 1031 - /*Init interface0 */ 1032 - ifdict[0]->fv.vector = tx; 1033 - memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN); 1034 - memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN); 1035 - ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr; 1036 - ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr; 1037 - ifdict[0]->dst_port = ifaceconfig->dst_port; 1038 - ifdict[0]->src_port = ifaceconfig->src_port; 1061 + struct in_addr ip; 1039 1062 1040 - /*Init interface1 */ 1041 - ifdict[1]->fv.vector = rx; 1042 - memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN); 1043 - memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN); 1044 - ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr; 1045 - ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr; 1046 - ifdict[1]->dst_port = ifaceconfig->src_port; 1047 - ifdict[1]->src_port = ifaceconfig->dst_port; 1048 - } 1063 + memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); 1064 + memcpy(ifobj->src_mac, src_mac, ETH_ALEN); 1049 1065 1050 - static void *nsdisablemodethread(void *args) 1051 - { 1052 - struct targs *targs = args; 1066 + inet_aton(dst_ip, &ip); 1067 + ifobj->dst_ip = ip.s_addr; 1053 1068 1054 - targs->retptr = false; 1069 + inet_aton(src_ip, &ip); 1070 + ifobj->src_ip = ip.s_addr; 1055 1071 1056 - if (switch_namespace(targs->idx)) { 1057 - targs->retptr = 
bpf_set_link_xdp_fd(ifdict[targs->idx]->ifindex, -1, targs->flags); 1072 + ifobj->dst_port = dst_port; 1073 + ifobj->src_port = src_port; 1074 + 1075 + if (vector == tx) { 1076 + ifobj->fv.vector = tx; 1077 + ifobj->func_ptr = worker_testapp_validate_tx; 1078 + ifdict_tx = ifobj; 1058 1079 } else { 1059 - targs->retptr = errno; 1060 - print_verbose("Failed to switch namespace to %s\n", ifdict[targs->idx]->nsname); 1061 - } 1062 - 1063 - pthread_exit(NULL); 1064 - } 1065 - 1066 - static void disable_xdp_mode(int mode) 1067 - { 1068 - int err = 0; 1069 - __u32 flags = XDP_FLAGS_UPDATE_IF_NOEXIST | mode; 1070 - char *mode_str = mode & XDP_FLAGS_SKB_MODE ? "skb" : "drv"; 1071 - 1072 - for (int i = 0; i < MAX_INTERFACES; i++) { 1073 - if (strcmp(ifdict[i]->nsname, "")) { 1074 - struct targs *targs; 1075 - 1076 - targs = malloc(sizeof(*targs)); 1077 - memset(targs, 0, sizeof(*targs)); 1078 - if (!targs) 1079 - exit_with_error(errno); 1080 - 1081 - targs->idx = i; 1082 - targs->flags = flags; 1083 - if (pthread_create(&ns_thread, NULL, nsdisablemodethread, targs)) 1084 - exit_with_error(errno); 1085 - 1086 - pthread_join(ns_thread, NULL); 1087 - err = targs->retptr; 1088 - free(targs); 1089 - } else { 1090 - err = bpf_set_link_xdp_fd(ifdict[i]->ifindex, -1, flags); 1091 - } 1092 - 1093 - if (err) { 1094 - print_verbose("Failed to disable %s mode on interface %s\n", 1095 - mode_str, ifdict[i]->ifname); 1096 - exit_with_error(err); 1097 - } 1098 - 1099 - print_verbose("Disabled %s mode for interface: %s\n", mode_str, ifdict[i]->ifname); 1100 - configured_mode = mode & XDP_FLAGS_SKB_MODE ? 
TEST_MODE_DRV : TEST_MODE_SKB; 1080 + ifobj->fv.vector = rx; 1081 + ifobj->func_ptr = worker_testapp_validate_rx; 1082 + ifdict_rx = ifobj; 1101 1083 } 1102 1084 } 1103 1085 ··· 1063 1135 /* reset defaults after potential previous test */ 1064 1136 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 1065 1137 pkt_counter = 0; 1066 - switching_notify = 0; 1067 - bidi_pass = 0; 1138 + second_step = 0; 1068 1139 prev_pkt = -1; 1069 - ifdict[0]->fv.vector = tx; 1070 - ifdict[1]->fv.vector = rx; 1071 1140 sigvar = 0; 1072 1141 stat_test_type = -1; 1073 1142 rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; 1074 1143 frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; 1075 1144 1145 + configured_mode = mode; 1146 + 1076 1147 switch (mode) { 1077 1148 case (TEST_MODE_SKB): 1078 - if (configured_mode == TEST_MODE_DRV) 1079 - disable_xdp_mode(XDP_FLAGS_DRV_MODE); 1080 1149 xdp_flags |= XDP_FLAGS_SKB_MODE; 1081 1150 break; 1082 1151 case (TEST_MODE_DRV): 1083 - if (configured_mode == TEST_MODE_SKB) 1084 - disable_xdp_mode(XDP_FLAGS_SKB_MODE); 1085 1152 xdp_flags |= XDP_FLAGS_DRV_MODE; 1086 1153 break; 1087 1154 default: 1088 1155 break; 1089 1156 } 1090 1157 1091 - pthread_init_mutex(); 1092 - 1093 - if (test_type == TEST_TYPE_STATS) 1158 + switch (test_type) { 1159 + case TEST_TYPE_STATS: 1094 1160 testapp_stats(); 1095 - else if ((test_type != TEST_TYPE_TEARDOWN) && (test_type != TEST_TYPE_BIDI)) 1161 + break; 1162 + case TEST_TYPE_TEARDOWN: 1163 + testapp_teardown(); 1164 + break; 1165 + case TEST_TYPE_BIDI: 1166 + testapp_bidi(); 1167 + break; 1168 + case TEST_TYPE_BPF_RES: 1169 + testapp_bpf_res(); 1170 + break; 1171 + default: 1096 1172 testapp_validate(); 1097 - else 1098 - testapp_sockets(); 1099 - 1100 - pthread_destroy_mutex(); 1173 + break; 1174 + } 1101 1175 } 1102 1176 1103 1177 int main(int argc, char **argv) 1104 1178 { 1105 1179 struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY }; 1180 + bool failure = false; 1181 + int i, j; 1106 1182 1107 1183 if 
(setrlimit(RLIMIT_MEMLOCK, &_rlim)) 1108 1184 exit_with_error(errno); 1109 - 1110 - const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; 1111 - const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; 1112 - const char *IP1 = "192.168.100.162"; 1113 - const char *IP2 = "192.168.100.161"; 1114 - u16 UDP_DST_PORT = 2020; 1115 - u16 UDP_SRC_PORT = 2121; 1116 - int i, j; 1117 - 1118 - ifaceconfig = malloc(sizeof(struct ifaceconfigobj)); 1119 - memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN); 1120 - memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN); 1121 - inet_aton(IP1, &ifaceconfig->dst_ip); 1122 - inet_aton(IP2, &ifaceconfig->src_ip); 1123 - ifaceconfig->dst_port = UDP_DST_PORT; 1124 - ifaceconfig->src_port = UDP_SRC_PORT; 1125 1185 1126 1186 for (int i = 0; i < MAX_INTERFACES; i++) { 1127 1187 ifdict[i] = malloc(sizeof(struct ifobject)); ··· 1117 1201 exit_with_error(errno); 1118 1202 1119 1203 ifdict[i]->ifdict_index = i; 1204 + ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *)); 1205 + if (!ifdict[i]->xsk_arr) { 1206 + failure = true; 1207 + goto cleanup; 1208 + } 1209 + ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *)); 1210 + if (!ifdict[i]->umem_arr) { 1211 + failure = true; 1212 + goto cleanup; 1213 + } 1120 1214 } 1121 1215 1122 1216 setlocale(LC_ALL, ""); ··· 1135 1209 1136 1210 num_frames = ++opt_pkt_count; 1137 1211 1138 - init_iface_config(ifaceconfig); 1139 - 1140 - disable_xdp_mode(XDP_FLAGS_DRV_MODE); 1212 + init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx); 1213 + init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx); 1141 1214 1142 1215 ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); 1143 1216 ··· 1145 1220 run_pkt_test(i, j); 1146 1221 } 1147 1222 1148 - for (int i = 0; i < MAX_INTERFACES; i++) 1223 + cleanup: 1224 + for (int i = 0; i < MAX_INTERFACES; i++) { 1225 + if (ifdict[i]->ns_fd != -1) 1226 + close(ifdict[i]->ns_fd); 1227 + free(ifdict[i]->xsk_arr); 1228 + free(ifdict[i]->umem_arr); 1149 1229 
free(ifdict[i]); 1230 + } 1231 + 1232 + if (failure) 1233 + exit_with_error(errno); 1150 1234 1151 1235 ksft_exit_pass(); 1152 1236
+15 -34
tools/testing/selftests/bpf/xdpxceiver.h
··· 23 23 #define MAX_SOCKS 1 24 24 #define MAX_TEARDOWN_ITER 10 25 25 #define MAX_BIDI_ITER 2 26 + #define MAX_BPF_ITER 2 26 27 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ 27 28 sizeof(struct udphdr)) 28 29 #define MIN_PKT_SIZE 64 ··· 34 33 #define IP_PKT_TOS 0x9 35 34 #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) 36 35 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) 37 - #define TMOUT_SEC (3) 38 36 #define EOT (-1) 39 37 #define USLEEP_MAX 200000 40 - #define THREAD_STACK 60000000 41 38 #define SOCK_RECONF_CTR 10 42 39 #define BATCH_SIZE 64 43 40 #define POLL_TMOUT 1000 44 - #define NEED_WAKEUP true 45 41 #define DEFAULT_PKT_CNT 10000 46 42 #define RX_FULL_RXQSIZE 32 47 43 ··· 61 63 TEST_TYPE_TEARDOWN, 62 64 TEST_TYPE_BIDI, 63 65 TEST_TYPE_STATS, 66 + TEST_TYPE_BPF_RES, 64 67 TEST_TYPE_MAX 65 68 }; 66 69 ··· 76 77 static int configured_mode = TEST_MODE_UNCONFIGURED; 77 78 static u8 debug_pkt_dump; 78 79 static u32 num_frames; 79 - static u8 switching_notify; 80 - static u8 bidi_pass; 80 + static bool second_step; 81 81 static int test_type; 82 82 83 - static int opt_queue; 84 83 static int opt_pkt_count; 85 84 static u8 opt_verbose; 86 85 ··· 122 125 u32 seqnum; 123 126 }; 124 127 125 - struct ifaceconfigobj { 126 - u8 dst_mac[ETH_ALEN]; 127 - u8 src_mac[ETH_ALEN]; 128 - struct in_addr dst_ip; 129 - struct in_addr src_ip; 130 - u16 src_port; 131 - u16 dst_port; 132 - } *ifaceconfig; 133 - 134 128 struct ifobject { 135 - int ifindex; 136 - int ifdict_index; 137 129 char ifname[MAX_INTERFACE_NAME_CHARS]; 138 130 char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; 139 - struct flow_vector fv; 140 131 struct xsk_socket_info *xsk; 132 + struct xsk_socket_info **xsk_arr; 133 + struct xsk_umem_info **umem_arr; 141 134 struct xsk_umem_info *umem; 142 - u8 dst_mac[ETH_ALEN]; 143 - u8 src_mac[ETH_ALEN]; 135 + void *(*func_ptr)(void *arg); 136 + struct flow_vector fv; 137 + int ns_fd; 138 + int ifdict_index; 144 139 u32 
dst_ip; 145 140 u32 src_ip; 146 141 u16 src_port; 147 142 u16 dst_port; 143 + u8 dst_mac[ETH_ALEN]; 144 + u8 src_mac[ETH_ALEN]; 148 145 }; 149 146 150 147 static struct ifobject *ifdict[MAX_INTERFACES]; 148 + static struct ifobject *ifdict_rx; 149 + static struct ifobject *ifdict_tx; 151 150 152 151 /*threads*/ 153 - atomic_int spinning_tx; 154 - atomic_int spinning_rx; 155 - pthread_mutex_t sync_mutex; 156 - pthread_mutex_t sync_mutex_tx; 157 - pthread_cond_t signal_rx_condition; 158 - pthread_cond_t signal_tx_condition; 159 - pthread_t t0, t1, ns_thread; 160 - pthread_attr_t attr; 161 - 162 - struct targs { 163 - u8 retptr; 164 - int idx; 165 - u32 flags; 166 - }; 152 + pthread_barrier_t barr; 153 + pthread_t t0, t1; 167 154 168 155 TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head); 169 156 struct head_s *head_p;