Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'Add SEC("ksyscall") support'

Andrii Nakryiko says:

====================

Add SEC("ksyscall")/SEC("kretsyscall") sections and corresponding
bpf_program__attach_ksyscall() API that simplifies tracing kernel syscalls
through kprobe mechanism. Kprobing syscalls isn't trivial due to varying
syscall handler names in the kernel and various ways syscall arguments are
passed, depending on kernel architecture and configuration. SEC("ksyscall")
allows user to not care about such details and just get access to syscall
input arguments, while libbpf takes care of necessary feature detection logic.

There are still more quirks that are not straightforward to hide completely
(see comments about mmap(), clone() and compat syscalls), so in such more
advanced scenarios user might need to fall back to plain SEC("kprobe")
approach, but for absolute majority of users SEC("ksyscall") is a big
improvement.

As part of this patch set libbpf adds two more virtual __kconfig externs, in
addition to existing LINUX_KERNEL_VERSION: LINUX_HAS_BPF_COOKIE and
LINUX_HAS_SYSCALL_WRAPPER, which lets libbpf-provided BPF-side code minimize
external dependencies and assumptions and lets the user-space part of libbpf
perform all the feature detection logic. This benefits USDT support code,
which now doesn't depend on BPF CO-RE for its functionality.

v1->v2:
- normalize extern variable-related warn and debug message formats (Alan);
rfc->v1:
- drop dependency on kallsyms and speed up SYSCALL_WRAPPER detection (Alexei);
- drop dependency on /proc/config.gz in bpf_tracing.h (Yaniv);
- add doc comment and emphasize mmap(), clone() and compat quirks that are
not supported (Ilya);
- use mechanism similar to LINUX_KERNEL_VERSION to also improve USDT code.
====================

Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+290 -110
+38 -13
tools/lib/bpf/bpf_tracing.h
··· 2 2 #ifndef __BPF_TRACING_H__ 3 3 #define __BPF_TRACING_H__ 4 4 5 + #include <bpf/bpf_helpers.h> 6 + 5 7 /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ 6 8 #if defined(__TARGET_ARCH_x86) 7 9 #define bpf_target_x86 ··· 142 140 #define __PT_RC_REG gprs[2] 143 141 #define __PT_SP_REG gprs[15] 144 142 #define __PT_IP_REG psw.addr 145 - #define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) 143 + #define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) 146 144 #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2) 147 145 148 146 #elif defined(bpf_target_arm) ··· 176 174 #define __PT_RC_REG regs[0] 177 175 #define __PT_SP_REG sp 178 176 #define __PT_IP_REG pc 179 - #define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; }) 177 + #define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) 180 178 #define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0) 181 179 182 180 #elif defined(bpf_target_mips) ··· 495 493 } \ 496 494 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 497 495 496 + /* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ 498 497 #define ___bpf_syscall_args0() ctx 499 - #define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) 500 - #define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) 501 - #define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) 502 - #define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) 503 - #define ___bpf_syscall_args5(x, args...) 
___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) 498 + #define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) 499 + #define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) 500 + #define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) 501 + #define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) 502 + #define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) 504 503 #define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) 505 504 505 + /* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ 506 + #define ___bpf_syswrap_args0() ctx 507 + #define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) 508 + #define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) 509 + #define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) 510 + #define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) 511 + #define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) 512 + #define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) 513 + 506 514 /* 507 - * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for 515 + * BPF_KSYSCALL is a variant of BPF_KPROBE, which is intended for 508 516 * tracing syscall functions, like __x64_sys_close. 
It hides the underlying 509 517 * platform-specific low-level way of getting syscall input arguments from 510 518 * struct pt_regs, and provides a familiar typed and named function arguments 511 519 * syntax and semantics of accessing syscall input parameters. 512 520 * 513 - * Original struct pt_regs* context is preserved as 'ctx' argument. This might 521 + * Original struct pt_regs * context is preserved as 'ctx' argument. This might 514 522 * be necessary when using BPF helpers like bpf_perf_event_output(). 515 523 * 516 - * This macro relies on BPF CO-RE support. 524 + * At the moment BPF_KSYSCALL does not handle all the calling convention 525 + * quirks for mmap(), clone() and compat syscalls transparrently. This may or 526 + * may not change in the future. User needs to take extra measures to handle 527 + * such quirks explicitly, if necessary. 528 + * 529 + * This macro relies on BPF CO-RE support and virtual __kconfig externs. 517 530 */ 518 - #define BPF_KPROBE_SYSCALL(name, args...) \ 531 + #define BPF_KSYSCALL(name, args...) \ 519 532 name(struct pt_regs *ctx); \ 533 + extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ 520 534 static __attribute__((always_inline)) typeof(name(0)) \ 521 535 ____##name(struct pt_regs *ctx, ##args); \ 522 536 typeof(name(0)) name(struct pt_regs *ctx) \ 523 537 { \ 524 - struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx); \ 538 + struct pt_regs *regs = LINUX_HAS_SYSCALL_WRAPPER \ 539 + ? 
(struct pt_regs *)PT_REGS_PARM1(ctx) \ 540 + : ctx; \ 525 541 _Pragma("GCC diagnostic push") \ 526 542 _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 527 - return ____##name(___bpf_syscall_args(args)); \ 543 + if (LINUX_HAS_SYSCALL_WRAPPER) \ 544 + return ____##name(___bpf_syswrap_args(args)); \ 545 + else \ 546 + return ____##name(___bpf_syscall_args(args)); \ 528 547 _Pragma("GCC diagnostic pop") \ 529 548 } \ 530 549 static __attribute__((always_inline)) typeof(name(0)) \ 531 550 ____##name(struct pt_regs *ctx, ##args) 551 + 552 + #define BPF_KPROBE_SYSCALL BPF_KSYSCALL 532 553 533 554 #endif
+175 -41
tools/lib/bpf/libbpf.c
··· 1694 1694 switch (ext->kcfg.type) { 1695 1695 case KCFG_BOOL: 1696 1696 if (value == 'm') { 1697 - pr_warn("extern (kcfg) %s=%c should be tristate or char\n", 1697 + pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", 1698 1698 ext->name, value); 1699 1699 return -EINVAL; 1700 1700 } ··· 1715 1715 case KCFG_INT: 1716 1716 case KCFG_CHAR_ARR: 1717 1717 default: 1718 - pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", 1718 + pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 1719 1719 ext->name, value); 1720 1720 return -EINVAL; 1721 1721 } ··· 1729 1729 size_t len; 1730 1730 1731 1731 if (ext->kcfg.type != KCFG_CHAR_ARR) { 1732 - pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); 1732 + pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 1733 + ext->name, value); 1733 1734 return -EINVAL; 1734 1735 } 1735 1736 ··· 1744 1743 /* strip quotes */ 1745 1744 len -= 2; 1746 1745 if (len >= ext->kcfg.sz) { 1747 - pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", 1746 + pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 1748 1747 ext->name, value, len, ext->kcfg.sz - 1); 1749 1748 len = ext->kcfg.sz - 1; 1750 1749 } ··· 1801 1800 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 1802 1801 __u64 value) 1803 1802 { 1804 - if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 1805 - pr_warn("extern (kcfg) %s=%llu should be integer\n", 1803 + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 1804 + ext->kcfg.type != KCFG_BOOL) { 1805 + pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 1806 1806 ext->name, (unsigned long long)value); 1807 1807 return -EINVAL; 1808 1808 } 1809 + if (ext->kcfg.type == KCFG_BOOL && value > 1) { 1810 + pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 1811 + 
ext->name, (unsigned long long)value); 1812 + return -EINVAL; 1813 + 1814 + } 1809 1815 if (!is_kcfg_value_in_range(ext, value)) { 1810 - pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", 1816 + pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 1811 1817 ext->name, (unsigned long long)value, ext->kcfg.sz); 1812 1818 return -ERANGE; 1813 1819 } ··· 1878 1870 /* assume integer */ 1879 1871 err = parse_u64(value, &num); 1880 1872 if (err) { 1881 - pr_warn("extern (kcfg) %s=%s should be integer\n", 1882 - ext->name, value); 1873 + pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 1883 1874 return err; 1875 + } 1876 + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 1877 + pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 1878 + return -EINVAL; 1884 1879 } 1885 1880 err = set_kcfg_value_num(ext, ext_val, num); 1886 1881 break; 1887 1882 } 1888 1883 if (err) 1889 1884 return err; 1890 - pr_debug("extern (kcfg) %s=%s\n", ext->name, value); 1885 + pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 1891 1886 return 0; 1892 1887 } 1893 1888 ··· 3698 3687 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 3699 3688 &ext->kcfg.is_signed); 3700 3689 if (ext->kcfg.type == KCFG_UNKNOWN) { 3701 - pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); 3690 + pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 3702 3691 return -ENOTSUP; 3703 3692 } 3704 3693 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { ··· 4670 4659 strs, sizeof(strs))); 4671 4660 } 4672 4661 4662 + static int probe_kern_syscall_wrapper(void); 4663 + 4673 4664 enum kern_feature_result { 4674 4665 FEAT_UNKNOWN = 0, 4675 4666 FEAT_SUPPORTED = 1, ··· 4739 4726 }, 4740 4727 [FEAT_BTF_ENUM64] = { 4741 4728 "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, 4729 + }, 4730 + [FEAT_SYSCALL_WRAPPER] = { 4731 + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, 4742 
4732 }, 4743 4733 }; 4744 4734 ··· 7303 7287 return 0; 7304 7288 7305 7289 if (ext->is_set && ext->ksym.addr != sym_addr) { 7306 - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", 7290 + pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 7307 7291 sym_name, ext->ksym.addr, sym_addr); 7308 7292 return -EINVAL; 7309 7293 } 7310 7294 if (!ext->is_set) { 7311 7295 ext->is_set = true; 7312 7296 ext->ksym.addr = sym_addr; 7313 - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); 7297 + pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 7314 7298 } 7315 7299 return 0; 7316 7300 } ··· 7514 7498 for (i = 0; i < obj->nr_extern; i++) { 7515 7499 ext = &obj->externs[i]; 7516 7500 7517 - if (ext->type == EXT_KCFG && 7518 - strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 7519 - void *ext_val = kcfg_data + ext->kcfg.data_off; 7520 - __u32 kver = get_kernel_version(); 7521 - 7522 - if (!kver) { 7523 - pr_warn("failed to get kernel version\n"); 7524 - return -EINVAL; 7525 - } 7526 - err = set_kcfg_value_num(ext, ext_val, kver); 7527 - if (err) 7528 - return err; 7529 - pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); 7530 - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { 7531 - need_config = true; 7532 - } else if (ext->type == EXT_KSYM) { 7501 + if (ext->type == EXT_KSYM) { 7533 7502 if (ext->ksym.type_id) 7534 7503 need_vmlinux_btf = true; 7535 7504 else 7536 7505 need_kallsyms = true; 7506 + continue; 7507 + } else if (ext->type == EXT_KCFG) { 7508 + void *ext_ptr = kcfg_data + ext->kcfg.data_off; 7509 + __u64 value = 0; 7510 + 7511 + /* Kconfig externs need actual /proc/config.gz */ 7512 + if (str_has_pfx(ext->name, "CONFIG_")) { 7513 + need_config = true; 7514 + continue; 7515 + } 7516 + 7517 + /* Virtual kcfg externs are customly handled by libbpf */ 7518 + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 7519 + value = get_kernel_version(); 7520 + if (!value) { 
7521 + pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 7522 + return -EINVAL; 7523 + } 7524 + } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 7525 + value = kernel_supports(obj, FEAT_BPF_COOKIE); 7526 + } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 7527 + value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 7528 + } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 7529 + /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 7530 + * __kconfig externs, where LINUX_ ones are virtual and filled out 7531 + * customly by libbpf (their values don't come from Kconfig). 7532 + * If LINUX_xxx variable is not recognized by libbpf, but is marked 7533 + * __weak, it defaults to zero value, just like for CONFIG_xxx 7534 + * externs. 7535 + */ 7536 + pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 7537 + return -EINVAL; 7538 + } 7539 + 7540 + err = set_kcfg_value_num(ext, ext_ptr, value); 7541 + if (err) 7542 + return err; 7543 + pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 7544 + ext->name, (long long)value); 7537 7545 } else { 7538 - pr_warn("unrecognized extern '%s'\n", ext->name); 7546 + pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 7539 7547 return -EINVAL; 7540 7548 } 7541 7549 } ··· 7595 7555 ext = &obj->externs[i]; 7596 7556 7597 7557 if (!ext->is_set && !ext->is_weak) { 7598 - pr_warn("extern %s (strong) not resolved\n", ext->name); 7558 + pr_warn("extern '%s' (strong): not resolved\n", ext->name); 7599 7559 return -ESRCH; 7600 7560 } else if (!ext->is_set) { 7601 - pr_debug("extern %s (weak) not resolved, defaulting to zero\n", 7561 + pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 7602 7562 ext->name); 7603 7563 } 7604 7564 } ··· 8426 8386 8427 8387 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8428 8388 static int attach_uprobe(const struct bpf_program *prog, long cookie, 
struct bpf_link **link); 8389 + static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8429 8390 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8430 8391 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8431 8392 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); ··· 8447 8406 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8448 8407 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8449 8408 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8409 + SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8410 + SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8450 8411 SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), 8451 8412 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), 8452 8413 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), ··· 9805 9762 { 9806 9763 struct perf_event_attr attr = {}; 9807 9764 char errmsg[STRERR_BUFSIZE]; 9808 - int type, pfd, err; 9765 + int type, pfd; 9809 9766 9810 9767 if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 9811 9768 return -EINVAL; ··· 9841 9798 pid < 0 ? -1 : pid /* pid */, 9842 9799 pid == -1 ? 0 : -1 /* cpu */, 9843 9800 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 9844 - if (pfd < 0) { 9845 - err = -errno; 9846 - pr_warn("%s perf_event_open() failed: %s\n", 9847 - uprobe ? "uprobe" : "kprobe", 9848 - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 9849 - return err; 9850 - } 9851 - return pfd; 9801 + return pfd >= 0 ? pfd : -errno; 9852 9802 } 9853 9803 9854 9804 static int append_to_file(const char *file, const char *fmt, ...) 
··· 9946 9910 return err; 9947 9911 } 9948 9912 9913 + static const char *arch_specific_syscall_pfx(void) 9914 + { 9915 + #if defined(__x86_64__) 9916 + return "x64"; 9917 + #elif defined(__i386__) 9918 + return "ia32"; 9919 + #elif defined(__s390x__) 9920 + return "s390x"; 9921 + #elif defined(__s390__) 9922 + return "s390"; 9923 + #elif defined(__arm__) 9924 + return "arm"; 9925 + #elif defined(__aarch64__) 9926 + return "arm64"; 9927 + #elif defined(__mips__) 9928 + return "mips"; 9929 + #elif defined(__riscv) 9930 + return "riscv"; 9931 + #else 9932 + return NULL; 9933 + #endif 9934 + } 9935 + 9936 + static int probe_kern_syscall_wrapper(void) 9937 + { 9938 + char syscall_name[64]; 9939 + const char *ksys_pfx; 9940 + 9941 + ksys_pfx = arch_specific_syscall_pfx(); 9942 + if (!ksys_pfx) 9943 + return 0; 9944 + 9945 + snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 9946 + 9947 + if (determine_kprobe_perf_type() >= 0) { 9948 + int pfd; 9949 + 9950 + pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 9951 + if (pfd >= 0) 9952 + close(pfd); 9953 + 9954 + return pfd >= 0 ? 
1 : 0; 9955 + } else { /* legacy mode */ 9956 + char probe_name[128]; 9957 + 9958 + gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 9959 + if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 9960 + return 0; 9961 + 9962 + (void)remove_kprobe_event_legacy(probe_name, false); 9963 + return 1; 9964 + } 9965 + } 9966 + 9949 9967 struct bpf_link * 9950 9968 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 9951 9969 const char *func_name, ··· 10083 9993 ); 10084 9994 10085 9995 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 9996 + } 9997 + 9998 + struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 9999 + const char *syscall_name, 10000 + const struct bpf_ksyscall_opts *opts) 10001 + { 10002 + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 10003 + char func_name[128]; 10004 + 10005 + if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 10006 + return libbpf_err_ptr(-EINVAL); 10007 + 10008 + if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 10009 + snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 10010 + arch_specific_syscall_pfx(), syscall_name); 10011 + } else { 10012 + snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 10013 + } 10014 + 10015 + kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 10016 + kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 10017 + 10018 + return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 10086 10019 } 10087 10020 10088 10021 /* Adapted from perf/util/string.c */ ··· 10276 10163 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 10277 10164 free(func); 10278 10165 return libbpf_get_error(*link); 10166 + } 10167 + 10168 + static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10169 + { 10170 + LIBBPF_OPTS(bpf_ksyscall_opts, opts); 10171 + const char *syscall_name; 10172 + 10173 + *link = NULL; 10174 + 10175 + /* no auto-attach for 
SEC("ksyscall") and SEC("kretsyscall") */ 10176 + if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 10177 + return 0; 10178 + 10179 + opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 10180 + if (opts.retprobe) 10181 + syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 10182 + else 10183 + syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 10184 + 10185 + *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 10186 + return *link ? 0 : -errno; 10279 10187 } 10280 10188 10281 10189 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+46
tools/lib/bpf/libbpf.h
··· 457 457 const char *pattern, 458 458 const struct bpf_kprobe_multi_opts *opts); 459 459 460 + struct bpf_ksyscall_opts { 461 + /* size of this struct, for forward/backward compatiblity */ 462 + size_t sz; 463 + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ 464 + __u64 bpf_cookie; 465 + /* attach as return probe? */ 466 + bool retprobe; 467 + size_t :0; 468 + }; 469 + #define bpf_ksyscall_opts__last_field retprobe 470 + 471 + /** 472 + * @brief **bpf_program__attach_ksyscall()** attaches a BPF program 473 + * to kernel syscall handler of a specified syscall. Optionally it's possible 474 + * to request to install retprobe that will be triggered at syscall exit. It's 475 + * also possible to associate BPF cookie (though options). 476 + * 477 + * Libbpf automatically will determine correct full kernel function name, 478 + * which depending on system architecture and kernel version/configuration 479 + * could be of the form __<arch>_sys_<syscall> or __se_sys_<syscall>, and will 480 + * attach specified program using kprobe/kretprobe mechanism. 481 + * 482 + * **bpf_program__attach_ksyscall()** is an API counterpart of declarative 483 + * **SEC("ksyscall/<syscall>")** annotation of BPF programs. 484 + * 485 + * At the moment **SEC("ksyscall")** and **bpf_program__attach_ksyscall()** do 486 + * not handle all the calling convention quirks for mmap(), clone() and compat 487 + * syscalls. It also only attaches to "native" syscall interfaces. If host 488 + * system supports compat syscalls or defines 32-bit syscalls in 64-bit 489 + * kernel, such syscall interfaces won't be attached to by libbpf. 490 + * 491 + * These limitations may or may not change in the future. Therefore it is 492 + * recommended to use SEC("kprobe") for these syscalls or if working with 493 + * compat and 32-bit interfaces is required. 
494 + * 495 + * @param prog BPF program to attach 496 + * @param syscall_name Symbolic name of the syscall (e.g., "bpf") 497 + * @param opts Additional options (see **struct bpf_ksyscall_opts**) 498 + * @return Reference to the newly created BPF link; or NULL is returned on 499 + * error, error code is stored in errno 500 + */ 501 + LIBBPF_API struct bpf_link * 502 + bpf_program__attach_ksyscall(const struct bpf_program *prog, 503 + const char *syscall_name, 504 + const struct bpf_ksyscall_opts *opts); 505 + 460 506 struct bpf_uprobe_opts { 461 507 /* size of this struct, for forward/backward compatiblity */ 462 508 size_t sz;
+1
tools/lib/bpf/libbpf.map
··· 356 356 LIBBPF_1.0.0 { 357 357 global: 358 358 bpf_prog_query_opts; 359 + bpf_program__attach_ksyscall; 359 360 btf__add_enum64; 360 361 btf__add_enum64_value; 361 362 libbpf_bpf_attach_type_str;
+2
tools/lib/bpf/libbpf_internal.h
··· 352 352 FEAT_BPF_COOKIE, 353 353 /* BTF_KIND_ENUM64 support and BTF_KIND_ENUM kflag support */ 354 354 FEAT_BTF_ENUM64, 355 + /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ 356 + FEAT_SYSCALL_WRAPPER, 355 357 __FEAT_CNT, 356 358 }; 357 359
+2 -14
tools/lib/bpf/usdt.bpf.h
··· 6 6 #include <linux/errno.h> 7 7 #include <bpf/bpf_helpers.h> 8 8 #include <bpf/bpf_tracing.h> 9 - #include <bpf/bpf_core_read.h> 10 9 11 10 /* Below types and maps are internal implementation details of libbpf's USDT 12 11 * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should ··· 28 29 */ 29 30 #ifndef BPF_USDT_MAX_IP_CNT 30 31 #define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) 31 - #endif 32 - /* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is 33 - * the only dependency on CO-RE, so if it's undesirable, user can override 34 - * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not. 35 - */ 36 - #ifndef BPF_USDT_HAS_BPF_COOKIE 37 - #define BPF_USDT_HAS_BPF_COOKIE \ 38 - bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) 39 32 #endif 40 33 41 34 enum __bpf_usdt_arg_type { ··· 74 83 __type(value, __u32); 75 84 } __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; 76 85 77 - /* don't rely on user's BPF code to have latest definition of bpf_func_id */ 78 - enum bpf_func_id___usdt { 79 - BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ 80 - }; 86 + extern const _Bool LINUX_HAS_BPF_COOKIE __kconfig; 81 87 82 88 static __always_inline 83 89 int __bpf_usdt_spec_id(struct pt_regs *ctx) 84 90 { 85 - if (!BPF_USDT_HAS_BPF_COOKIE) { 91 + if (!LINUX_HAS_BPF_COOKIE) { 86 92 long ip = PT_REGS_IP(ctx); 87 93 int *spec_id_ptr; 88 94
+7 -10
tools/testing/selftests/bpf/prog_tests/core_extern.c
··· 39 39 "CONFIG_STR=\"abracad\"\n" 40 40 "CONFIG_MISSING=0", 41 41 .data = { 42 + .unkn_virt_val = 0, 42 43 .bpf_syscall = false, 43 44 .tristate_val = TRI_MODULE, 44 45 .bool_val = true, ··· 122 121 void test_core_extern(void) 123 122 { 124 123 const uint32_t kern_ver = get_kernel_version(); 125 - int err, duration = 0, i, j; 124 + int err, i, j; 126 125 struct test_core_extern *skel = NULL; 127 126 uint64_t *got, *exp; 128 127 int n = sizeof(*skel->data) / sizeof(uint64_t); ··· 137 136 continue; 138 137 139 138 skel = test_core_extern__open_opts(&opts); 140 - if (CHECK(!skel, "skel_open", "skeleton open failed\n")) 139 + if (!ASSERT_OK_PTR(skel, "skel_open")) 141 140 goto cleanup; 142 141 err = test_core_extern__load(skel); 143 142 if (t->fails) { 144 - CHECK(!err, "skel_load", 145 - "shouldn't succeed open/load of skeleton\n"); 143 + ASSERT_ERR(err, "skel_load_should_fail"); 146 144 goto cleanup; 147 - } else if (CHECK(err, "skel_load", 148 - "failed to open/load skeleton\n")) { 145 + } else if (!ASSERT_OK(err, "skel_load")) { 149 146 goto cleanup; 150 147 } 151 148 err = test_core_extern__attach(skel); 152 - if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) 149 + if (!ASSERT_OK(err, "attach_raw_tp")) 153 150 goto cleanup; 154 151 155 152 usleep(1); ··· 157 158 got = (uint64_t *)skel->data; 158 159 exp = (uint64_t *)&t->data; 159 160 for (j = 0; j < n; j++) { 160 - CHECK(got[j] != exp[j], "check_res", 161 - "result #%d: expected %llx, but got %llx\n", 162 - j, (__u64)exp[j], (__u64)got[j]); 161 + ASSERT_EQ(got[j], exp[j], "result"); 163 162 } 164 163 cleanup: 165 164 test_core_extern__destroy(skel);
+3 -3
tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
··· 64 64 return 0; 65 65 } 66 66 67 - SEC("kprobe/" SYS_PREFIX "sys_prctl") 68 - int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, 69 - unsigned long arg3, unsigned long arg4, unsigned long arg5) 67 + SEC("ksyscall/prctl") 68 + int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, 69 + unsigned long arg3, unsigned long arg4, unsigned long arg5) 70 70 { 71 71 pid_t pid = bpf_get_current_pid_tgid() >> 32; 72 72
+7 -8
tools/testing/selftests/bpf/progs/test_attach_probe.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 // Copyright (c) 2017 Facebook 3 3 4 - #include <linux/ptrace.h> 5 - #include <linux/bpf.h> 4 + #include "vmlinux.h" 6 5 #include <bpf/bpf_helpers.h> 7 6 #include <bpf/bpf_tracing.h> 8 - #include <stdbool.h> 7 + #include <bpf/bpf_core_read.h> 9 8 #include "bpf_misc.h" 10 9 11 10 int kprobe_res = 0; ··· 30 31 return 0; 31 32 } 32 33 33 - SEC("kprobe/" SYS_PREFIX "sys_nanosleep") 34 - int BPF_KPROBE(handle_kprobe_auto) 34 + SEC("ksyscall/nanosleep") 35 + int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) 35 36 { 36 37 kprobe2_res = 11; 37 38 return 0; ··· 55 56 return 0; 56 57 } 57 58 58 - SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") 59 - int BPF_KRETPROBE(handle_kretprobe_auto) 59 + SEC("kretsyscall/nanosleep") 60 + int BPF_KRETPROBE(handle_kretprobe_auto, int ret) 60 61 { 61 62 kretprobe2_res = 22; 62 - return 0; 63 + return ret; 63 64 } 64 65 65 66 SEC("uprobe")
+3
tools/testing/selftests/bpf/progs/test_core_extern.c
··· 11 11 static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; 12 12 13 13 extern int LINUX_KERNEL_VERSION __kconfig; 14 + extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak; 14 15 extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ 15 16 extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; 16 17 extern bool CONFIG_BOOL __kconfig __weak; ··· 23 22 extern uint64_t CONFIG_MISSING __kconfig __weak; 24 23 25 24 uint64_t kern_ver = -1; 25 + uint64_t unkn_virt_val = -1; 26 26 uint64_t bpf_syscall = -1; 27 27 uint64_t tristate_val = -1; 28 28 uint64_t bool_val = -1; ··· 40 38 int i; 41 39 42 40 kern_ver = LINUX_KERNEL_VERSION; 41 + unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN; 43 42 bpf_syscall = CONFIG_BPF_SYSCALL; 44 43 tristate_val = CONFIG_TRISTATE; 45 44 bool_val = CONFIG_BOOL;
+6 -21
tools/testing/selftests/bpf/progs/test_probe_user.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - 3 - #include <linux/ptrace.h> 4 - #include <linux/bpf.h> 5 - 6 - #include <netinet/in.h> 7 - 2 + #include "vmlinux.h" 8 3 #include <bpf/bpf_helpers.h> 9 4 #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_core_read.h> 10 6 #include "bpf_misc.h" 11 7 12 8 static struct sockaddr_in old; 13 9 14 - SEC("kprobe/" SYS_PREFIX "sys_connect") 15 - int BPF_KPROBE(handle_sys_connect) 10 + SEC("ksyscall/connect") 11 + int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) 16 12 { 17 - #if SYSCALL_WRAPPER == 1 18 - struct pt_regs *real_regs; 19 - #endif 20 13 struct sockaddr_in new; 21 - void *ptr; 22 14 23 - #if SYSCALL_WRAPPER == 0 24 - ptr = (void *)PT_REGS_PARM2(ctx); 25 - #else 26 - real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); 27 - bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); 28 - #endif 29 - 30 - bpf_probe_read_user(&old, sizeof(old), ptr); 15 + bpf_probe_read_user(&old, sizeof(old), uservaddr); 31 16 __builtin_memset(&new, 0xab, sizeof(new)); 32 - bpf_probe_write_user(ptr, &new, sizeof(new)); 17 + bpf_probe_write_user(uservaddr, &new, sizeof(new)); 33 18 34 19 return 0; 35 20 }