Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf bpf: Fix bpf prologue generation

Issue:

bpf_probe_read() is no longer available on architectures that have
overlapping address spaces. Hence bpf prologue generation fails.

Fix:

Use bpf_probe_read_kernel() for kernel member access. For members marked
with the @user attribute in kprobes, use bpf_probe_read_user().

Other:

The @user attribute was introduced in commit 1e032f7cfa14 ("perf-probe: Add
user memory access attribute support").

Test:

1. ulimit -l 128 ; ./perf record -e tests/bpf_sched_setscheduler.c
2. cat tests/bpf_sched_setscheduler.c

static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
(void *) 6;
static int (*bpf_probe_read_user)(void *dst, __u32 size,
const void *unsafe_ptr) = (void *) 112;
static int (*bpf_probe_read_kernel)(void *dst, __u32 size,
const void *unsafe_ptr) = (void *) 113;

SEC("func=do_sched_setscheduler pid policy param->sched_priority@user")
int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy,
int param)
{
char fmt[] = "prio: %ld";
bpf_trace_printk(fmt, sizeof(fmt), param);
return 1;
}

char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;

3. ./perf script
sched 305669 [000] 1614458.838675: perf_bpf_probe:func: (2904e508)
pid=261614 policy=2 sched_priority=1

4. cat /sys/kernel/debug/tracing/trace
<...>-309956 [006] .... 1616098.093957: 0: prio: 1

Committer testing:

I had to add some missing headers in the bpf_sched_setscheduler.c test
proggie, then instead of using record+script I used 'perf trace' to
drive everything in one go:

# cat bpf_sched_setscheduler.c
#include <linux/types.h>
#include <bpf.h>

static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = (void *) 6;
static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112;
static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113;

SEC("func=do_sched_setscheduler pid policy param->sched_priority@user")
int bpf_func__setscheduler(void *ctx, int err, pid_t pid, int policy, int param)
{
char fmt[] = "prio: %ld";
bpf_trace_printk(fmt, sizeof(fmt), param);
return 1;
}

char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
#
#
# perf trace -e bpf_sched_setscheduler.c chrt -f 42 sleep 1
0.000 chrt/80125 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42)
#

And even with backtraces :-)

# perf trace -e bpf_sched_setscheduler.c/max-stack=8/ chrt -f 42 sleep 1
0.000 chrt/79805 perf_bpf_probe:func(__probe_ip: -1676607808, policy: 1, sched_priority: 42)
do_sched_setscheduler ([kernel.kallsyms])
__x64_sys_sched_setscheduler ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
entry_SYSCALL_64 ([kernel.kallsyms])
__GI___sched_setscheduler (/usr/lib64/libc-2.30.so)
#

Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Reviewed-by: Thomas Richter <tmricht@linux.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: bpf@vger.kernel.org
LPU-Reference: 20200609081019.60234-3-sumanthk@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Sumanth Korikkar and committed by Arnaldo Carvalho de Melo
d38c692f 9256c303

+10 -4
+10 -4
tools/perf/util/bpf-prologue.c
···
142 142 gen_read_mem(struct bpf_insn_pos *pos,
143 143 int src_base_addr_reg,
144 144 int dst_addr_reg,
145 - long offset)
145 + long offset,
146 + int probeid)
146 147 {
147 148 /* mov arg3, src_base_addr_reg */
148 149 if (src_base_addr_reg != BPF_REG_ARG3)
···
160 159 ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
161 160
162 161 /* Call probe_read */
163 - ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
162 + ins(BPF_EMIT_CALL(probeid), pos);
164 163 /*
165 164 * Error processing: if read fail, goto error code,
166 165 * will be relocated. Target should be the start of
···
242 241 gen_prologue_slowpath(struct bpf_insn_pos *pos,
243 242 struct probe_trace_arg *args, int nargs)
244 243 {
245 - int err, i;
244 + int err, i, probeid;
246 245
247 246 for (i = 0; i < nargs; i++) {
248 247 struct probe_trace_arg *arg = &args[i];
···
277 276 stack_offset), pos);
278 277
279 278 ref = arg->ref;
279 + probeid = BPF_FUNC_probe_read_kernel;
280 280 while (ref) {
281 281 pr_debug("prologue: arg %d: offset %ld\n",
282 282 i, ref->offset);
283 +
284 + if (ref->user_access)
285 + probeid = BPF_FUNC_probe_read_user;
286 +
283 287 err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
284 - ref->offset);
288 + ref->offset, probeid);
285 289 if (err) {
286 290 pr_err("prologue: failed to generate probe_read function call\n");
287 291 goto errout;