Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf, libbpf: simplify and cleanup perf ring buffer walk

Simplify bpf_perf_event_read_simple() a bit and fix up some minor
things along the way: the return type declared in the header should
be enum bpf_perf_event_ret rather than int. Also, once the callback
indicates that the loop walking the event data should stop, that
event still needs to be consumed in data_tail, since it has already
been processed.

Moreover, the bpf_perf_event_print_t callback should avoid void * for
the event argument, since we actually get a pointer to struct
perf_event_header; applications can then use container_of() to get
type checking. The walk also doesn't need the modulo operation, since
the ring size is required to be a power of two.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Daniel Borkmann and committed by
Alexei Starovoitov
3dca2115 84430d42

+44 -49
+6 -4
tools/bpf/bpftool/map_perf_ring.c
··· 50 50 stop = true; 51 51 } 52 52 53 - static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) 53 + static enum bpf_perf_event_ret 54 + print_bpf_output(struct perf_event_header *event, void *private_data) 54 55 { 55 - struct event_ring_info *ring = priv; 56 - struct perf_event_sample *e = event; 56 + struct perf_event_sample *e = container_of(event, struct perf_event_sample, 57 + header); 58 + struct event_ring_info *ring = private_data; 57 59 struct { 58 60 struct perf_event_header header; 59 61 __u64 id; 60 62 __u64 lost; 61 - } *lost = event; 63 + } *lost = (typeof(lost))event; 62 64 63 65 if (json_output) { 64 66 jsonw_start_object(json_wtr);
+26 -35
tools/lib/bpf/libbpf.c
··· 2415 2415 } 2416 2416 2417 2417 enum bpf_perf_event_ret 2418 - bpf_perf_event_read_simple(void *mem, unsigned long size, 2419 - unsigned long page_size, void **buf, size_t *buf_len, 2420 - bpf_perf_event_print_t fn, void *priv) 2418 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 2419 + void **copy_mem, size_t *copy_size, 2420 + bpf_perf_event_print_t fn, void *private_data) 2421 2421 { 2422 - struct perf_event_mmap_page *header = mem; 2422 + struct perf_event_mmap_page *header = mmap_mem; 2423 2423 __u64 data_head = ring_buffer_read_head(header); 2424 2424 __u64 data_tail = header->data_tail; 2425 - int ret = LIBBPF_PERF_EVENT_ERROR; 2426 - void *base, *begin, *end; 2425 + void *base = ((__u8 *)header) + page_size; 2426 + int ret = LIBBPF_PERF_EVENT_CONT; 2427 + struct perf_event_header *ehdr; 2428 + size_t ehdr_size; 2427 2429 2428 - if (data_head == data_tail) 2429 - return LIBBPF_PERF_EVENT_CONT; 2430 + while (data_head != data_tail) { 2431 + ehdr = base + (data_tail & (mmap_size - 1)); 2432 + ehdr_size = ehdr->size; 2430 2433 2431 - base = ((char *)header) + page_size; 2434 + if (((void *)ehdr) + ehdr_size > base + mmap_size) { 2435 + void *copy_start = ehdr; 2436 + size_t len_first = base + mmap_size - copy_start; 2437 + size_t len_secnd = ehdr_size - len_first; 2432 2438 2433 - begin = base + data_tail % size; 2434 - end = base + data_head % size; 2435 - 2436 - while (begin != end) { 2437 - struct perf_event_header *ehdr; 2438 - 2439 - ehdr = begin; 2440 - if (begin + ehdr->size > base + size) { 2441 - long len = base + size - begin; 2442 - 2443 - if (*buf_len < ehdr->size) { 2444 - free(*buf); 2445 - *buf = malloc(ehdr->size); 2446 - if (!*buf) { 2439 + if (*copy_size < ehdr_size) { 2440 + free(*copy_mem); 2441 + *copy_mem = malloc(ehdr_size); 2442 + if (!*copy_mem) { 2443 + *copy_size = 0; 2447 2444 ret = LIBBPF_PERF_EVENT_ERROR; 2448 2445 break; 2449 2446 } 2450 - *buf_len = ehdr->size; 2447 + *copy_size = ehdr_size; 
2451 2448 } 2452 2449 2453 - memcpy(*buf, begin, len); 2454 - memcpy(*buf + len, base, ehdr->size - len); 2455 - ehdr = (void *)*buf; 2456 - begin = base + ehdr->size - len; 2457 - } else if (begin + ehdr->size == base + size) { 2458 - begin = base; 2459 - } else { 2460 - begin += ehdr->size; 2450 + memcpy(*copy_mem, copy_start, len_first); 2451 + memcpy(*copy_mem + len_first, base, len_secnd); 2452 + ehdr = *copy_mem; 2461 2453 } 2462 2454 2463 - ret = fn(ehdr, priv); 2455 + ret = fn(ehdr, private_data); 2456 + data_tail += ehdr_size; 2464 2457 if (ret != LIBBPF_PERF_EVENT_CONT) 2465 2458 break; 2466 - 2467 - data_tail += ehdr->size; 2468 2459 } 2469 2460 2470 2461 ring_buffer_write_tail(header, data_tail);
+8 -7
tools/lib/bpf/libbpf.h
··· 297 297 LIBBPF_PERF_EVENT_CONT = -2, 298 298 }; 299 299 300 - typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, 301 - void *priv); 302 - LIBBPF_API int bpf_perf_event_read_simple(void *mem, unsigned long size, 303 - unsigned long page_size, 304 - void **buf, size_t *buf_len, 305 - bpf_perf_event_print_t fn, 306 - void *priv); 300 + struct perf_event_header; 301 + typedef enum bpf_perf_event_ret 302 + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 303 + void *private_data); 304 + LIBBPF_API enum bpf_perf_event_ret 305 + bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 306 + void **copy_mem, size_t *copy_size, 307 + bpf_perf_event_print_t fn, void *private_data); 307 308 308 309 struct nlattr; 309 310 typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+4 -3
tools/testing/selftests/bpf/trace_helpers.c
··· 125 125 char data[]; 126 126 }; 127 127 128 - static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) 128 + static enum bpf_perf_event_ret 129 + bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) 129 130 { 130 - struct perf_event_sample *e = event; 131 - perf_event_print_fn fn = priv; 131 + struct perf_event_sample *e = (struct perf_event_sample *)hdr; 132 + perf_event_print_fn fn = private_data; 132 133 int ret; 133 134 134 135 if (e->header.type == PERF_RECORD_SAMPLE) {