Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-tools-fixes-for-v6.6-1-2023-09-25' into perf-tools-next

To pick up the 'perf bench sched-seccomp-notify' changes to allow us to
continue build testing perf-tools-next with the set of distro
containers, where some older ones don't have a recent enough seccomp.h
UAPI header that contains defines needed by this new 'perf bench'
workload.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

+255 -539
+12
tools/arch/x86/include/asm/msr-index.h
··· 57 57 58 58 #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ 59 59 #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ 60 + #define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ 60 61 61 62 #define MSR_PPIN_CTL 0x0000004e 62 63 #define MSR_PPIN 0x0000004f ··· 156 155 * Not susceptible to Post-Barrier 157 156 * Return Stack Buffer Predictions. 158 157 */ 158 + #define ARCH_CAP_GDS_CTRL BIT(25) /* 159 + * CPU is vulnerable to Gather 160 + * Data Sampling (GDS) and 161 + * has controls for mitigation. 162 + */ 163 + #define ARCH_CAP_GDS_NO BIT(26) /* 164 + * CPU is not vulnerable to Gather 165 + * Data Sampling (GDS). 166 + */ 159 167 160 168 #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* 161 169 * IA32_XAPIC_DISABLE_STATUS MSR ··· 188 178 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ 189 179 #define RTM_ALLOW BIT(1) /* TSX development mode */ 190 180 #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ 181 + #define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ 182 + #define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ 191 183 192 184 #define MSR_IA32_SYSENTER_CS 0x00000174 193 185 #define MSR_IA32_SYSENTER_ESP 0x00000175
+4 -1
tools/include/uapi/asm-generic/unistd.h
··· 820 820 #define __NR_cachestat 451 821 821 __SYSCALL(__NR_cachestat, sys_cachestat) 822 822 823 + #define __NR_fchmodat2 452 824 + __SYSCALL(__NR_fchmodat2, sys_fchmodat2) 825 + 823 826 #undef __NR_syscalls 824 - #define __NR_syscalls 452 827 + #define __NR_syscalls 453 825 828 826 829 /* 827 830 * 32 bit systems traditionally used different
+69 -15
tools/include/uapi/drm/drm.h
··· 673 673 * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT 674 674 * and &DRM_PRIME_CAP_EXPORT. 675 675 * 676 - * PRIME buffers are exposed as dma-buf file descriptors. See 677 - * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". 676 + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and 677 + * &DRM_PRIME_CAP_EXPORT are always advertised. 678 + * 679 + * PRIME buffers are exposed as dma-buf file descriptors. 680 + * See :ref:`prime_buffer_sharing`. 678 681 */ 679 682 #define DRM_CAP_PRIME 0x5 680 683 /** ··· 685 682 * 686 683 * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME 687 684 * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. 685 + * 686 + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. 688 687 */ 689 688 #define DRM_PRIME_CAP_IMPORT 0x1 690 689 /** ··· 694 689 * 695 690 * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME 696 691 * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. 692 + * 693 + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. 697 694 */ 698 695 #define DRM_PRIME_CAP_EXPORT 0x2 699 696 /** ··· 763 756 /** 764 757 * DRM_CAP_SYNCOBJ 765 758 * 766 - * If set to 1, the driver supports sync objects. See 767 - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". 759 + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. 768 760 */ 769 761 #define DRM_CAP_SYNCOBJ 0x13 770 762 /** 771 763 * DRM_CAP_SYNCOBJ_TIMELINE 772 764 * 773 765 * If set to 1, the driver supports timeline operations on sync objects. See 774 - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". 766 + * :ref:`drm_sync_objects`. 
775 767 */ 776 768 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 777 769 ··· 912 906 __u32 count_handles; 913 907 __u32 flags; 914 908 __u32 first_signaled; /* only valid when not waiting all */ 909 + __u32 pad; 910 + }; 911 + 912 + /** 913 + * struct drm_syncobj_eventfd 914 + * @handle: syncobj handle. 915 + * @flags: Zero to wait for the point to be signalled, or 916 + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be 917 + * available for the point. 918 + * @point: syncobj timeline point (set to zero for binary syncobjs). 919 + * @fd: Existing eventfd to sent events to. 920 + * @pad: Must be zero. 921 + * 922 + * Register an eventfd to be signalled by a syncobj. The eventfd counter will 923 + * be incremented by one. 924 + */ 925 + struct drm_syncobj_eventfd { 926 + __u32 handle; 927 + __u32 flags; 928 + __u64 point; 929 + __s32 fd; 915 930 __u32 pad; 916 931 }; 917 932 ··· 1196 1169 */ 1197 1170 #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) 1198 1171 1172 + #define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) 1173 + 1199 1174 /* 1200 1175 * Device specific ioctls should only be in their respective headers 1201 1176 * The device specific ioctl range is from 0x40 to 0x9f. ··· 1209 1180 #define DRM_COMMAND_BASE 0x40 1210 1181 #define DRM_COMMAND_END 0xA0 1211 1182 1212 - /* 1213 - * Header for events written back to userspace on the drm fd. The 1214 - * type defines the type of event, the length specifies the total 1215 - * length of the event (including the header), and user_data is 1216 - * typically a 64 bit value passed with the ioctl that triggered the 1217 - * event. A read on the drm fd will always only return complete 1218 - * events, that is, if for example the read buffer is 100 bytes, and 1219 - * there are two 64 byte events pending, only one will be returned. 1183 + /** 1184 + * struct drm_event - Header for DRM events 1185 + * @type: event type. 
1186 + * @length: total number of payload bytes (including header). 1220 1187 * 1221 - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and 1222 - * up are chipset specific. 1188 + * This struct is a header for events written back to user-space on the DRM FD. 1189 + * A read on the DRM FD will always only return complete events: e.g. if the 1190 + * read buffer is 100 bytes large and there are two 64 byte events pending, 1191 + * only one will be returned. 1192 + * 1193 + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and 1194 + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, 1195 + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. 1223 1196 */ 1224 1197 struct drm_event { 1225 1198 __u32 type; 1226 1199 __u32 length; 1227 1200 }; 1228 1201 1202 + /** 1203 + * DRM_EVENT_VBLANK - vertical blanking event 1204 + * 1205 + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the 1206 + * &_DRM_VBLANK_EVENT flag set. 1207 + * 1208 + * The event payload is a struct drm_event_vblank. 1209 + */ 1229 1210 #define DRM_EVENT_VBLANK 0x01 1211 + /** 1212 + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event 1213 + * 1214 + * This event is sent in response to an atomic commit or legacy page-flip with 1215 + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. 1216 + * 1217 + * The event payload is a struct drm_event_vblank. 1218 + */ 1230 1219 #define DRM_EVENT_FLIP_COMPLETE 0x02 1220 + /** 1221 + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event 1222 + * 1223 + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. 1224 + * 1225 + * The event payload is a struct drm_event_crtc_sequence. 1226 + */ 1231 1227 #define DRM_EVENT_CRTC_SEQUENCE 0x03 1232 1228 1233 1229 struct drm_event_vblank {
+157
tools/include/uapi/linux/seccomp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ 2 + #ifndef _UAPI_LINUX_SECCOMP_H 3 + #define _UAPI_LINUX_SECCOMP_H 4 + 5 + #include <linux/compiler.h> 6 + #include <linux/types.h> 7 + 8 + 9 + /* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */ 10 + #define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */ 11 + #define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */ 12 + #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ 13 + 14 + /* Valid operations for seccomp syscall. */ 15 + #define SECCOMP_SET_MODE_STRICT 0 16 + #define SECCOMP_SET_MODE_FILTER 1 17 + #define SECCOMP_GET_ACTION_AVAIL 2 18 + #define SECCOMP_GET_NOTIF_SIZES 3 19 + 20 + /* Valid flags for SECCOMP_SET_MODE_FILTER */ 21 + #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) 22 + #define SECCOMP_FILTER_FLAG_LOG (1UL << 1) 23 + #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) 24 + #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) 25 + #define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4) 26 + /* Received notifications wait in killable state (only respond to fatal signals) */ 27 + #define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5) 28 + 29 + /* 30 + * All BPF programs must return a 32-bit value. 31 + * The bottom 16-bits are for optional return data. 32 + * The upper 16-bits are ordered from least permissive values to most, 33 + * as a signed value (so 0x80000000 is negative). 34 + * 35 + * The ordering ensures that a min_t() over composed return values always 36 + * selects the least permissive choice. 
37 + */ 38 + #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ 39 + #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ 40 + #define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD 41 + #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ 42 + #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ 43 + #define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */ 44 + #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ 45 + #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ 46 + #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ 47 + 48 + /* Masks for the return value sections. */ 49 + #define SECCOMP_RET_ACTION_FULL 0xffff0000U 50 + #define SECCOMP_RET_ACTION 0x7fff0000U 51 + #define SECCOMP_RET_DATA 0x0000ffffU 52 + 53 + /** 54 + * struct seccomp_data - the format the BPF program executes over. 55 + * @nr: the system call number 56 + * @arch: indicates system call convention as an AUDIT_ARCH_* value 57 + * as defined in <linux/audit.h>. 58 + * @instruction_pointer: at the time of the system call. 59 + * @args: up to 6 system call arguments always stored as 64-bit values 60 + * regardless of the architecture. 61 + */ 62 + struct seccomp_data { 63 + int nr; 64 + __u32 arch; 65 + __u64 instruction_pointer; 66 + __u64 args[6]; 67 + }; 68 + 69 + struct seccomp_notif_sizes { 70 + __u16 seccomp_notif; 71 + __u16 seccomp_notif_resp; 72 + __u16 seccomp_data; 73 + }; 74 + 75 + struct seccomp_notif { 76 + __u64 id; 77 + __u32 pid; 78 + __u32 flags; 79 + struct seccomp_data data; 80 + }; 81 + 82 + /* 83 + * Valid flags for struct seccomp_notif_resp 84 + * 85 + * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution! 86 + * If set by the process supervising the syscalls of another process the 87 + * syscall will continue. This is problematic because of an inherent TOCTOU. 
88 + * An attacker can exploit the time while the supervised process is waiting on 89 + * a response from the supervising process to rewrite syscall arguments which 90 + * are passed as pointers of the intercepted syscall. 91 + * It should be absolutely clear that this means that the seccomp notifier 92 + * _cannot_ be used to implement a security policy! It should only ever be used 93 + * in scenarios where a more privileged process supervises the syscalls of a 94 + * lesser privileged process to get around kernel-enforced security 95 + * restrictions when the privileged process deems this safe. In other words, 96 + * in order to continue a syscall the supervising process should be sure that 97 + * another security mechanism or the kernel itself will sufficiently block 98 + * syscalls if arguments are rewritten to something unsafe. 99 + * 100 + * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF 101 + * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the 102 + * same syscall, the most recently added filter takes precedence. This means 103 + * that the new SECCOMP_RET_USER_NOTIF filter can override any 104 + * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all 105 + * such filtered syscalls to be executed by sending the response 106 + * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally 107 + * be overridden by SECCOMP_USER_NOTIF_FLAG_CONTINUE. 
108 + */ 109 + #define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0) 110 + 111 + struct seccomp_notif_resp { 112 + __u64 id; 113 + __s64 val; 114 + __s32 error; 115 + __u32 flags; 116 + }; 117 + 118 + #define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) 119 + 120 + /* valid flags for seccomp_notif_addfd */ 121 + #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ 122 + #define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ 123 + 124 + /** 125 + * struct seccomp_notif_addfd 126 + * @id: The ID of the seccomp notification 127 + * @flags: SECCOMP_ADDFD_FLAG_* 128 + * @srcfd: The local fd number 129 + * @newfd: Optional remote FD number if SETFD option is set, otherwise 0. 130 + * @newfd_flags: The O_* flags the remote FD should have applied 131 + */ 132 + struct seccomp_notif_addfd { 133 + __u64 id; 134 + __u32 flags; 135 + __u32 srcfd; 136 + __u32 newfd; 137 + __u32 newfd_flags; 138 + }; 139 + 140 + #define SECCOMP_IOC_MAGIC '!' 141 + #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) 142 + #define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) 143 + #define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) 144 + #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) 145 + 146 + /* Flags for seccomp notification fd ioctl. */ 147 + #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) 148 + #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ 149 + struct seccomp_notif_resp) 150 + #define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) 151 + /* On success, the return value is the remote process's added fd number */ 152 + #define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ 153 + struct seccomp_notif_addfd) 154 + 155 + #define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) 156 + 157 + #endif /* _UAPI_LINUX_SECCOMP_H */
+1
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
··· 366 366 449 n64 futex_waitv sys_futex_waitv 367 367 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 368 368 451 n64 cachestat sys_cachestat 369 + 452 n64 fchmodat2 sys_fchmodat2
+1
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
··· 538 538 449 common futex_waitv sys_futex_waitv 539 539 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 540 540 451 common cachestat sys_cachestat 541 + 452 common fchmodat2 sys_fchmodat2
+1
tools/perf/arch/s390/entry/syscalls/syscall.tbl
··· 454 454 449 common futex_waitv sys_futex_waitv sys_futex_waitv 455 455 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 456 456 451 common cachestat sys_cachestat sys_cachestat 457 + 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
+2
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
··· 373 373 449 common futex_waitv sys_futex_waitv 374 374 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 375 375 451 common cachestat sys_cachestat 376 + 452 common fchmodat2 sys_fchmodat2 377 + 453 64 map_shadow_stack sys_map_shadow_stack 376 378 377 379 # 378 380 # Due to a historical design error, certain syscalls are numbered differently
+1 -1
tools/perf/bench/sched-seccomp-notify.c
··· 9 9 #include <sys/syscall.h> 10 10 #include <sys/ioctl.h> 11 11 #include <linux/time64.h> 12 - #include <linux/seccomp.h> 12 + #include <uapi/linux/seccomp.h> 13 13 #include <sys/prctl.h> 14 14 15 15 #include <unistd.h>
+1
tools/perf/check-headers.sh
··· 21 21 "include/uapi/linux/perf_event.h" 22 22 "include/uapi/linux/prctl.h" 23 23 "include/uapi/linux/sched.h" 24 + "include/uapi/linux/seccomp.h" 24 25 "include/uapi/linux/stat.h" 25 26 "include/uapi/linux/usbdevice_fs.h" 26 27 "include/uapi/linux/vhost.h"
+1 -1
tools/perf/pmu-events/jevents.py
··· 1000 1000 } 1001 1001 } 1002 1002 free(cpuid); 1003 - if (!pmu) 1003 + if (!pmu || !table) 1004 1004 return table; 1005 1005 1006 1006 for (i = 0; i < table->num_pmus; i++) {
+2 -2
tools/perf/pmu-events/metric.py
··· 413 413 # pylint: disable=invalid-name 414 414 return Function('has_event', event) 415 415 416 - def strcmp_cpuid_str(event: str) -> Function: 416 + def strcmp_cpuid_str(cpuid: Event) -> Function: 417 417 # pylint: disable=redefined-builtin 418 418 # pylint: disable=invalid-name 419 - return Function('strcmp_cpuid_str', event) 419 + return Function('strcmp_cpuid_str', cpuid) 420 420 421 421 class Metric: 422 422 """An individual metric that will be specifiable on the perf command line."""
-508
tools/perf/util/bpf-prologue.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * bpf-prologue.c 4 - * 5 - * Copyright (C) 2015 He Kuang <hekuang@huawei.com> 6 - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> 7 - * Copyright (C) 2015 Huawei Inc. 8 - */ 9 - 10 - #include <bpf/libbpf.h> 11 - #include "debug.h" 12 - #include "bpf-loader.h" 13 - #include "bpf-prologue.h" 14 - #include "probe-finder.h" 15 - #include <errno.h> 16 - #include <stdlib.h> 17 - #include <dwarf-regs.h> 18 - #include <linux/filter.h> 19 - 20 - #define BPF_REG_SIZE 8 21 - 22 - #define JMP_TO_ERROR_CODE -1 23 - #define JMP_TO_SUCCESS_CODE -2 24 - #define JMP_TO_USER_CODE -3 25 - 26 - struct bpf_insn_pos { 27 - struct bpf_insn *begin; 28 - struct bpf_insn *end; 29 - struct bpf_insn *pos; 30 - }; 31 - 32 - static inline int 33 - pos_get_cnt(struct bpf_insn_pos *pos) 34 - { 35 - return pos->pos - pos->begin; 36 - } 37 - 38 - static int 39 - append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) 40 - { 41 - if (!pos->pos) 42 - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; 43 - 44 - if (pos->pos + 1 >= pos->end) { 45 - pr_err("bpf prologue: prologue too long\n"); 46 - pos->pos = NULL; 47 - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; 48 - } 49 - 50 - *(pos->pos)++ = new_insn; 51 - return 0; 52 - } 53 - 54 - static int 55 - check_pos(struct bpf_insn_pos *pos) 56 - { 57 - if (!pos->pos || pos->pos >= pos->end) 58 - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; 59 - return 0; 60 - } 61 - 62 - /* 63 - * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see 64 - * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM 65 - * instruction (BPF_{B,H,W,DW}). 66 - */ 67 - static int 68 - argtype_to_ldx_size(const char *type) 69 - { 70 - int arg_size = type ? 
atoi(&type[1]) : 64; 71 - 72 - switch (arg_size) { 73 - case 8: 74 - return BPF_B; 75 - case 16: 76 - return BPF_H; 77 - case 32: 78 - return BPF_W; 79 - case 64: 80 - default: 81 - return BPF_DW; 82 - } 83 - } 84 - 85 - static const char * 86 - insn_sz_to_str(int insn_sz) 87 - { 88 - switch (insn_sz) { 89 - case BPF_B: 90 - return "BPF_B"; 91 - case BPF_H: 92 - return "BPF_H"; 93 - case BPF_W: 94 - return "BPF_W"; 95 - case BPF_DW: 96 - return "BPF_DW"; 97 - default: 98 - return "UNKNOWN"; 99 - } 100 - } 101 - 102 - /* Give it a shorter name */ 103 - #define ins(i, p) append_insn((i), (p)) 104 - 105 - /* 106 - * Give a register name (in 'reg'), generate instruction to 107 - * load register into an eBPF register rd: 108 - * 'ldd target_reg, offset(ctx_reg)', where: 109 - * ctx_reg is pre initialized to pointer of 'struct pt_regs'. 110 - */ 111 - static int 112 - gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, 113 - const char *reg, int target_reg) 114 - { 115 - int offset = regs_query_register_offset(reg); 116 - 117 - if (offset < 0) { 118 - pr_err("bpf: prologue: failed to get register %s\n", 119 - reg); 120 - return offset; 121 - } 122 - ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); 123 - 124 - return check_pos(pos); 125 - } 126 - 127 - /* 128 - * Generate a BPF_FUNC_probe_read function call. 
129 - * 130 - * src_base_addr_reg is a register holding base address, 131 - * dst_addr_reg is a register holding dest address (on stack), 132 - * result is: 133 - * 134 - * *[dst_addr_reg] = *([src_base_addr_reg] + offset) 135 - * 136 - * Arguments of BPF_FUNC_probe_read: 137 - * ARG1: ptr to stack (dest) 138 - * ARG2: size (8) 139 - * ARG3: unsafe ptr (src) 140 - */ 141 - static int 142 - gen_read_mem(struct bpf_insn_pos *pos, 143 - int src_base_addr_reg, 144 - int dst_addr_reg, 145 - long offset, 146 - int probeid) 147 - { 148 - /* mov arg3, src_base_addr_reg */ 149 - if (src_base_addr_reg != BPF_REG_ARG3) 150 - ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); 151 - /* add arg3, #offset */ 152 - if (offset) 153 - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); 154 - 155 - /* mov arg2, #reg_size */ 156 - ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); 157 - 158 - /* mov arg1, dst_addr_reg */ 159 - if (dst_addr_reg != BPF_REG_ARG1) 160 - ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); 161 - 162 - /* Call probe_read */ 163 - ins(BPF_EMIT_CALL(probeid), pos); 164 - /* 165 - * Error processing: if read fail, goto error code, 166 - * will be relocated. Target should be the start of 167 - * error processing code. 168 - */ 169 - ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), 170 - pos); 171 - 172 - return check_pos(pos); 173 - } 174 - 175 - /* 176 - * Each arg should be bare register. Fetch and save them into argument 177 - * registers (r3 - r5). 178 - * 179 - * BPF_REG_1 should have been initialized with pointer to 180 - * 'struct pt_regs'. 
181 - */ 182 - static int 183 - gen_prologue_fastpath(struct bpf_insn_pos *pos, 184 - struct probe_trace_arg *args, int nargs) 185 - { 186 - int i, err = 0; 187 - 188 - for (i = 0; i < nargs; i++) { 189 - err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, 190 - BPF_PROLOGUE_START_ARG_REG + i); 191 - if (err) 192 - goto errout; 193 - } 194 - 195 - return check_pos(pos); 196 - errout: 197 - return err; 198 - } 199 - 200 - /* 201 - * Slow path: 202 - * At least one argument has the form of 'offset($rx)'. 203 - * 204 - * Following code first stores them into stack, then loads all of then 205 - * to r2 - r5. 206 - * Before final loading, the final result should be: 207 - * 208 - * low address 209 - * BPF_REG_FP - 24 ARG3 210 - * BPF_REG_FP - 16 ARG2 211 - * BPF_REG_FP - 8 ARG1 212 - * BPF_REG_FP 213 - * high address 214 - * 215 - * For each argument (described as: offn(...off2(off1(reg)))), 216 - * generates following code: 217 - * 218 - * r7 <- fp 219 - * r7 <- r7 - stack_offset // Ideal code should initialize r7 using 220 - * // fp before generating args. However, 221 - * // eBPF won't regard r7 as stack pointer 222 - * // if it is generated by minus 8 from 223 - * // another stack pointer except fp. 224 - * // This is why we have to set r7 225 - * // to fp for each variable. 226 - * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() 227 - * (r7) <- r3 // skip following instructions for bare reg 228 - * r3 <- r3 + off1 . // skip if off1 == 0 229 - * r2 <- 8 \ 230 - * r1 <- r7 |-> generated by gen_read_mem() 231 - * call probe_read / 232 - * jnei r0, 0, err ./ 233 - * r3 <- (r7) 234 - * r3 <- r3 + off2 . // skip if off2 == 0 235 - * r2 <- 8 \ // r2 may be broken by probe_read, so set again 236 - * r1 <- r7 |-> generated by gen_read_mem() 237 - * call probe_read / 238 - * jnei r0, 0, err ./ 239 - * ... 
240 - */ 241 - static int 242 - gen_prologue_slowpath(struct bpf_insn_pos *pos, 243 - struct probe_trace_arg *args, int nargs) 244 - { 245 - int err, i, probeid; 246 - 247 - for (i = 0; i < nargs; i++) { 248 - struct probe_trace_arg *arg = &args[i]; 249 - const char *reg = arg->value; 250 - struct probe_trace_arg_ref *ref = NULL; 251 - int stack_offset = (i + 1) * -8; 252 - 253 - pr_debug("prologue: fetch arg %d, base reg is %s\n", 254 - i, reg); 255 - 256 - /* value of base register is stored into ARG3 */ 257 - err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, 258 - BPF_REG_ARG3); 259 - if (err) { 260 - pr_err("prologue: failed to get offset of register %s\n", 261 - reg); 262 - goto errout; 263 - } 264 - 265 - /* Make r7 the stack pointer. */ 266 - ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); 267 - /* r7 += -8 */ 268 - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); 269 - /* 270 - * Store r3 (base register) onto stack 271 - * Ensure fp[offset] is set. 272 - * fp is the only valid base register when storing 273 - * into stack. We are not allowed to use r7 as base 274 - * register here. 275 - */ 276 - ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, 277 - stack_offset), pos); 278 - 279 - ref = arg->ref; 280 - probeid = BPF_FUNC_probe_read_kernel; 281 - while (ref) { 282 - pr_debug("prologue: arg %d: offset %ld\n", 283 - i, ref->offset); 284 - 285 - if (ref->user_access) 286 - probeid = BPF_FUNC_probe_read_user; 287 - 288 - err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, 289 - ref->offset, probeid); 290 - if (err) { 291 - pr_err("prologue: failed to generate probe_read function call\n"); 292 - goto errout; 293 - } 294 - 295 - ref = ref->next; 296 - /* 297 - * Load previous result into ARG3. Use 298 - * BPF_REG_FP instead of r7 because verifier 299 - * allows FP based addressing only. 
300 - */ 301 - if (ref) 302 - ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, 303 - BPF_REG_FP, stack_offset), pos); 304 - } 305 - } 306 - 307 - /* Final pass: read to registers */ 308 - for (i = 0; i < nargs; i++) { 309 - int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW; 310 - 311 - pr_debug("prologue: load arg %d, insn_sz is %s\n", 312 - i, insn_sz_to_str(insn_sz)); 313 - ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i, 314 - BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); 315 - } 316 - 317 - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); 318 - 319 - return check_pos(pos); 320 - errout: 321 - return err; 322 - } 323 - 324 - static int 325 - prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, 326 - struct bpf_insn *success_code, struct bpf_insn *user_code) 327 - { 328 - struct bpf_insn *insn; 329 - 330 - if (check_pos(pos)) 331 - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; 332 - 333 - for (insn = pos->begin; insn < pos->pos; insn++) { 334 - struct bpf_insn *target; 335 - u8 class = BPF_CLASS(insn->code); 336 - u8 opcode; 337 - 338 - if (class != BPF_JMP) 339 - continue; 340 - opcode = BPF_OP(insn->code); 341 - if (opcode == BPF_CALL) 342 - continue; 343 - 344 - switch (insn->off) { 345 - case JMP_TO_ERROR_CODE: 346 - target = error_code; 347 - break; 348 - case JMP_TO_SUCCESS_CODE: 349 - target = success_code; 350 - break; 351 - case JMP_TO_USER_CODE: 352 - target = user_code; 353 - break; 354 - default: 355 - pr_err("bpf prologue: internal error: relocation failed\n"); 356 - return -BPF_LOADER_ERRNO__PROLOGUE; 357 - } 358 - 359 - insn->off = target - (insn + 1); 360 - } 361 - return 0; 362 - } 363 - 364 - int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, 365 - struct bpf_insn *new_prog, size_t *new_cnt, 366 - size_t cnt_space) 367 - { 368 - struct bpf_insn *success_code = NULL; 369 - struct bpf_insn *error_code = NULL; 370 - struct bpf_insn *user_code = NULL; 371 - struct bpf_insn_pos pos; 372 - 
bool fastpath = true; 373 - int err = 0, i; 374 - 375 - if (!new_prog || !new_cnt) 376 - return -EINVAL; 377 - 378 - if (cnt_space > BPF_MAXINSNS) 379 - cnt_space = BPF_MAXINSNS; 380 - 381 - pos.begin = new_prog; 382 - pos.end = new_prog + cnt_space; 383 - pos.pos = new_prog; 384 - 385 - if (!nargs) { 386 - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), 387 - &pos); 388 - 389 - if (check_pos(&pos)) 390 - goto errout; 391 - 392 - *new_cnt = pos_get_cnt(&pos); 393 - return 0; 394 - } 395 - 396 - if (nargs > BPF_PROLOGUE_MAX_ARGS) { 397 - pr_warning("bpf: prologue: %d arguments are dropped\n", 398 - nargs - BPF_PROLOGUE_MAX_ARGS); 399 - nargs = BPF_PROLOGUE_MAX_ARGS; 400 - } 401 - 402 - /* First pass: validation */ 403 - for (i = 0; i < nargs; i++) { 404 - struct probe_trace_arg_ref *ref = args[i].ref; 405 - 406 - if (args[i].value[0] == '@') { 407 - /* TODO: fetch global variable */ 408 - pr_err("bpf: prologue: global %s%+ld not support\n", 409 - args[i].value, ref ? ref->offset : 0); 410 - return -ENOTSUP; 411 - } 412 - 413 - while (ref) { 414 - /* fastpath is true if all args has ref == NULL */ 415 - fastpath = false; 416 - 417 - /* 418 - * Instruction encodes immediate value using 419 - * s32, ref->offset is long. On systems which 420 - * can't fill long in s32, refuse to process if 421 - * ref->offset too large (or small). 422 - */ 423 - #ifdef __LP64__ 424 - #define OFFSET_MAX ((1LL << 31) - 1) 425 - #define OFFSET_MIN ((1LL << 31) * -1) 426 - if (ref->offset > OFFSET_MAX || 427 - ref->offset < OFFSET_MIN) { 428 - pr_err("bpf: prologue: offset out of bound: %ld\n", 429 - ref->offset); 430 - return -BPF_LOADER_ERRNO__PROLOGUEOOB; 431 - } 432 - #endif 433 - ref = ref->next; 434 - } 435 - } 436 - pr_debug("prologue: pass validation\n"); 437 - 438 - if (fastpath) { 439 - /* If all variables are registers... 
*/ 440 - pr_debug("prologue: fast path\n"); 441 - err = gen_prologue_fastpath(&pos, args, nargs); 442 - if (err) 443 - goto errout; 444 - } else { 445 - pr_debug("prologue: slow path\n"); 446 - 447 - /* Initialization: move ctx to a callee saved register. */ 448 - ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); 449 - 450 - err = gen_prologue_slowpath(&pos, args, nargs); 451 - if (err) 452 - goto errout; 453 - /* 454 - * start of ERROR_CODE (only slow pass needs error code) 455 - * mov r2 <- 1 // r2 is error number 456 - * mov r3 <- 0 // r3, r4... should be touched or 457 - * // verifier would complain 458 - * mov r4 <- 0 459 - * ... 460 - * goto usercode 461 - */ 462 - error_code = pos.pos; 463 - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), 464 - &pos); 465 - 466 - for (i = 0; i < nargs; i++) 467 - ins(BPF_ALU64_IMM(BPF_MOV, 468 - BPF_PROLOGUE_START_ARG_REG + i, 469 - 0), 470 - &pos); 471 - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), 472 - &pos); 473 - } 474 - 475 - /* 476 - * start of SUCCESS_CODE: 477 - * mov r2 <- 0 478 - * goto usercode // skip 479 - */ 480 - success_code = pos.pos; 481 - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); 482 - 483 - /* 484 - * start of USER_CODE: 485 - * Restore ctx to r1 486 - */ 487 - user_code = pos.pos; 488 - if (!fastpath) { 489 - /* 490 - * Only slow path needs restoring of ctx. In fast path, 491 - * register are loaded directly from r1. 492 - */ 493 - ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); 494 - err = prologue_relocate(&pos, error_code, success_code, 495 - user_code); 496 - if (err) 497 - goto errout; 498 - } 499 - 500 - err = check_pos(&pos); 501 - if (err) 502 - goto errout; 503 - 504 - *new_cnt = pos_get_cnt(&pos); 505 - return 0; 506 - errout: 507 - return err; 508 - }
+2
tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
··· 23 23 #define MAX_CPUS 4096 24 24 25 25 // FIXME: These should come from system headers 26 + #ifndef bool 26 27 typedef char bool; 28 + #endif 27 29 typedef int pid_t; 28 30 typedef long long int __s64; 29 31 typedef __s64 time64_t;
-10
tools/perf/util/hashmap.h
··· 80 80 size_t sz; 81 81 }; 82 82 83 - #define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ 84 - .hash_fn = (hash_fn), \ 85 - .equal_fn = (equal_fn), \ 86 - .ctx = (ctx), \ 87 - .buckets = NULL, \ 88 - .cap = 0, \ 89 - .cap_bits = 0, \ 90 - .sz = 0, \ 91 - } 92 - 93 83 void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, 94 84 hashmap_equal_fn equal_fn, void *ctx); 95 85 struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+1 -1
tools/perf/util/pmu.c
··· 521 521 pmu_name = pe->pmu; 522 522 } 523 523 524 - alias = malloc(sizeof(*alias)); 524 + alias = zalloc(sizeof(*alias)); 525 525 if (!alias) 526 526 return -ENOMEM; 527 527