Merge tag 'perf-tools-fixes-for-v5.19-2022-07-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

- BPF program info linear (BPIL) data is accessed assuming 64-bit
alignment, resulting in undefined behavior as the data is just byte
aligned. Fix it. Found using -fsanitize=undefined.

- Fix 'perf offcpu' build on old kernels wrt task_struct's
state/__state field.

- Fix perf_event_attr.sample_type setting on the 'offcpu-time' event
synthesized by the 'perf offcpu' tool.

- Don't bail out when synthesizing PERF_RECORD_ events for pre-existing
threads when one goes away while parsing its procfs entries.

- Don't sort the task scan result from /proc; it's not needed and
introduces bugs when the main thread isn't the first one to be
processed.

- Fix uninitialized 'offset' variable on aarch64 in the unwind code.

- Sync KVM headers with the kernel sources.

* tag 'perf-tools-fixes-for-v5.19-2022-07-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf synthetic-events: Ignore dead threads during event synthesis
perf synthetic-events: Don't sort the task scan result from /proc
perf unwind: Fix uninitialized 'offset' variable on aarch64
tools headers UAPI: Sync linux/kvm.h with the kernel sources
perf bpf: 8 byte align bpil data
tools kvm headers arm64: Update KVM headers from the kernel sources
perf offcpu: Accept allowed sample types only
perf offcpu: Fix build failure on old kernels

Changed files: +134 -17

 tools/arch/arm64/include/uapi/asm/kvm.h  | +36
 tools/include/uapi/linux/kvm.h           | +52 -2
 tools/perf/util/bpf-utils.c              | +2 -3
 tools/perf/util/bpf_off_cpu.c            | +6 -1
 tools/perf/util/bpf_skel/off_cpu.bpf.c   | +14 -6
 tools/perf/util/evsel.c                  | +9
 tools/perf/util/off_cpu.h                | +9
 tools/perf/util/synthetic-events.c       | +5 -4
 tools/perf/util/unwind-libunwind-local.c | +1 -1

tools/arch/arm64/include/uapi/asm/kvm.h (+36):

 	__u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
 };
 
+#define KVM_DEBUG_ARCH_HSR_HIGH_VALID	(1 << 0)
 struct kvm_debug_exit_arch {
 	__u32 hsr;
+	__u32 hsr_high;	/* ESR_EL2[61:32] */
 	__u64 far;	/* used for watchpoints */
 };
···
 				 KVM_REG_SIZE_U512 | 0xffff)
 #define KVM_ARM64_SVE_VLS_WORDS	\
 	((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)
+
+/* Bitmap feature firmware registers */
+#define KVM_REG_ARM_FW_FEAT_BMAP	(0x0016 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_FEAT_BMAP_REG(r)	(KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+					 KVM_REG_ARM_FW_FEAT_BMAP |	\
+					 ((r) & 0xffff))
+
+#define KVM_REG_ARM_STD_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(0)
+
+enum {
+	KVM_REG_ARM_STD_BIT_TRNG_V1_0	= 0,
+#ifdef __KERNEL__
+	KVM_REG_ARM_STD_BMAP_BIT_COUNT,
+#endif
+};
+
+#define KVM_REG_ARM_STD_HYP_BMAP	KVM_REG_ARM_FW_FEAT_BMAP_REG(1)
+
+enum {
+	KVM_REG_ARM_STD_HYP_BIT_PV_TIME	= 0,
+#ifdef __KERNEL__
+	KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT,
+#endif
+};
+
+#define KVM_REG_ARM_VENDOR_HYP_BMAP	KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
+
+enum {
+	KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT	= 0,
+	KVM_REG_ARM_VENDOR_HYP_BIT_PTP		= 1,
+#ifdef __KERNEL__
+	KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT,
+#endif
+};
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0

tools/include/uapi/linux/kvm.h (+52 -2):

 #define KVM_SYSTEM_EVENT_SHUTDOWN	1
 #define KVM_SYSTEM_EVENT_RESET		2
 #define KVM_SYSTEM_EVENT_CRASH		3
+#define KVM_SYSTEM_EVENT_WAKEUP		4
+#define KVM_SYSTEM_EVENT_SUSPEND	5
+#define KVM_SYSTEM_EVENT_SEV_TERM	6
 			__u32 type;
 			__u32 ndata;
 			union {
···
 #define KVM_MP_STATE_OPERATING		7
 #define KVM_MP_STATE_LOAD		8
 #define KVM_MP_STATE_AP_RESET_HOLD	9
+#define KVM_MP_STATE_SUSPENDED		10
 
 struct kvm_mp_state {
 	__u32 mp_state;
···
 #define KVM_CAP_S390_MEM_OP_EXTENSION 211
 #define KVM_CAP_PMU_CAPABILITY 212
 #define KVM_CAP_DISABLE_QUIRKS2 213
-/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_VM_TSC_CONTROL 214
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_ARM_SYSTEM_SUSPEND 216
 
 #ifdef KVM_CAP_IRQ_ROUTING
···
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO	(1 << 2)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE	(1 << 3)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND	(1 << 5)
 
 struct kvm_xen_hvm_config {
 	__u32 flags;
···
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
 /* Available with KVM_CAP_PPC_GET_PVINFO */
 #define KVM_PPC_GET_PVINFO	  _IOW(KVMIO,  0xa1, struct kvm_ppc_pvinfo)
-/* Available with KVM_CAP_TSC_CONTROL */
+/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with
+*  KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
 #define KVM_SET_TSC_KHZ           _IO(KVMIO,  0xa2)
 #define KVM_GET_TSC_KHZ           _IO(KVMIO,  0xa3)
 /* Available with KVM_CAP_PCI_2_3 */
···
 		struct {
 			__u64 gfn;
 		} shared_info;
+		struct {
+			__u32 send_port;
+			__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+			__u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN		(1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE		(1 << 1)
+#define KVM_XEN_EVTCHN_RESET		(1 << 2)
+			/*
+			 * Events sent by the guest are either looped back to
+			 * the guest itself (potentially on a different port#)
+			 * or signalled via an eventfd.
+			 */
+			union {
+				struct {
+					__u32 port;
+					__u32 vcpu;
+					__u32 priority;
+				} port;
+				struct {
+					__u32 port; /* Zero for eventfd */
+					__s32 fd;
+				} eventfd;
+				__u32 padding[4];
+			} deliver;
+		} evtchn;
+		__u32 xen_version;
 		__u64 pad[8];
 	} u;
 };
···
 #define KVM_XEN_ATTR_TYPE_LONG_MODE		0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1
 #define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR		0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN		0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION		0x4
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
 #define KVM_XEN_VCPU_SET_ATTR	_IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_HVM_EVTCHN_SEND	_IOW(KVMIO,  0xd0, struct kvm_irq_routing_xen_evtchn)
 
 #define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
 #define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
···
 			__u64 time_blocked;
 			__u64 time_offline;
 		} runstate;
+		__u32 vcpu_id;
+		struct {
+			__u32 port;
+			__u32 priority;
+			__u64 expires_ns;
+		} timer;
+		__u8 vector;
 	} u;
 };
···
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT	0x3
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA	0x4
 #define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST	0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID		0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER		0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR	0x8
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {

tools/perf/util/bpf-utils.c (+2 -3):

 		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
 		size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
 
-		data_len += count * size;
+		data_len += roundup(count * size, sizeof(__u64));
 	}
 
 	/* step 3: allocate continuous memory */
-	data_len = roundup(data_len, sizeof(__u64));
 	info_linear = malloc(sizeof(struct perf_bpil) + data_len);
 	if (!info_linear)
 		return ERR_PTR(-ENOMEM);
···
 		bpf_prog_info_set_offset_u64(&info_linear->info,
 					     desc->array_offset,
 					     ptr_to_u64(ptr));
-		ptr += count * size;
+		ptr += roundup(count * size, sizeof(__u64));
 	}
 
 	/* step 5: call syscall again to get required arrays */
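
The fix moves the roundup() from the grand total to each per-array length: rounding only the total meant the first array started 8-byte aligned but every following array started wherever the previous one happened to end, i.e. merely byte aligned. Padding each array to a sizeof(__u64) boundary keeps every array start aligned. A minimal stand-alone sketch of that invariant (the counts, sizes, and ROUNDUP macro below are illustrative, not the perf source):

#include <stdint.h>
#include <stdio.h>

/* round x up to a power-of-two boundary a */
#define ROUNDUP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	/* e.g. three variable-length arrays with odd element sizes */
	uint32_t counts[] = { 3, 5, 7 };
	uint32_t sizes[]  = { 8, 3, 2 };
	uint64_t off = 0;

	for (int i = 0; i < 3; i++) {
		printf("array %d starts at offset %llu\n", i,
		       (unsigned long long)off);
		/* padding each array keeps the *next* one 8-byte
		 * aligned, so a __u64 load at its start is not UB */
		off += ROUNDUP(counts[i] * sizes[i], sizeof(uint64_t));
	}
	return 0;
}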

tools/perf/util/bpf_off_cpu.c (+6 -1):

 
 	sample_type = evsel->core.attr.sample_type;
 
+	if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
+		pr_err("not supported sample type: %llx\n",
+		       (unsigned long long)sample_type);
+		return -1;
+	}
+
 	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
 		if (evsel->core.id)
 			sid = evsel->core.id[0];
···
 	}
 	if (sample_type & PERF_SAMPLE_CGROUP)
 		data.array[n++] = key.cgroup_id;
-	/* TODO: handle more sample types */
 
 	size = n * sizeof(u64);
 	data.hdr.size = size;

tools/perf/util/bpf_skel/off_cpu.bpf.c (+14 -6):

 	__uint(max_entries, 1);
 } cgroup_filter SEC(".maps");
 
+/* new kernel task_struct definition */
+struct task_struct___new {
+	long __state;
+} __attribute__((preserve_access_index));
+
 /* old kernel task_struct definition */
 struct task_struct___old {
 	long state;
···
  */
 static inline int get_task_state(struct task_struct *t)
 {
-	if (bpf_core_field_exists(t->__state))
-		return BPF_CORE_READ(t, __state);
+	/* recast pointer to capture new type for compiler */
+	struct task_struct___new *t_new = (void *)t;
 
-	/* recast pointer to capture task_struct___old type for compiler */
-	struct task_struct___old *t_old = (void *)t;
+	if (bpf_core_field_exists(t_new->__state)) {
+		return BPF_CORE_READ(t_new, __state);
+	} else {
+		/* recast pointer to capture old type for compiler */
+		struct task_struct___old *t_old = (void *)t;
 
-	/* now use old "state" name of the field */
-	return BPF_CORE_READ(t_old, state);
+		return BPF_CORE_READ(t_old, state);
+	}
 }
 
 static inline __u64 get_cgroup_id(struct task_struct *t)
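
The ___new/___old suffix is libbpf's CO-RE "flavor" convention: everything after the ___ is ignored when the type is matched against kernel BTF, so both local definitions relocate against the kernel's real task_struct. The old code accessed t->__state directly, which only compiles when the task_struct definition used at build time already has the renamed field; routing the new name through a flavor struct as well keeps the program compilable against either kernel. A condensed sketch of the pattern (assumes a bpftool-generated vmlinux.h and libbpf's bpf_core_read.h):

#include "vmlinux.h"
#include <bpf/bpf_core_read.h>

/* "___new"/"___old" are stripped during BTF matching, so both
 * structs resolve to the kernel's task_struct at load time */
struct task_struct___new {
	long __state;			/* v5.14+ field name */
} __attribute__((preserve_access_index));

struct task_struct___old {
	long state;			/* pre-v5.14 field name */
} __attribute__((preserve_access_index));

static long task_state(struct task_struct *t)
{
	struct task_struct___new *t_new = (void *)t;
	struct task_struct___old *t_old = (void *)t;

	/* resolved at load time against the running kernel's BTF,
	 * not at run time */
	if (bpf_core_field_exists(t_new->__state))
		return BPF_CORE_READ(t_new, __state);

	return BPF_CORE_READ(t_old, state);
}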

tools/perf/util/evsel.c (+9):

 #include "util.h"
 #include "hashmap.h"
 #include "pmu-hybrid.h"
+#include "off_cpu.h"
 #include "../perf-sys.h"
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
···
 	}
 }
 
+static bool evsel__is_offcpu_event(struct evsel *evsel)
+{
+	return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, OFFCPU_EVENT);
+}
+
 /*
  * The enable_on_exec/disabled value strategy:
  *
···
 	 */
 	if (evsel__is_dummy_event(evsel))
 		evsel__reset_sample_bit(evsel, BRANCH_STACK);
+
+	if (evsel__is_offcpu_event(evsel))
+		evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
 }
 
 int evsel__set_filter(struct evsel *evsel, const char *filter)

tools/perf/util/off_cpu.h (+9):

 #ifndef PERF_UTIL_OFF_CPU_H
 #define PERF_UTIL_OFF_CPU_H
 
+#include <linux/perf_event.h>
+
 struct evlist;
 struct target;
 struct perf_session;
 struct record_opts;
 
 #define OFFCPU_EVENT  "offcpu-time"
+
+#define OFFCPU_SAMPLE_TYPES  (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
+			      PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+			      PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \
+			      PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \
+			      PERF_SAMPLE_CGROUP)
+
 
 #ifdef HAVE_BPF_SKEL
 int off_cpu_prepare(struct evlist *evlist, struct target *target,
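
The two call sites above use this mask in complementary ways: evsel.c clamps the synthesized event's attr.sample_type to it, and bpf_off_cpu.c rejects anything outside it. Unknown bits must be rejected because perf sample fields are position-encoded in sample_type bit order, so a bit the BPF-side writer does not emit would shift every later field as seen by the decoder. A trivial stand-alone illustration of that failure mode (the S_* bits are made up, not the real PERF_SAMPLE_* values):

#include <stdint.h>
#include <stdio.h>

#define S_IP		(1u << 0)
#define S_TID		(1u << 1)
#define S_TIME		(1u << 2)
#define SUPPORTED	(S_IP | S_TID | S_TIME)

/* fields are written in fixed bit order; the reader decodes them
 * the same way, so writer and reader must agree on every bit */
static int encode(uint32_t sample_type, uint64_t *out)
{
	int n = 0;

	if (sample_type & ~SUPPORTED)
		return -1;		/* unknown bit: layout would skew */
	if (sample_type & S_IP)
		out[n++] = 0xdeadbeef;	/* ip */
	if (sample_type & S_TID)
		out[n++] = 1234;	/* pid/tid */
	if (sample_type & S_TIME)
		out[n++] = 987654321;	/* timestamp */
	return n;
}

int main(void)
{
	uint64_t buf[3];
	int n = encode(S_IP | S_TIME, buf);

	for (int i = 0; i < n; i++)
		printf("field[%d] = %llu\n", i, (unsigned long long)buf[i]);
	return 0;
}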

tools/perf/util/synthetic-events.c (+5 -4):

 	snprintf(filename, sizeof(filename), "%s/proc/%d/task",
 		 machine->root_dir, pid);
 
-	n = scandir(filename, &dirent, filter_task, alphasort);
+	n = scandir(filename, &dirent, filter_task, NULL);
 	if (n < 0)
 		return n;
···
 		if (*end)
 			continue;
 
-		rc = -1;
+		/* some threads may exit just after scan, ignore it */
 		if (perf_event__prepare_comm(comm_event, pid, _pid, machine,
 					     &tgid, &ppid, &kernel_thread) != 0)
-			break;
+			continue;
 
+		rc = -1;
 		if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
 						ppid, process, machine) < 0)
 			break;
···
 		return 0;
 
 	snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
-	n = scandir(proc_path, &dirent, filter_task, alphasort);
+	n = scandir(proc_path, &dirent, filter_task, NULL);
 	if (n < 0)
 		return err;
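
alphasort compares the tid directory names as strings, so lexicographic order could place a longer tid (say "10234") relative to a shorter one ("987") in a way that broke the assumption that the thread-group leader is processed first; passing NULL as the comparator keeps the order procfs hands back and skips the qsort pass entirely. A minimal sketch of the unsorted, exit-tolerant scan (a hypothetical stand-alone program, not the perf source):

#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>

static int filter_task(const struct dirent *d)
{
	return isdigit((unsigned char)d->d_name[0]);	/* numeric = a tid */
}

int main(int argc, char **argv)
{
	char path[256];
	struct dirent **ents;
	int n;

	snprintf(path, sizeof(path), "/proc/%s/task",
		 argc > 1 ? argv[1] : "1");
	n = scandir(path, &ents, filter_task, NULL);	/* NULL: no sort pass */
	if (n < 0) {
		perror("scandir");
		return 1;
	}
	for (int i = 0; i < n; i++) {
		/* a thread listed here may already have exited; a real
		 * consumer must treat a per-tid open failure as benign
		 * and move on rather than abort the whole scan */
		printf("tid %s\n", ents[i]->d_name);
		free(ents[i]);
	}
	free(ents);
	return 0;
}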

tools/perf/util/unwind-libunwind-local.c (+1 -1):

 #ifndef NO_LIBUNWIND_DEBUG_FRAME
 static u64 elf_section_offset(int fd, const char *name)
 {
-	u64 address, offset;
+	u64 address, offset = 0;
 
 	if (elf_section_address_and_offset(fd, name, &address, &offset))
 		return 0;
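
The bug class behind this one-liner: elf_section_address_and_offset() is an out-parameter helper that only writes its outputs on success, and aarch64 builds compile in this !NO_LIBUNWIND_DEBUG_FRAME caller, so the value could be read while still indeterminate. Zero-initializing makes the not-found case well-defined and consistent with the existing "0 means no such section" convention. A reduced stand-alone illustration (hypothetical names; only the pattern matches the perf code):

#include <stdio.h>

/* helper that only writes *off on success */
static int lookup_section(const char *name, unsigned long long *off)
{
	if (name[0] != '.')
		return -1;	/* failure: *off never written */
	*off = 0x1000;
	return 0;
}

static unsigned long long section_offset(const char *name)
{
	unsigned long long off = 0;	/* the fix: defined on all paths */

	if (lookup_section(name, &off))
		return 0;

	return off;
}

int main(void)
{
	printf("%#llx %#llx\n", section_offset(".debug_frame"),
	       section_offset("bogus"));
	return 0;
}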