Merge tag 'perf-urgent-for-mingo-4.18-20180730' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/urgent fixes from Arnaldo Carvalho de Melo:

- Update the tools copy of several files, including perf_event.h,
powerpc's asm/unistd.h (new io_pgetevents syscall), bpf.h and
x86's memcpy_64.S (used in 'perf bench mem'), silencing the
respective warnings during the perf tools build.

- Fix the build on the alpine:edge distro.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Changed files: +124 -63
+1
tools/arch/powerpc/include/uapi/asm/unistd.h
···
 #define __NR_pkey_free 385
 #define __NR_pkey_mprotect 386
 #define __NR_rseq 387
+#define __NR_io_pgetevents 388

 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
+13
tools/arch/x86/include/asm/mcsafe_test.h
···
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MCSAFE_TEST_H_
+#define _MCSAFE_TEST_H_
+
+.macro MCSAFE_TEST_CTL
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+.endm
+#endif /* _MCSAFE_TEST_H_ */
+54 -58
tools/arch/x86/lib/memcpy_64.S
···
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
+#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
···
 ENDPROC(memcpy_orig)

 #ifndef CONFIG_UML
+
+MCSAFE_TEST_CTL
+
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
···
 	subl $8, %ecx
 	negl %ecx
 	subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
 	movb (%rsi), %al
+	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_leading_bytes
+	jnz .L_read_leading_bytes

 .L_8byte_aligned:
-	/* Figure out how many whole cache lines (64-bytes) to copy */
-	movl %edx, %ecx
-	andl $63, %edx
-	shrl $6, %ecx
-	jz .L_no_whole_cache_lines
-
-	/* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-	movq %r8, (%rdi)
-	movq %r9, 1*8(%rdi)
-	movq %r10, 2*8(%rdi)
-	movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-	movq %r8, 4*8(%rdi)
-	movq %r9, 5*8(%rdi)
-	movq %r10, 6*8(%rdi)
-	movq %r11, 7*8(%rdi)
-	leaq 64(%rsi), %rsi
-	leaq 64(%rdi), %rdi
-	decl %ecx
-	jnz .L_cache_w0
-
-	/* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
 	movl %edx, %ecx
 	andl $7, %edx
 	shrl $3, %ecx
 	jz .L_no_whole_words

-	/* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
 	movq (%rsi), %r8
-	mov %r8, (%rdi)
-	leaq 8(%rsi), %rsi
-	leaq 8(%rdi), %rdi
+	MCSAFE_TEST_SRC %rsi 8 .E_read_words
+	MCSAFE_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
 	decl %ecx
-	jnz .L_copy_trailing_words
+	jnz .L_read_words

 	/* Any trailing bytes? */
 .L_no_whole_words:
···

 	/* Copy trailing bytes */
 	movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
 	movb (%rsi), %al
+	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_trailing_bytes
+	jnz .L_read_trailing_bytes

 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)

 .section .fixup, "ax"
-	/* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
-	mov	$-EFAULT, %rax
+	/*
+	 * Return number of bytes not copied for any failure. Note that
+	 * there is no "tail" handling since the source buffer is 8-byte
+	 * aligned and poison is cacheline aligned.
+	 */
+.E_read_words:
+	shll	$3, %ecx
+.E_leading_bytes:
+	addl	%edx, %ecx
+.E_trailing_bytes:
+	mov	%ecx, %eax
 	ret
+
+	/*
+	 * For write fault handling, given the destination is unaligned,
+	 * we handle faults on multi-byte writes with a byte-by-byte
+	 * copy up to the write-protected page.
+	 */
+.E_write_words:
+	shll	$3, %ecx
+	addl	%edx, %ecx
+	movl	%ecx, %edx
+	jmp	mcsafe_handle_tail

 .previous

-	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE(.L_write_words, .E_write_words)
+	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
 #endif
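Side note (not part of the diff above): the fixup comments describe the new return convention, bytes not copied instead of -EFAULT. A minimal hedged C sketch of how a caller could consume it; the wrapper name copy_mc_example() is made up for illustration:

#include <stddef.h>	/* size_t */

/* Prototype of the assembly routine above, as carried in the tools copy. */
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);

/*
 * Hedged sketch: __memcpy_mcsafe() now returns the number of bytes NOT
 * copied (0 on success), so a caller can compute how much data landed
 * before a machine check on the source or a write fault on the
 * destination stopped the copy.
 */
static size_t copy_mc_example(void *dst, const void *src, size_t len)
{
	unsigned long rem = __memcpy_mcsafe(dst, src, len);

	return len - rem;	/* bytes actually copied */
}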
+23 -5
tools/include/uapi/linux/bpf.h
···
  *		is resolved), the nexthop address is returned in ipv4_dst
  *		or ipv6_dst based on family, smac is set to mac address of
  *		egress device, dmac is set to nexthop mac address, rt_metric
- *		is set to metric from route (IPv4/IPv6 only).
+ *		is set to metric from route (IPv4/IPv6 only), and ifindex
+ *		is set to the device index of the nexthop from the FIB lookup.
  *
  *		*plen* argument is the size of the passed in struct.
  *		*flags* argument can be a combination of one or more of the
···
  *		*ctx* is either **struct xdp_md** for XDP programs or
  *		**struct sk_buff** tc cls_act programs.
  *	Return
- *		Egress device index on success, 0 if packet needs to continue
- *		up the stack for further processing or a negative error in case
- *		of failure.
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (packet is forwarded, nexthop neighbor exists)
+ *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ *		*     packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
···
 #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
 #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)

+enum {
+	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
+	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
+	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
+	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
+	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+};
+
 struct bpf_fib_lookup {
 	/* input:  network family for lookup (AF_INET, AF_INET6)
 	 * output: network family of egress nexthop
···

 	/* total length of packet from network header - used for MTU check */
 	__u16	tot_len;
-	__u32	ifindex;  /* L3 device index for lookup */
+
+	/* input: L3 device index for lookup
+	 * output: device index from FIB lookup
+	 */
+	__u32	ifindex;

 	union {
 		/* inputs to lookup */
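For orientation only (not part of this change): a hedged sketch of how an XDP program could act on the BPF_FIB_LKUP_RET_* codes documented above. The program name, the bpf_helpers.h include path, and the drop policy are assumptions, and a real forwarder would also rewrite the Ethernet header from fib.smac/fib.dmac before redirecting.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>	/* bpf_fib_lookup(), bpf_redirect(), SEC(); path assumed */

SEC("xdp")
int xdp_fib_example(struct xdp_md *ctx)
{
	struct bpf_fib_lookup fib = {};
	int rc;

	/* ... fill fib.family, addresses and tot_len from the packet ... */
	fib.ifindex = ctx->ingress_ifindex;	/* input: L3 device for the lookup */

	rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
	if (rc < 0)				/* invalid input argument */
		return XDP_ABORTED;
	if (rc == BPF_FIB_LKUP_RET_SUCCESS)	/* fib.ifindex is now the egress device */
		return bpf_redirect(fib.ifindex, 0);
	if (rc == BPF_FIB_LKUP_RET_BLACKHOLE ||
	    rc == BPF_FIB_LKUP_RET_UNREACHABLE ||
	    rc == BPF_FIB_LKUP_RET_PROHIBIT)
		return XDP_DROP;		/* destination can be dropped */

	return XDP_PASS;			/* needs assist from the full stack */
}

char _license[] SEC("license") = "GPL";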
+2
tools/include/uapi/linux/perf_event.h
···
 	PERF_SAMPLE_PHYS_ADDR = 1U << 19,

 	PERF_SAMPLE_MAX = 1U << 20,	/* non-ABI */
+
+	__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63,
 };

 /*
+1
tools/perf/arch/x86/util/pmu.c
···
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>

+#include <linux/stddef.h>
 #include <linux/perf_event.h>

 #include "../../util/intel-pt.h"
+1
tools/perf/arch/x86/util/tsc.c
···
 #include <stdbool.h>
 #include <errno.h>

+#include <linux/stddef.h>
 #include <linux/perf_event.h>

 #include "../../perf.h"
+1
tools/perf/bench/Build
···
 perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o

+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+1
tools/perf/bench/mem-memcpy-x86-64-asm.S
···
 #define altinstr_replacement text
 #define globl p2align 4; .globl
 #define _ASM_EXTABLE_FAULT(x, y)
+#define _ASM_EXTABLE(x, y)

 #include "../../arch/x86/lib/memcpy_64.S"
 /*
+24
tools/perf/bench/mem-memcpy-x86-64-lib.c
···
+/*
+ * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
+ * of the kernel's arch/x86/lib/memcpy_64.S used in 'perf bench mem memcpy'
+ * happy.
+ */
+#include <linux/types.h>
+
+unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
+
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+	for (; len; --len, to++, from++) {
+		/*
+		 * Call the assembly routine back directly since
+		 * memcpy_mcsafe() may silently fallback to memcpy.
+		 */
+		unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+		if (rem)
+			break;
+	}
+	return len;
+}
+1
tools/perf/perf.h
···
 #include <time.h>
 #include <stdbool.h>
 #include <linux/types.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>

 extern bool test_attr__enabled;
+1
tools/perf/util/header.h
···
 #ifndef __PERF_HEADER_H
 #define __PERF_HEADER_H

+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <sys/types.h>
 #include <stdbool.h>
+1
tools/perf/util/namespaces.h
···
 #define __PERF_NAMESPACES_H

 #include <sys/types.h>
+#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/refcount.h>
 #include <linux/types.h>