Merge tag 'powerpc-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

tjh.dev / kernel

fork atom

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

fork atom

Merge tag 'powerpc-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

- A series of bpf fixes, including an oops fix and some codegen fixes.

- Fix a regression in syscall_get_arch() for compat processes.

- Fix boot failure on some 32-bit systems with KASAN enabled.

- A couple of other minor fixes, including build fixes.

Thanks to Athira Rajeev, Christophe Leroy, Dmitry V. Levin, Jiri Olsa,
Johan Almbladh, Maxime Bizon, Naveen N. Rao, and Nicholas Piggin.

* tag 'powerpc-5.17-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
powerpc/64s: Mask SRR0 before checking against the masked NIP
powerpc/perf: Only define power_pmu_wants_prompt_pmi() for CONFIG_PPC64
powerpc/32s: Fix kasan_init_region() for KASAN
powerpc/time: Fix build failure due to do_hard_irq_enable() on PPC32
powerpc/audit: Fix syscall_get_arch()
powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06
tools/bpf: Rename 'struct event' to avoid naming conflict
powerpc/bpf: Update ldimm64 instructions during extra pass
powerpc32/bpf: Fix codegen for bpf-to-bpf calls
bpf: Guard against accessing NULL pt_regs in bpf_get_task_stack()

Linus Torvalds 4 years ago dd81e1c7 ac5a9bb6

+129 -85

16 changed files

expand all collapse all

arch

powerpc

include

asm

book3s

mmu-hash.h

hw_irq.h

ppc-opcode.h

syscall.h

thread_info.h

kernel

interrupt_64.S

book3s32

mmu.c

kasan

book3s_32.c

net

bpf_jit_comp.c

bpf_jit_comp32.c

bpf_jit_comp64.c

perf

core-book3s.c

kernel

bpf

stackmap.c

tools

bpf

runqslower

runqslower.bpf.c

runqslower.c

runqslower.h

arch/powerpc/include/asm/book3s/32/mmu-hash.h

reviewed

··· 223 223 update_user_segment(15, val); 224 224 } 225 225 226 226 + int __init find_free_bat(void); 227 227 + unsigned int bat_block_size(unsigned long base, unsigned long top); 226 228 #endif /* !__ASSEMBLY__ */ 227 229 228 230 /* We happily ignore the smaller BATs on 601, we don't actually use

+1 -1

arch/powerpc/include/asm/hw_irq.h

reviewed

··· 473 473 return !(regs->msr & MSR_EE); 474 474 } 475 475 476 476 - static inline bool should_hard_irq_enable(void) 476 476 + static __always_inline bool should_hard_irq_enable(void) 477 477 { 478 478 return false; 479 479 }

arch/powerpc/include/asm/ppc-opcode.h

reviewed

+2 -2

arch/powerpc/include/asm/syscall.h

reviewed

··· 90 90 unsigned long val, mask = -1UL; 91 91 unsigned int n = 6; 92 92 93 93 - if (is_32bit_task()) 93 93 + if (is_tsk_32bit_task(task)) 94 94 mask = 0xffffffff; 95 95 96 96 while (n--) { ··· 105 105 106 106 static inline int syscall_get_arch(struct task_struct *task) 107 107 { 108 108 - if (is_32bit_task()) 108 108 + if (is_tsk_32bit_task(task)) 109 109 return AUDIT_ARCH_PPC; 110 110 else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) 111 111 return AUDIT_ARCH_PPC64LE;

arch/powerpc/include/asm/thread_info.h

reviewed

··· 168 168 169 169 #ifdef CONFIG_COMPAT 170 170 #define is_32bit_task() (test_thread_flag(TIF_32BIT)) 171 171 + #define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT)) 171 172 #else 172 173 #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32)) 174 174 + #define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32)) 173 175 #endif 174 176 175 177 #if defined(CONFIG_PPC64)

arch/powerpc/kernel/interrupt_64.S

reviewed

··· 30 30 .ifc \srr,srr 31 31 mfspr r11,SPRN_SRR0 32 32 ld r12,_NIP(r1) 33 33 + clrrdi r11,r11,2 33 34 clrrdi r12,r12,2 34 35 100: tdne r11,r12 35 36 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) ··· 41 40 .else 42 41 mfspr r11,SPRN_HSRR0 43 42 ld r12,_NIP(r1) 43 43 + clrrdi r11,r11,2 44 44 clrrdi r12,r12,2 45 45 100: tdne r11,r12 46 46 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)

+5 -5

arch/powerpc/mm/book3s32/mmu.c

reviewed

··· 76 76 return 0; 77 77 } 78 78 79 79 - static int __init find_free_bat(void) 79 79 + int __init find_free_bat(void) 80 80 { 81 81 int b; 82 82 int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; ··· 100 100 * - block size has to be a power of two. This is calculated by finding the 101 101 * highest bit set to 1. 102 102 */ 103 103 - static unsigned int block_size(unsigned long base, unsigned long top) 103 103 + unsigned int bat_block_size(unsigned long base, unsigned long top) 104 104 { 105 105 unsigned int max_size = SZ_256M; 106 106 unsigned int base_shift = (ffs(base) - 1) & 31; ··· 145 145 int idx; 146 146 147 147 while ((idx = find_free_bat()) != -1 && base != top) { 148 148 - unsigned int size = block_size(base, top); 148 148 + unsigned int size = bat_block_size(base, top); 149 149 150 150 if (size < 128 << 10) 151 151 break; ··· 201 201 unsigned long size; 202 202 203 203 for (i = 0; i < nb - 1 && base < top;) { 204 204 - size = block_size(base, top); 204 204 + size = bat_block_size(base, top); 205 205 setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); 206 206 base += size; 207 207 } 208 208 if (base < top) { 209 209 - size = block_size(base, top); 209 209 + size = bat_block_size(base, top); 210 210 if ((top - base) > size) { 211 211 size <<= 1; 212 212 if (strict_kernel_rwx_enabled() && base + size > border)

+29 -26

arch/powerpc/mm/kasan/book3s_32.c

reviewed

··· 10 10 { 11 11 unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); 12 12 unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); 13 13 - unsigned long k_cur = k_start; 14 14 - int k_size = k_end - k_start; 15 15 - int k_size_base = 1 << (ffs(k_size) - 1); 13 13 + unsigned long k_nobat = k_start; 14 14 + unsigned long k_cur; 15 15 + phys_addr_t phys; 16 16 int ret; 17 17 - void *block; 18 17 19 19 - block = memblock_alloc(k_size, k_size_base); 18 18 + while (k_nobat < k_end) { 19 19 + unsigned int k_size = bat_block_size(k_nobat, k_end); 20 20 + int idx = find_free_bat(); 20 21 21 21 - if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { 22 22 - int shift = ffs(k_size - k_size_base); 23 23 - int k_size_more = shift ? 1 << (shift - 1) : 0; 22 22 + if (idx == -1) 23 23 + break; 24 24 + if (k_size < SZ_128K) 25 25 + break; 26 26 + phys = memblock_phys_alloc_range(k_size, k_size, 0, 27 27 + MEMBLOCK_ALLOC_ANYWHERE); 28 28 + if (!phys) 29 29 + break; 24 30 25 25 - setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); 26 26 - if (k_size_more >= SZ_128K) 27 27 - setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, 28 28 - k_size_more, PAGE_KERNEL); 29 29 - if (v_block_mapped(k_start)) 30 30 - k_cur = k_start + k_size_base; 31 31 - if (v_block_mapped(k_start + k_size_base)) 32 32 - k_cur = k_start + k_size_base + k_size_more; 33 33 - 34 34 - update_bats(); 31 31 + setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL); 32 32 + k_nobat += k_size; 35 33 } 34 34 + if (k_nobat != k_start) 35 35 + update_bats(); 36 36 37 37 - if (!block) 38 38 - block = memblock_alloc(k_size, PAGE_SIZE); 39 39 - if (!block) 40 40 - return -ENOMEM; 37 37 + if (k_nobat < k_end) { 38 38 + phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0, 39 39 + MEMBLOCK_ALLOC_ANYWHERE); 40 40 + if (!phys) 41 41 + return -ENOMEM; 42 42 + } 41 43 42 44 ret = kasan_init_shadow_page_tables(k_start, k_end); 43 45 if (ret) 44 46 return 
ret; 45 47 46 46 - kasan_update_early_region(k_start, k_cur, __pte(0)); 48 48 + kasan_update_early_region(k_start, k_nobat, __pte(0)); 47 49 48 48 - for (; k_cur < k_end; k_cur += PAGE_SIZE) { 50 50 + for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) { 49 51 pmd_t *pmd = pmd_off_k(k_cur); 50 50 - void *va = block + k_cur - k_start; 51 51 - pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); 52 52 + pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL); 52 53 53 54 __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); 54 55 } 55 56 flush_tlb_kernel_range(k_start, k_end); 57 57 + memset(kasan_mem_to_shadow(start), 0, k_end - k_start); 58 58 + 56 59 return 0; 57 60 }

+23 -6

arch/powerpc/net/bpf_jit_comp.c

reviewed

··· 23 23 memset32(area, BREAKPOINT_INSTRUCTION, size / 4); 24 24 } 25 25 26 26 - /* Fix the branch target addresses for subprog calls */ 27 27 - static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, 28 28 - struct codegen_context *ctx, u32 *addrs) 26 26 + /* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ 27 27 + static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, 28 28 + struct codegen_context *ctx, u32 *addrs) 29 29 { 30 30 const struct bpf_insn *insn = fp->insnsi; 31 31 bool func_addr_fixed; 32 32 u64 func_addr; 33 33 u32 tmp_idx; 34 34 - int i, ret; 34 34 + int i, j, ret; 35 35 36 36 for (i = 0; i < fp->len; i++) { 37 37 /* ··· 66 66 * of the JITed sequence remains unchanged. 67 67 */ 68 68 ctx->idx = tmp_idx; 69 69 + } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { 70 70 + tmp_idx = ctx->idx; 71 71 + ctx->idx = addrs[i] / 4; 72 72 + #ifdef CONFIG_PPC32 73 73 + PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); 74 74 + PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); 75 75 + for (j = ctx->idx - addrs[i] / 4; j < 4; j++) 76 76 + EMIT(PPC_RAW_NOP()); 77 77 + #else 78 78 + func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); 79 79 + PPC_LI64(b2p[insn[i].dst_reg], func_addr); 80 80 + /* overwrite rest with nops */ 81 81 + for (j = ctx->idx - addrs[i] / 4; j < 5; j++) 82 82 + EMIT(PPC_RAW_NOP()); 83 83 + #endif 84 84 + ctx->idx = tmp_idx; 85 85 + i++; 69 86 } 70 87 } 71 88 ··· 217 200 /* 218 201 * Do not touch the prologue and epilogue as they will remain 219 202 * unchanged. Only fix the branch target address for subprog 220 220 - * calls in the body. 203 203 + * calls in the body, and ldimm64 instructions. 221 204 * 222 205 * This does not change the offsets and lengths of the subprog 223 206 * call instruction sequences and hence, the size of the JITed 224 207 * image as well. 
225 208 */ 226 226 - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); 209 209 + bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); 227 210 228 211 /* There is no need to perform the usual passes. */ 229 212 goto skip_codegen_passes;

arch/powerpc/net/bpf_jit_comp32.c

reviewed

··· 191 191 192 192 if (image && rel < 0x2000000 && rel >= -0x2000000) { 193 193 PPC_BL_ABS(func); 194 194 + EMIT(PPC_RAW_NOP()); 195 195 + EMIT(PPC_RAW_NOP()); 196 196 + EMIT(PPC_RAW_NOP()); 194 197 } else { 195 198 /* Load function address into r0 */ 196 199 EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); ··· 293 290 bool func_addr_fixed; 294 291 u64 func_addr; 295 292 u32 true_cond; 293 293 + u32 tmp_idx; 294 294 + int j; 296 295 297 296 /* 298 297 * addrs[] maps a BPF bytecode address into a real offset from ··· 910 905 * 16 byte instruction that uses two 'struct bpf_insn' 911 906 */ 912 907 case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ 908 908 + tmp_idx = ctx->idx; 913 909 PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); 914 910 PPC_LI32(dst_reg, (u32)insn[i].imm); 911 911 + /* padding to allow full 4 instructions for later patching */ 912 912 + for (j = ctx->idx - tmp_idx; j < 4; j++) 913 913 + EMIT(PPC_RAW_NOP()); 915 914 /* Adjust for two bpf instructions */ 916 915 addrs[++i] = ctx->idx * 4; 917 916 break;

+19 -10

arch/powerpc/net/bpf_jit_comp64.c

reviewed

··· 319 319 u64 imm64; 320 320 u32 true_cond; 321 321 u32 tmp_idx; 322 322 + int j; 322 323 323 324 /* 324 325 * addrs[] maps a BPF bytecode address into a real offset from ··· 634 633 EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); 635 634 break; 636 635 case 64: 637 637 - /* 638 638 - * Way easier and faster(?) to store the value 639 639 - * into stack and then use ldbrx 640 640 - * 641 641 - * ctx->seen will be reliable in pass2, but 642 642 - * the instructions generated will remain the 643 643 - * same across all passes 644 644 - */ 636 636 + /* Store the value to stack and then use byte-reverse loads */ 645 637 PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); 646 638 EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); 647 647 - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); 639 639 + if (cpu_has_feature(CPU_FTR_ARCH_206)) { 640 640 + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); 641 641 + } else { 642 642 + EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); 643 643 + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) 644 644 + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); 645 645 + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); 646 646 + EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); 647 647 + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) 648 648 + EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); 649 649 + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); 650 650 + } 648 651 break; 649 652 } 650 653 break; ··· 853 848 case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ 854 849 imm64 = ((u64)(u32) insn[i].imm) | 855 850 (((u64)(u32) insn[i+1].imm) << 32); 851 851 + tmp_idx = ctx->idx; 852 852 + PPC_LI64(dst_reg, imm64); 853 853 + /* padding to allow full 5 instructions for later patching */ 854 854 + for (j = ctx->idx - tmp_idx; j < 5; j++) 855 855 + EMIT(PPC_RAW_NOP()); 856 856 /* Adjust for two bpf instructions */ 857 857 addrs[++i] = ctx->idx * 4; 858 858 - PPC_LI64(dst_reg, imm64); 859 858 break; 860 859 861 860 /*

+28 -30

arch/powerpc/perf/core-book3s.c

reviewed

··· 776 776 mtspr(SPRN_PMC6, pmcs[5]); 777 777 } 778 778 779 779 + /* 780 780 + * If the perf subsystem wants performance monitor interrupts as soon as 781 781 + * possible (e.g., to sample the instruction address and stack chain), 782 782 + * this should return true. The IRQ masking code can then enable MSR[EE] 783 783 + * in some places (e.g., interrupt handlers) that allows PMI interrupts 784 784 + * through to improve accuracy of profiles, at the cost of some performance. 785 785 + * 786 786 + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR 787 787 + * access), but in that case there is no need for prompt PMI handling. 788 788 + * 789 789 + * This currently returns true if any perf counter is being used. It 790 790 + * could possibly return false if only events are being counted rather than 791 791 + * samples being taken, but for now this is good enough. 792 792 + */ 793 793 + bool power_pmu_wants_prompt_pmi(void) 794 794 + { 795 795 + struct cpu_hw_events *cpuhw; 796 796 + 797 797 + /* 798 798 + * This could simply test local_paca->pmcregs_in_use if that were not 799 799 + * under ifdef KVM. 800 800 + */ 801 801 + if (!ppmu) 802 802 + return false; 803 803 + 804 804 + cpuhw = this_cpu_ptr(&cpu_hw_events); 805 805 + return cpuhw->n_events; 806 806 + } 779 807 #endif /* CONFIG_PPC64 */ 780 808 781 809 static void perf_event_interrupt(struct pt_regs *regs); ··· 2464 2436 2465 2437 __perf_event_interrupt(regs); 2466 2438 perf_sample_event_took(sched_clock() - start_clock); 2467 2467 - } 2468 2468 - 2469 2469 - /* 2470 2470 - * If the perf subsystem wants performance monitor interrupts as soon as 2471 2471 - * possible (e.g., to sample the instruction address and stack chain), 2472 2472 - * this should return true. The IRQ masking code can then enable MSR[EE] 2473 2473 - * in some places (e.g., interrupt handlers) that allows PMI interrupts 2474 2474 - * though to improve accuracy of profiles, at the cost of some performance. 
2475 2475 - * 2476 2476 - * The PMU counters can be enabled by other means (e.g., sysfs raw SPR 2477 2477 - * access), but in that case there is no need for prompt PMI handling. 2478 2478 - * 2479 2479 - * This currently returns true if any perf counter is being used. It 2480 2480 - * could possibly return false if only events are being counted rather than 2481 2481 - * samples being taken, but for now this is good enough. 2482 2482 - */ 2483 2483 - bool power_pmu_wants_prompt_pmi(void) 2484 2484 - { 2485 2485 - struct cpu_hw_events *cpuhw; 2486 2486 - 2487 2487 - /* 2488 2488 - * This could simply test local_paca->pmcregs_in_use if that were not 2489 2489 - * under ifdef KVM. 2490 2490 - */ 2491 2491 - 2492 2492 - if (!ppmu) 2493 2493 - return false; 2494 2494 - 2495 2495 - cpuhw = this_cpu_ptr(&cpu_hw_events); 2496 2496 - return cpuhw->n_events; 2497 2439 } 2498 2440 2499 2441 static int power_pmu_prepare_cpu(unsigned int cpu)

+3 -2

kernel/bpf/stackmap.c

reviewed

··· 472 472 u32, size, u64, flags) 473 473 { 474 474 struct pt_regs *regs; 475 475 - long res; 475 475 + long res = -EINVAL; 476 476 477 477 if (!try_get_task_stack(task)) 478 478 return -EFAULT; 479 479 480 480 regs = task_pt_regs(task); 481 481 - res = __bpf_get_stack(regs, task, NULL, buf, size, flags); 481 481 + if (regs) 482 482 + res = __bpf_get_stack(regs, task, NULL, buf, size, flags); 482 483 put_task_stack(task); 483 484 484 485 return res;

+1 -1

tools/bpf/runqslower/runqslower.bpf.c

reviewed

··· 68 68 */ 69 69 struct task_struct *prev = (struct task_struct *)ctx[1]; 70 70 struct task_struct *next = (struct task_struct *)ctx[2]; 71 71 - struct event event = {}; 71 71 + struct runq_event event = {}; 72 72 u64 *tsp, delta_us; 73 73 long state; 74 74 u32 pid;

+1 -1

tools/bpf/runqslower/runqslower.c

reviewed

··· 100 100 101 101 void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) 102 102 { 103 103 - const struct event *e = data; 103 103 + const struct runq_event *e = data; 104 104 struct tm *tm; 105 105 char ts[32]; 106 106 time_t t;

+1 -1

tools/bpf/runqslower/runqslower.h

reviewed

··· 4 4 5 5 #define TASK_COMM_LEN 16 6 6 7 7 - struct event { 7 7 + struct runq_event { 8 8 char task[TASK_COMM_LEN]; 9 9 __u64 delta_us; 10 10 pid_t pid;