Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:

- Optimise radix KVM guest entry/exit by 2x on Power9/Power10.

- Allow firmware to tell us whether to disable the entry and uaccess
flushes on Power10 or later CPUs.

- Add BPF_PROBE_MEM support for 32 and 64-bit BPF jits.

- Several fixes and improvements to our hard lockup watchdog.

- Activate HAVE_DYNAMIC_FTRACE_WITH_REGS on 32-bit.

- Allow building the 64-bit Book3S kernel without hash MMU support, i.e.
Radix only.

- Add KUAP (SMAP) support for 40x, 44x, 8xx, Book3E (64-bit).

- Add new encodings for perf_mem_data_src.mem_hops field, and use them
on Power10.

- A series of small performance improvements to 64-bit interrupt entry.

- Several commits fixing issues when building with the clang integrated
assembler.

- Many other small features and fixes.

Thanks to Alan Modra, Alexey Kardashevskiy, Ammar Faizi, Anders Roxell,
Arnd Bergmann, Athira Rajeev, Cédric Le Goater, Christophe JAILLET,
Christophe Leroy, Christoph Hellwig, Daniel Axtens, David Yang, Erhard
Furtner, Fabiano Rosas, Greg Kroah-Hartman, Guo Ren, Hari Bathini, Jason
Wang, Joel Stanley, Julia Lawall, Kajol Jain, Kees Cook, Laurent Dufour,
Madhavan Srinivasan, Mark Brown, Minghao Chi, Nageswara R Sastry, Naresh
Kamboju, Nathan Chancellor, Nathan Lynch, Nicholas Piggin, Nick Child,
Oliver O'Halloran, Peiwei Hu, Randy Dunlap, Ravi Bangoria, Rob Herring,
Russell Currey, Sachin Sant, Sean Christopherson, Segher Boessenkool,
Thadeu Lima de Souza Cascardo, Tyrel Datwyler, Xiang wangx, and Yang
Guang.

* tag 'powerpc-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (240 commits)
powerpc/xmon: Dump XIVE information for online-only processors.
powerpc/opal: use default_groups in kobj_type
powerpc/cacheinfo: use default_groups in kobj_type
powerpc/sched: Remove unused TASK_SIZE_OF
powerpc/xive: Add missing null check after calling kmalloc
powerpc/floppy: Remove usage of the deprecated "pci-dma-compat.h" API
selftests/powerpc: Add a test of sigreturning to an unaligned address
powerpc/64s: Use EMIT_WARN_ENTRY for SRR debug warnings
powerpc/64s: Mask NIP before checking against SRR0
powerpc/perf: Fix spelling of "its"
powerpc/32: Fix boot failure with GCC latent entropy plugin
powerpc/code-patching: Replace patch_instruction() by ppc_inst_write() in selftests
powerpc/code-patching: Move code patching selftests in its own file
powerpc/code-patching: Move instr_is_branch_{i/b}form() in code-patching.h
powerpc/code-patching: Move patch_exception() outside code-patching.c
powerpc/code-patching: Use test_trampoline for prefixed patch test
powerpc/code-patching: Fix patch_branch() return on out-of-range failure
powerpc/code-patching: Reorganise do_patch_instruction() to ease error handling
powerpc/code-patching: Fix unmap_patch_area() error handling
powerpc/code-patching: Fix error handling in do_patch_instruction()
...

+5636 -3449
+15 -1
Documentation/admin-guide/kernel-parameters.txt
··· 3393 3393 Disable SMAP (Supervisor Mode Access Prevention) 3394 3394 even if it is supported by processor. 3395 3395 3396 - nosmep [X86,PPC] 3396 + nosmep [X86,PPC64s] 3397 3397 Disable SMEP (Supervisor Mode Execution Prevention) 3398 3398 even if it is supported by processor. 3399 3399 ··· 4165 4165 pmtmr= [X86] Manual setup of pmtmr I/O Port. 4166 4166 Override pmtimer IOPort with a hex value. 4167 4167 e.g. pmtmr=0x508 4168 + 4169 + pmu_override= [PPC] Override the PMU. 4170 + This option takes over the PMU facility, so it is no 4171 + longer usable by perf. Setting this option starts the 4172 + PMU counters by setting MMCR0 to 0 (the FC bit is 4173 + cleared). If a number is given, then MMCR1 is set to 4174 + that number, otherwise (e.g., 'pmu_override=on'), MMCR1 4175 + remains 0. 4168 4176 4169 4177 pm_debug_messages [SUSPEND,KNL] 4170 4178 Enable suspend/resume debug messages during boot up. ··· 6501 6493 off Fallback to firmware control of XIVE interrupt 6502 6494 controller on both pseries and powernv 6503 6495 platforms. Only useful on POWER9 and above. 6496 + 6497 + xive.store-eoi=off [PPC] 6498 + By default on POWER10 and above, the kernel will use 6499 + stores for EOI handling when the XIVE interrupt mode 6500 + is active. This option allows the XIVE driver to use 6501 + loads instead, as on POWER9. 6504 6502 6505 6503 xhci-hcd.quirks [USB,KNL] 6506 6504 A hex value specifying bitmask with supplemental xhci
+6 -4
arch/powerpc/Kconfig
··· 129 129 select ARCH_HAS_KCOV 130 130 select ARCH_HAS_MEMBARRIER_CALLBACKS 131 131 select ARCH_HAS_MEMBARRIER_SYNC_CORE 132 - select ARCH_HAS_MEMREMAP_COMPAT_ALIGN 132 + select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU 133 133 select ARCH_HAS_MMIOWB if PPC64 134 134 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 135 135 select ARCH_HAS_PHYS_TO_DMA ··· 165 165 select BINFMT_ELF 166 166 select BUILDTIME_TABLE_SORT 167 167 select CLONE_BACKWARDS 168 + select CPUMASK_OFFSTACK if NR_CPUS >= 8192 168 169 select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN 169 170 select DMA_OPS_BYPASS if PPC64 170 171 select DMA_OPS if PPC64 ··· 206 205 select HAVE_DEBUG_KMEMLEAK 207 206 select HAVE_DEBUG_STACKOVERFLOW 208 207 select HAVE_DYNAMIC_FTRACE 209 - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL 208 + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 210 209 select HAVE_EBPF_JIT 211 210 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) 212 211 select HAVE_FAST_GUP ··· 230 229 select HAVE_KPROBES_ON_FTRACE 231 230 select HAVE_KRETPROBES 232 231 select HAVE_LD_DEAD_CODE_DATA_ELIMINATION 233 - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS 232 + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 234 233 select HAVE_MOD_ARCH_SPECIFIC 235 234 select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) 236 235 select HAVE_OPTPROBES ··· 846 845 config PPC_SUBPAGE_PROT 847 846 bool "Support setting protections for 4k subpages (subpage_prot syscall)" 848 847 default n 849 - depends on PPC_BOOK3S_64 && PPC_64K_PAGES 848 + depends on PPC_64S_HASH_MMU && PPC_64K_PAGES 850 849 help 851 850 This option adds support for system call to allow user programs 852 851 to set access permissions (read/write, readonly, or no access) ··· 944 943 prompt "PowerPC Memory Protection Keys" 945 944 def_bool y 946 945 depends on PPC_BOOK3S_64 946 + depends on PPC_64S_HASH_MMU 947 947 select ARCH_USES_HIGH_VMA_FLAGS 948 948 
select ARCH_HAS_PKEYS 949 949 help
+6 -3
arch/powerpc/Makefile
··· 245 245 # When using '-many -mpower4' gas will first try and find a matching power4 246 246 # mnemonic and failing that it will allow any valid mnemonic that GAS knows 247 247 # about. GCC will pass -many to GAS when assembling, clang does not. 248 - cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many 248 + # LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675 249 + # but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway... 250 + cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many) 249 251 cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) 250 252 251 253 KBUILD_AFLAGS += $(cpu-as-y) ··· 447 445 # Check toolchain versions: 448 446 # - gcc-4.6 is the minimum kernel-wide version so nothing required. 449 447 checkbin: 450 - @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ 451 - && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ 448 + @if test "x${CONFIG_LD_IS_LLD}" != "xy" -a \ 449 + "x$(call ld-ifversion, -le, 22400, y)" = "xy" ; then \ 452 450 echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ 453 451 echo 'in some circumstances.' ; \ 452 + echo '*** binutils 2.23 do not define the TOC symbol ' ; \ 454 453 echo -n '*** Please use a different binutils version.' ; \ 455 454 false ; \ 456 455 fi
+16 -17
arch/powerpc/boot/crt0.S
··· 28 28 p_bss_start: .8byte __bss_start 29 29 p_end: .8byte _end 30 30 31 - p_toc: .8byte __toc_start + 0x8000 - p_base 31 + p_toc: .8byte .TOC. - p_base 32 32 p_dyn: .8byte __dynamic_start - p_base 33 33 p_rela: .8byte __rela_dyn_start - p_base 34 34 p_prom: .8byte 0 ··· 226 226 #ifdef __powerpc64__ 227 227 228 228 #define PROM_FRAME_SIZE 512 229 - #define SAVE_GPR(n, base) std n,8*(n)(base) 230 - #define REST_GPR(n, base) ld n,8*(n)(base) 231 - #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) 232 - #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) 233 - #define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) 234 - #define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) 235 - #define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) 236 - #define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) 237 - #define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) 238 - #define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) 229 + 230 + .macro OP_REGS op, width, start, end, base, offset 231 + .Lreg=\start 232 + .rept (\end - \start + 1) 233 + \op .Lreg,\offset+\width*.Lreg(\base) 234 + .Lreg=.Lreg+1 235 + .endr 236 + .endm 237 + 238 + #define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0 239 + #define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0 240 + #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) 241 + #define REST_GPR(n, base) REST_GPRS(n, n, base) 239 242 240 243 /* prom handles the jump into and return from firmware. The prom args pointer 241 244 is loaded in r3. 
*/ ··· 249 246 stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ 250 247 251 248 SAVE_GPR(2, r1) 252 - SAVE_GPR(13, r1) 253 - SAVE_8GPRS(14, r1) 254 - SAVE_10GPRS(22, r1) 249 + SAVE_GPRS(13, 31, r1) 255 250 mfcr r10 256 251 std r10,8*32(r1) 257 252 mfmsr r10 ··· 284 283 285 284 /* Restore other registers */ 286 285 REST_GPR(2, r1) 287 - REST_GPR(13, r1) 288 - REST_8GPRS(14, r1) 289 - REST_10GPRS(22, r1) 286 + REST_GPRS(13, 31, r1) 290 287 ld r10,8*32(r1) 291 288 mtcr r10 292 289
-8
arch/powerpc/boot/dts/digsy_mtc.dts
··· 25 25 status = "disabled"; 26 26 }; 27 27 28 - spi@f00 { 29 - msp430@0 { 30 - compatible = "spidev"; 31 - spi-max-frequency = <32000>; 32 - reg = <0>; 33 - }; 34 - }; 35 - 36 28 psc@2000 { // PSC1 37 29 status = "disabled"; 38 30 };
-6
arch/powerpc/boot/dts/o2d.dtsi
··· 34 34 #address-cells = <1>; 35 35 #size-cells = <0>; 36 36 cell-index = <0>; 37 - 38 - spidev@0 { 39 - compatible = "spidev"; 40 - spi-max-frequency = <250000>; 41 - reg = <0>; 42 - }; 43 37 }; 44 38 45 39 psc@2200 { // PSC2
+2 -5
arch/powerpc/boot/zImage.lds.S
··· 36 36 } 37 37 38 38 #ifdef CONFIG_PPC64_BOOT_WRAPPER 39 - . = ALIGN(256); 40 - .got : 39 + .got : ALIGN(256) 41 40 { 42 - __toc_start = .; 43 - *(.got) 44 - *(.toc) 41 + *(.got .toc) 45 42 } 46 43 #endif 47 44
+2 -1
arch/powerpc/configs/microwatt_defconfig
··· 15 15 # CONFIG_COMPAT_BRK is not set 16 16 # CONFIG_SLAB_MERGE_DEFAULT is not set 17 17 CONFIG_PPC64=y 18 + CONFIG_POWER9_CPU=y 19 + # CONFIG_PPC_64S_HASH_MMU is not set 18 20 # CONFIG_PPC_KUEP is not set 19 21 # CONFIG_PPC_KUAP is not set 20 22 CONFIG_CPU_LITTLE_ENDIAN=y ··· 29 27 CONFIG_CPU_FREQ=y 30 28 CONFIG_HZ_100=y 31 29 CONFIG_PPC_4K_PAGES=y 32 - # CONFIG_PPC_MEM_KEYS is not set 33 30 # CONFIG_SECCOMP is not set 34 31 # CONFIG_MQ_IOSCHED_KYBER is not set 35 32 # CONFIG_COREDUMP is not set
-1
arch/powerpc/configs/ppc64_defconfig
··· 26 26 CONFIG_NR_CPUS=2048 27 27 CONFIG_PPC_SPLPAR=y 28 28 CONFIG_DTL=y 29 - CONFIG_SCANLOG=m 30 29 CONFIG_PPC_SMLPAR=y 31 30 CONFIG_IBMEBUS=y 32 31 CONFIG_PPC_SVM=y
-1
arch/powerpc/configs/pseries_defconfig
··· 38 38 CONFIG_PARTITION_ADVANCED=y 39 39 CONFIG_PPC_SPLPAR=y 40 40 CONFIG_DTL=y 41 - CONFIG_SCANLOG=m 42 41 CONFIG_PPC_SMLPAR=y 43 42 CONFIG_IBMEBUS=y 44 43 CONFIG_PAPR_SCM=m
+3 -7
arch/powerpc/crypto/md5-asm.S
··· 38 38 39 39 #define INITIALIZE \ 40 40 PPC_STLU r1,-INT_FRAME_SIZE(r1); \ 41 - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ 42 - SAVE_4GPRS(22, r1); \ 43 - SAVE_GPR(26, r1) 41 + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ 44 42 45 43 #define FINALIZE \ 46 - REST_8GPRS(14, r1); /* pop registers from stack */ \ 47 - REST_4GPRS(22, r1); \ 48 - REST_GPR(26, r1); \ 49 - addi r1,r1,INT_FRAME_SIZE; 44 + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ 45 + addi r1,r1,INT_FRAME_SIZE 50 46 51 47 #ifdef __BIG_ENDIAN__ 52 48 #define LOAD_DATA(reg, off) \
+2 -4
arch/powerpc/crypto/sha1-powerpc-asm.S
··· 125 125 126 126 _GLOBAL(powerpc_sha_transform) 127 127 PPC_STLU r1,-INT_FRAME_SIZE(r1) 128 - SAVE_8GPRS(14, r1) 129 - SAVE_10GPRS(22, r1) 128 + SAVE_GPRS(14, 31, r1) 130 129 131 130 /* Load up A - E */ 132 131 lwz RA(0),0(r3) /* A */ ··· 183 184 stw RD(0),12(r3) 184 185 stw RE(0),16(r3) 185 186 186 - REST_8GPRS(14, r1) 187 - REST_10GPRS(22, r1) 187 + REST_GPRS(14, 31, r1) 188 188 addi r1,r1,INT_FRAME_SIZE 189 189 blr
-5
arch/powerpc/include/asm/asm-prototypes.h
··· 141 141 bool preserve_nv) { } 142 142 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 143 143 144 - void kvmhv_save_host_pmu(void); 145 - void kvmhv_load_host_pmu(void); 146 - void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use); 147 - void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); 148 - 149 144 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu); 150 145 151 146 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
+28 -123
arch/powerpc/include/asm/atomic.h
··· 37 37 __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); 38 38 } 39 39 40 - #define ATOMIC_OP(op, asm_op) \ 40 + #define ATOMIC_OP(op, asm_op, suffix, sign, ...) \ 41 41 static __inline__ void arch_atomic_##op(int a, atomic_t *v) \ 42 42 { \ 43 43 int t; \ 44 44 \ 45 45 __asm__ __volatile__( \ 46 46 "1: lwarx %0,0,%3 # atomic_" #op "\n" \ 47 - #asm_op " %0,%2,%0\n" \ 47 + #asm_op "%I2" suffix " %0,%0,%2\n" \ 48 48 " stwcx. %0,0,%3 \n" \ 49 49 " bne- 1b\n" \ 50 50 : "=&r" (t), "+m" (v->counter) \ 51 - : "r" (a), "r" (&v->counter) \ 52 - : "cc"); \ 51 + : "r"#sign (a), "r" (&v->counter) \ 52 + : "cc", ##__VA_ARGS__); \ 53 53 } \ 54 54 55 - #define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ 55 + #define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \ 56 56 static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \ 57 57 { \ 58 58 int t; \ 59 59 \ 60 60 __asm__ __volatile__( \ 61 61 "1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ 62 - #asm_op " %0,%2,%0\n" \ 62 + #asm_op "%I2" suffix " %0,%0,%2\n" \ 63 63 " stwcx. %0,0,%3\n" \ 64 64 " bne- 1b\n" \ 65 65 : "=&r" (t), "+m" (v->counter) \ 66 - : "r" (a), "r" (&v->counter) \ 67 - : "cc"); \ 66 + : "r"#sign (a), "r" (&v->counter) \ 67 + : "cc", ##__VA_ARGS__); \ 68 68 \ 69 69 return t; \ 70 70 } 71 71 72 - #define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ 72 + #define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \ 73 73 static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ 74 74 { \ 75 75 int res, t; \ 76 76 \ 77 77 __asm__ __volatile__( \ 78 78 "1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ 79 - #asm_op " %1,%3,%0\n" \ 79 + #asm_op "%I3" suffix " %1,%0,%3\n" \ 80 80 " stwcx. 
%1,0,%4\n" \ 81 81 " bne- 1b\n" \ 82 82 : "=&r" (res), "=&r" (t), "+m" (v->counter) \ 83 - : "r" (a), "r" (&v->counter) \ 84 - : "cc"); \ 83 + : "r"#sign (a), "r" (&v->counter) \ 84 + : "cc", ##__VA_ARGS__); \ 85 85 \ 86 86 return res; \ 87 87 } 88 88 89 - #define ATOMIC_OPS(op, asm_op) \ 90 - ATOMIC_OP(op, asm_op) \ 91 - ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ 92 - ATOMIC_FETCH_OP_RELAXED(op, asm_op) 89 + #define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \ 90 + ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \ 91 + ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\ 92 + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__) 93 93 94 - ATOMIC_OPS(add, add) 95 - ATOMIC_OPS(sub, subf) 94 + ATOMIC_OPS(add, add, "c", I, "xer") 95 + ATOMIC_OPS(sub, sub, "c", I, "xer") 96 96 97 97 #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed 98 98 #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed ··· 101 101 #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed 102 102 103 103 #undef ATOMIC_OPS 104 - #define ATOMIC_OPS(op, asm_op) \ 105 - ATOMIC_OP(op, asm_op) \ 106 - ATOMIC_FETCH_OP_RELAXED(op, asm_op) 104 + #define ATOMIC_OPS(op, asm_op, suffix, sign) \ 105 + ATOMIC_OP(op, asm_op, suffix, sign) \ 106 + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign) 107 107 108 - ATOMIC_OPS(and, and) 109 - ATOMIC_OPS(or, or) 110 - ATOMIC_OPS(xor, xor) 108 + ATOMIC_OPS(and, and, ".", K) 109 + ATOMIC_OPS(or, or, "", K) 110 + ATOMIC_OPS(xor, xor, "", K) 111 111 112 112 #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed 113 113 #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed ··· 117 117 #undef ATOMIC_FETCH_OP_RELAXED 118 118 #undef ATOMIC_OP_RETURN_RELAXED 119 119 #undef ATOMIC_OP 120 - 121 - static __inline__ void arch_atomic_inc(atomic_t *v) 122 - { 123 - int t; 124 - 125 - __asm__ __volatile__( 126 - "1: lwarx %0,0,%2 # atomic_inc\n\ 127 - addic %0,%0,1\n" 128 - " stwcx. 
%0,0,%2 \n\ 129 - bne- 1b" 130 - : "=&r" (t), "+m" (v->counter) 131 - : "r" (&v->counter) 132 - : "cc", "xer"); 133 - } 134 - #define arch_atomic_inc arch_atomic_inc 135 - 136 - static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v) 137 - { 138 - int t; 139 - 140 - __asm__ __volatile__( 141 - "1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" 142 - " addic %0,%0,1\n" 143 - " stwcx. %0,0,%2\n" 144 - " bne- 1b" 145 - : "=&r" (t), "+m" (v->counter) 146 - : "r" (&v->counter) 147 - : "cc", "xer"); 148 - 149 - return t; 150 - } 151 - 152 - static __inline__ void arch_atomic_dec(atomic_t *v) 153 - { 154 - int t; 155 - 156 - __asm__ __volatile__( 157 - "1: lwarx %0,0,%2 # atomic_dec\n\ 158 - addic %0,%0,-1\n" 159 - " stwcx. %0,0,%2\n\ 160 - bne- 1b" 161 - : "=&r" (t), "+m" (v->counter) 162 - : "r" (&v->counter) 163 - : "cc", "xer"); 164 - } 165 - #define arch_atomic_dec arch_atomic_dec 166 - 167 - static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v) 168 - { 169 - int t; 170 - 171 - __asm__ __volatile__( 172 - "1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" 173 - " addic %0,%0,-1\n" 174 - " stwcx. %0,0,%2\n" 175 - " bne- 1b" 176 - : "=&r" (t), "+m" (v->counter) 177 - : "r" (&v->counter) 178 - : "cc", "xer"); 179 - 180 - return t; 181 - } 182 - 183 - #define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed 184 - #define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed 185 120 186 121 #define arch_atomic_cmpxchg(v, o, n) \ 187 122 (arch_cmpxchg(&((v)->counter), (o), (n))) ··· 176 241 "1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\ 177 242 cmpw 0,%0,%3 \n\ 178 243 beq 2f \n\ 179 - add %0,%2,%0 \n" 244 + add%I2c %0,%0,%2 \n" 180 245 " stwcx. 
%0,0,%1 \n\ 181 246 bne- 1b \n" 182 247 PPC_ATOMIC_EXIT_BARRIER 183 - " subf %0,%2,%0 \n\ 248 + " sub%I2c %0,%0,%2 \n\ 184 249 2:" 185 250 : "=&r" (t) 186 - : "r" (&v->counter), "r" (a), "r" (u) 187 - : "cc", "memory"); 251 + : "r" (&v->counter), "rI" (a), "r" (u) 252 + : "cc", "memory", "xer"); 188 253 189 254 return t; 190 255 } 191 256 #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless 192 - 193 - /** 194 - * atomic_inc_not_zero - increment unless the number is zero 195 - * @v: pointer of type atomic_t 196 - * 197 - * Atomically increments @v by 1, so long as @v is non-zero. 198 - * Returns non-zero if @v was non-zero, and zero otherwise. 199 - */ 200 - static __inline__ int arch_atomic_inc_not_zero(atomic_t *v) 201 - { 202 - int t1, t2; 203 - 204 - __asm__ __volatile__ ( 205 - PPC_ATOMIC_ENTRY_BARRIER 206 - "1: lwarx %0,0,%2 # atomic_inc_not_zero\n\ 207 - cmpwi 0,%0,0\n\ 208 - beq- 2f\n\ 209 - addic %1,%0,1\n" 210 - " stwcx. %1,0,%2\n\ 211 - bne- 1b\n" 212 - PPC_ATOMIC_EXIT_BARRIER 213 - "\n\ 214 - 2:" 215 - : "=&r" (t1), "=&r" (t2) 216 - : "r" (&v->counter) 217 - : "cc", "xer", "memory"); 218 - 219 - return t1; 220 - } 221 - #define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v)) 222 257 223 258 /* 224 259 * Atomically test *v and decrement if it is greater than 0.
+81 -8
arch/powerpc/include/asm/bitops.h
··· 71 71 __asm__ __volatile__ ( \ 72 72 prefix \ 73 73 "1:" PPC_LLARX "%0,0,%3,0\n" \ 74 - stringify_in_c(op) "%0,%0,%2\n" \ 74 + #op "%I2 %0,%0,%2\n" \ 75 75 PPC_STLCX "%0,0,%3\n" \ 76 76 "bne- 1b\n" \ 77 77 : "=&r" (old), "+m" (*p) \ 78 - : "r" (mask), "r" (p) \ 78 + : "rK" (mask), "r" (p) \ 79 79 : "cc", "memory"); \ 80 80 } 81 81 82 82 DEFINE_BITOP(set_bits, or, "") 83 - DEFINE_BITOP(clear_bits, andc, "") 84 - DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER) 85 83 DEFINE_BITOP(change_bits, xor, "") 84 + 85 + static __always_inline bool is_rlwinm_mask_valid(unsigned long x) 86 + { 87 + if (!x) 88 + return false; 89 + if (x & 1) 90 + x = ~x; // make the mask non-wrapping 91 + x += x & -x; // adding the low set bit results in at most one bit set 92 + 93 + return !(x & (x - 1)); 94 + } 95 + 96 + #define DEFINE_CLROP(fn, prefix) \ 97 + static inline void fn(unsigned long mask, volatile unsigned long *_p) \ 98 + { \ 99 + unsigned long old; \ 100 + unsigned long *p = (unsigned long *)_p; \ 101 + \ 102 + if (IS_ENABLED(CONFIG_PPC32) && \ 103 + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\ 104 + asm volatile ( \ 105 + prefix \ 106 + "1:" "lwarx %0,0,%3\n" \ 107 + "rlwinm %0,%0,0,%2\n" \ 108 + "stwcx. 
%0,0,%3\n" \ 109 + "bne- 1b\n" \ 110 + : "=&r" (old), "+m" (*p) \ 111 + : "n" (~mask), "r" (p) \ 112 + : "cc", "memory"); \ 113 + } else { \ 114 + asm volatile ( \ 115 + prefix \ 116 + "1:" PPC_LLARX "%0,0,%3,0\n" \ 117 + "andc %0,%0,%2\n" \ 118 + PPC_STLCX "%0,0,%3\n" \ 119 + "bne- 1b\n" \ 120 + : "=&r" (old), "+m" (*p) \ 121 + : "r" (mask), "r" (p) \ 122 + : "cc", "memory"); \ 123 + } \ 124 + } 125 + 126 + DEFINE_CLROP(clear_bits, "") 127 + DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER) 86 128 87 129 static inline void arch_set_bit(int nr, volatile unsigned long *addr) 88 130 { ··· 158 116 __asm__ __volatile__ ( \ 159 117 prefix \ 160 118 "1:" PPC_LLARX "%0,0,%3,%4\n" \ 161 - stringify_in_c(op) "%1,%0,%2\n" \ 119 + #op "%I2 %1,%0,%2\n" \ 162 120 PPC_STLCX "%1,0,%3\n" \ 163 121 "bne- 1b\n" \ 164 122 postfix \ 165 123 : "=&r" (old), "=&r" (t) \ 166 - : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ 124 + : "rK" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ 167 125 : "cc", "memory"); \ 168 126 return (old & mask); \ 169 127 } ··· 172 130 PPC_ATOMIC_EXIT_BARRIER, 0) 173 131 DEFINE_TESTOP(test_and_set_bits_lock, or, "", 174 132 PPC_ACQUIRE_BARRIER, 1) 175 - DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, 176 - PPC_ATOMIC_EXIT_BARRIER, 0) 177 133 DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, 178 134 PPC_ATOMIC_EXIT_BARRIER, 0) 135 + 136 + static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p) 137 + { 138 + unsigned long old, t; 139 + unsigned long *p = (unsigned long *)_p; 140 + 141 + if (IS_ENABLED(CONFIG_PPC32) && 142 + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) { 143 + asm volatile ( 144 + PPC_ATOMIC_ENTRY_BARRIER 145 + "1:" "lwarx %0,0,%3\n" 146 + "rlwinm %1,%0,0,%2\n" 147 + "stwcx. 
%1,0,%3\n" 148 + "bne- 1b\n" 149 + PPC_ATOMIC_EXIT_BARRIER 150 + : "=&r" (old), "=&r" (t) 151 + : "n" (~mask), "r" (p) 152 + : "cc", "memory"); 153 + } else { 154 + asm volatile ( 155 + PPC_ATOMIC_ENTRY_BARRIER 156 + "1:" PPC_LLARX "%0,0,%3,0\n" 157 + "andc %1,%0,%2\n" 158 + PPC_STLCX "%1,0,%3\n" 159 + "bne- 1b\n" 160 + PPC_ATOMIC_EXIT_BARRIER 161 + : "=&r" (old), "=&r" (t) 162 + : "r" (mask), "r" (p) 163 + : "cc", "memory"); 164 + } 165 + 166 + return (old & mask); 167 + } 179 168 180 169 static inline int arch_test_and_set_bit(unsigned long nr, 181 170 volatile unsigned long *addr)
+24 -84
arch/powerpc/include/asm/book3s/32/kup.h
··· 12 12 #include <linux/jump_label.h> 13 13 14 14 extern struct static_key_false disable_kuap_key; 15 - extern struct static_key_false disable_kuep_key; 16 - 17 - static __always_inline bool kuap_is_disabled(void) 18 - { 19 - return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); 20 - } 21 15 22 16 static __always_inline bool kuep_is_disabled(void) 23 17 { 24 - return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); 25 - } 26 - 27 - static inline void kuep_lock(void) 28 - { 29 - if (kuep_is_disabled()) 30 - return; 31 - 32 - update_user_segments(mfsr(0) | SR_NX); 33 - /* 34 - * This isync() shouldn't be necessary as the kernel is not excepted to 35 - * run any instruction in userspace soon after the update of segments, 36 - * but hash based cores (at least G3) seem to exhibit a random 37 - * behaviour when the 'isync' is not there. 603 cores don't have this 38 - * behaviour so don't do the 'isync' as it saves several CPU cycles. 39 - */ 40 - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 41 - isync(); /* Context sync required after mtsr() */ 42 - } 43 - 44 - static inline void kuep_unlock(void) 45 - { 46 - if (kuep_is_disabled()) 47 - return; 48 - 49 - update_user_segments(mfsr(0) & ~SR_NX); 50 - /* 51 - * This isync() shouldn't be necessary as a 'rfi' will soon be executed 52 - * to return to userspace, but hash based cores (at least G3) seem to 53 - * exhibit a random behaviour when the 'isync' is not there. 603 cores 54 - * don't have this behaviour so don't do the 'isync' as it saves several 55 - * CPU cycles. 
56 - */ 57 - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 58 - isync(); /* Context sync required after mtsr() */ 18 + return !IS_ENABLED(CONFIG_PPC_KUEP); 59 19 } 60 20 61 21 #ifdef CONFIG_PPC_KUAP ··· 24 64 25 65 #define KUAP_NONE (~0UL) 26 66 #define KUAP_ALL (~1UL) 67 + 68 + static __always_inline bool kuap_is_disabled(void) 69 + { 70 + return static_branch_unlikely(&disable_kuap_key); 71 + } 27 72 28 73 static inline void kuap_lock_one(unsigned long addr) 29 74 { ··· 57 92 void kuap_lock_all_ool(void); 58 93 void kuap_unlock_all_ool(void); 59 94 60 - static inline void kuap_lock(unsigned long addr, bool ool) 95 + static inline void kuap_lock_addr(unsigned long addr, bool ool) 61 96 { 62 97 if (likely(addr != KUAP_ALL)) 63 98 kuap_lock_one(addr); ··· 77 112 kuap_unlock_all_ool(); 78 113 } 79 114 80 - static inline void kuap_save_and_lock(struct pt_regs *regs) 115 + static inline void __kuap_lock(void) 116 + { 117 + } 118 + 119 + static inline void __kuap_save_and_lock(struct pt_regs *regs) 81 120 { 82 121 unsigned long kuap = current->thread.kuap; 83 - 84 - if (kuap_is_disabled()) 85 - return; 86 122 87 123 regs->kuap = kuap; 88 124 if (unlikely(kuap == KUAP_NONE)) 89 125 return; 90 126 91 127 current->thread.kuap = KUAP_NONE; 92 - kuap_lock(kuap, false); 128 + kuap_lock_addr(kuap, false); 93 129 } 94 130 95 131 static inline void kuap_user_restore(struct pt_regs *regs) 96 132 { 97 133 } 98 134 99 - static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) 135 + static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) 100 136 { 101 - if (kuap_is_disabled()) 102 - return; 103 - 104 137 if (unlikely(kuap != KUAP_NONE)) { 105 138 current->thread.kuap = KUAP_NONE; 106 - kuap_lock(kuap, false); 139 + kuap_lock_addr(kuap, false); 107 140 } 108 141 109 142 if (likely(regs->kuap == KUAP_NONE)) ··· 112 149 kuap_unlock(regs->kuap, false); 113 150 } 114 151 115 - static inline unsigned long kuap_get_and_assert_locked(void) 152 + 
static inline unsigned long __kuap_get_and_assert_locked(void) 116 153 { 117 154 unsigned long kuap = current->thread.kuap; 118 - 119 - if (kuap_is_disabled()) 120 - return KUAP_NONE; 121 155 122 156 WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); 123 157 124 158 return kuap; 125 159 } 126 160 127 - static inline void kuap_assert_locked(void) 161 + static __always_inline void __allow_user_access(void __user *to, const void __user *from, 162 + u32 size, unsigned long dir) 128 163 { 129 - kuap_get_and_assert_locked(); 130 - } 131 - 132 - static __always_inline void allow_user_access(void __user *to, const void __user *from, 133 - u32 size, unsigned long dir) 134 - { 135 - if (kuap_is_disabled()) 136 - return; 137 - 138 164 BUILD_BUG_ON(!__builtin_constant_p(dir)); 139 165 140 166 if (!(dir & KUAP_WRITE)) ··· 133 181 kuap_unlock_one((__force u32)to); 134 182 } 135 183 136 - static __always_inline void prevent_user_access(unsigned long dir) 184 + static __always_inline void __prevent_user_access(unsigned long dir) 137 185 { 138 186 u32 kuap = current->thread.kuap; 139 - 140 - if (kuap_is_disabled()) 141 - return; 142 187 143 188 BUILD_BUG_ON(!__builtin_constant_p(dir)); 144 189 ··· 143 194 return; 144 195 145 196 current->thread.kuap = KUAP_NONE; 146 - kuap_lock(kuap, true); 197 + kuap_lock_addr(kuap, true); 147 198 } 148 199 149 - static inline unsigned long prevent_user_access_return(void) 200 + static inline unsigned long __prevent_user_access_return(void) 150 201 { 151 202 unsigned long flags = current->thread.kuap; 152 203 153 - if (kuap_is_disabled()) 154 - return KUAP_NONE; 155 - 156 204 if (flags != KUAP_NONE) { 157 205 current->thread.kuap = KUAP_NONE; 158 - kuap_lock(flags, true); 206 + kuap_lock_addr(flags, true); 159 207 } 160 208 161 209 return flags; 162 210 } 163 211 164 - static inline void restore_user_access(unsigned long flags) 212 + static inline void __restore_user_access(unsigned long flags) 165 213 { 166 - if 
(kuap_is_disabled()) 167 - return; 168 - 169 214 if (flags != KUAP_NONE) { 170 215 current->thread.kuap = flags; 171 216 kuap_unlock(flags, true); ··· 167 224 } 168 225 169 226 static inline bool 170 - bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 227 + __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 171 228 { 172 229 unsigned long kuap = regs->kuap; 173 - 174 - if (kuap_is_disabled()) 175 - return false; 176 230 177 231 if (!is_write || kuap == KUAP_ALL) 178 232 return false;
+81 -1
arch/powerpc/include/asm/book3s/32/mmu-hash.h
··· 64 64 #define SR_KP 0x20000000 /* User key */ 65 65 #define SR_KS 0x40000000 /* Supervisor key */ 66 66 67 - #ifndef __ASSEMBLY__ 67 + #ifdef __ASSEMBLY__ 68 + 69 + #include <asm/asm-offsets.h> 70 + 71 + .macro uus_addi sr reg1 reg2 imm 72 + .if NUM_USER_SEGMENTS > \sr 73 + addi \reg1,\reg2,\imm 74 + .endif 75 + .endm 76 + 77 + .macro uus_mtsr sr reg1 78 + .if NUM_USER_SEGMENTS > \sr 79 + mtsr \sr, \reg1 80 + .endif 81 + .endm 82 + 83 + /* 84 + * This isync() shouldn't be necessary as the kernel is not expected to run 85 + * any instruction in userspace soon after the update of segments and 'rfi' 86 + * instruction is used to return to userspace, but hash based cores 87 + * (at least G3) seem to exhibit a random behaviour when the 'isync' is not 88 + * there. 603 cores don't have this behaviour so don't do the 'isync' as it 89 + * saves several CPU cycles. 90 + */ 91 + .macro uus_isync 92 + #ifdef CONFIG_PPC_BOOK3S_604 93 + BEGIN_MMU_FTR_SECTION 94 + isync 95 + END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) 96 + #endif 97 + .endm 98 + 99 + .macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4 100 + uus_addi 1, \tmp2, \tmp1, 0x111 101 + uus_addi 2, \tmp3, \tmp1, 0x222 102 + uus_addi 3, \tmp4, \tmp1, 0x333 103 + 104 + uus_mtsr 0, \tmp1 105 + uus_mtsr 1, \tmp2 106 + uus_mtsr 2, \tmp3 107 + uus_mtsr 3, \tmp4 108 + 109 + uus_addi 4, \tmp1, \tmp1, 0x444 110 + uus_addi 5, \tmp2, \tmp2, 0x444 111 + uus_addi 6, \tmp3, \tmp3, 0x444 112 + uus_addi 7, \tmp4, \tmp4, 0x444 113 + 114 + uus_mtsr 4, \tmp1 115 + uus_mtsr 5, \tmp2 116 + uus_mtsr 6, \tmp3 117 + uus_mtsr 7, \tmp4 118 + 119 + uus_addi 8, \tmp1, \tmp1, 0x444 120 + uus_addi 9, \tmp2, \tmp2, 0x444 121 + uus_addi 10, \tmp3, \tmp3, 0x444 122 + uus_addi 11, \tmp4, \tmp4, 0x444 123 + 124 + uus_mtsr 8, \tmp1 125 + uus_mtsr 9, \tmp2 126 + uus_mtsr 10, \tmp3 127 + uus_mtsr 11, \tmp4 128 + 129 + uus_addi 12, \tmp1, \tmp1, 0x444 130 + uus_addi 13, \tmp2, \tmp2, 0x444 131 + uus_addi 14, \tmp3, \tmp3, 0x444 132 + uus_addi 15, 
\tmp4, \tmp4, 0x444 133 + 134 + uus_mtsr 12, \tmp1 135 + uus_mtsr 13, \tmp2 136 + uus_mtsr 14, \tmp3 137 + uus_mtsr 15, \tmp4 138 + 139 + uus_isync 140 + .endm 141 + 142 + #else 68 143 69 144 /* 70 145 * This macro defines the mapping from contexts to VSIDs (virtual ··· 175 100 176 101 typedef struct { 177 102 unsigned long id; 103 + unsigned long sr0; 178 104 void __user *vdso; 179 105 } mm_context_t; 106 + 107 + #ifdef CONFIG_PPC_KUEP 108 + #define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX 109 + #endif 180 110 181 111 void update_bats(void); 182 112 static inline void cleanup_cpu_mmu_context(void) { }
-4
arch/powerpc/include/asm/book3s/64/hash.h
··· 99 99 * Defines the address of the vmemap area, in its own region on 100 100 * hash table CPUs. 101 101 */ 102 - #ifdef CONFIG_PPC_MM_SLICES 103 - #define HAVE_ARCH_UNMAPPED_AREA 104 - #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 105 - #endif /* CONFIG_PPC_MM_SLICES */ 106 102 107 103 /* PTEIDX nibble */ 108 104 #define _PTEIDX_SECONDARY 0x8
+28 -28
arch/powerpc/include/asm/book3s/64/kup.h
··· 229 229 230 230 #ifdef CONFIG_PPC_KUAP 231 231 232 + static __always_inline bool kuap_is_disabled(void) 233 + { 234 + return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP); 235 + } 236 + 232 237 static inline void kuap_user_restore(struct pt_regs *regs) 233 238 { 234 239 bool restore_amr = false, restore_iamr = false; ··· 273 268 */ 274 269 } 275 270 276 - static inline void kuap_kernel_restore(struct pt_regs *regs, 277 - unsigned long amr) 271 + static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) 278 272 { 279 - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { 280 - if (unlikely(regs->amr != amr)) { 281 - isync(); 282 - mtspr(SPRN_AMR, regs->amr); 283 - /* 284 - * No isync required here because we are about to rfi 285 - * back to previous context before any user accesses 286 - * would be made, which is a CSI. 287 - */ 288 - } 289 - } 273 + if (likely(regs->amr == amr)) 274 + return; 275 + 276 + isync(); 277 + mtspr(SPRN_AMR, regs->amr); 290 278 /* 279 + * No isync required here because we are about to rfi 280 + * back to previous context before any user accesses 281 + * would be made, which is a CSI. 282 + * 291 283 * No need to restore IAMR when returning to kernel space. 
292 284 */ 293 285 } 294 286 295 - static inline unsigned long kuap_get_and_assert_locked(void) 287 + static inline unsigned long __kuap_get_and_assert_locked(void) 296 288 { 297 - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { 298 - unsigned long amr = mfspr(SPRN_AMR); 299 - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ 300 - WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); 301 - return amr; 302 - } 303 - return 0; 289 + unsigned long amr = mfspr(SPRN_AMR); 290 + 291 + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ 292 + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); 293 + return amr; 304 294 } 305 295 306 - static inline void kuap_assert_locked(void) 296 + /* Do nothing, book3s/64 does that in ASM */ 297 + static inline void __kuap_lock(void) 307 298 { 308 - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) 309 - WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); 299 + } 300 + 301 + static inline void __kuap_save_and_lock(struct pt_regs *regs) 302 + { 310 303 } 311 304 312 305 /* ··· 342 339 isync(); 343 340 } 344 341 345 - static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, 346 - bool is_write) 342 + static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 347 343 { 348 - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) 349 - return false; 350 344 /* 351 345 * For radix this will be a storage protection fault (DSISR_PROTFAULT). 352 346 * For hash this will be a key fault (DSISR_KEYFAULT)
+7 -1
arch/powerpc/include/asm/book3s/64/mmu-hash.h
··· 523 523 void slb_dump_contents(struct slb_entry *slb_ptr); 524 524 525 525 extern void slb_vmalloc_update(void); 526 - extern void slb_set_size(u16 size); 527 526 void preload_new_slb_context(unsigned long start, unsigned long sp); 527 + 528 + #ifdef CONFIG_PPC_64S_HASH_MMU 529 + void slb_set_size(u16 size); 530 + #else 531 + static inline void slb_set_size(u16 size) { } 532 + #endif 533 + 528 534 #endif /* __ASSEMBLY__ */ 529 535 530 536 /*
+30 -8
arch/powerpc/include/asm/book3s/64/mmu.h
··· 4 4 5 5 #include <asm/page.h> 6 6 7 + #ifdef CONFIG_HUGETLB_PAGE 8 + #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 9 + #endif 10 + #define HAVE_ARCH_UNMAPPED_AREA 11 + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 12 + 7 13 #ifndef __ASSEMBLY__ 8 14 /* 9 15 * Page size definition ··· 68 62 #define PRTS_MASK 0x1f /* process table size field */ 69 63 #define PRTB_MASK 0x0ffffffffffff000UL 70 64 65 + /* Number of supported LPID bits */ 66 + extern unsigned int mmu_lpid_bits; 67 + 71 68 /* Number of supported PID bits */ 72 69 extern unsigned int mmu_pid_bits; 73 70 ··· 85 76 #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) 86 77 #define PRTB_ENTRIES (1ul << mmu_pid_bits) 87 78 88 - /* 89 - * Power9 currently only support 64K partition table size. 90 - */ 91 - #define PATB_SIZE_SHIFT 16 79 + #define PATB_SIZE_SHIFT (mmu_lpid_bits + 4) 80 + #define PATB_ENTRIES (1ul << mmu_lpid_bits) 92 81 93 82 typedef unsigned long mm_context_id_t; 94 83 struct spinlock; ··· 105 98 * from EA and new context ids to build the new VAs. 
106 99 */ 107 100 mm_context_id_t id; 101 + #ifdef CONFIG_PPC_64S_HASH_MMU 108 102 mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; 103 + #endif 109 104 }; 110 105 111 106 /* Number of bits in the mm_cpumask */ ··· 119 110 /* Number of user space windows opened in process mm_context */ 120 111 atomic_t vas_windows; 121 112 113 + #ifdef CONFIG_PPC_64S_HASH_MMU 122 114 struct hash_mm_context *hash_context; 115 + #endif 123 116 124 117 void __user *vdso; 125 118 /* ··· 144 133 #endif 145 134 } mm_context_t; 146 135 136 + #ifdef CONFIG_PPC_64S_HASH_MMU 147 137 static inline u16 mm_ctx_user_psize(mm_context_t *ctx) 148 138 { 149 139 return ctx->hash_context->user_psize; ··· 205 193 extern int mmu_linear_psize; 206 194 extern int mmu_virtual_psize; 207 195 extern int mmu_vmalloc_psize; 208 - extern int mmu_vmemmap_psize; 209 196 extern int mmu_io_psize; 197 + #else /* CONFIG_PPC_64S_HASH_MMU */ 198 + #ifdef CONFIG_PPC_64K_PAGES 199 + #define mmu_virtual_psize MMU_PAGE_64K 200 + #else 201 + #define mmu_virtual_psize MMU_PAGE_4K 202 + #endif 203 + #endif 204 + extern int mmu_vmemmap_psize; 210 205 211 206 /* MMU initialization */ 212 207 void mmu_early_init_devtree(void); ··· 252 233 * know which translations we will pick. Hence go with hash 253 234 * restrictions. 
254 235 */ 255 - return hash__setup_initial_memory_limit(first_memblock_base, 256 - first_memblock_size); 236 + if (!early_radix_enabled()) 237 + hash__setup_initial_memory_limit(first_memblock_base, 238 + first_memblock_size); 257 239 } 258 240 259 241 #ifdef CONFIG_PPC_PSERIES 260 - extern void radix_init_pseries(void); 242 + void __init radix_init_pseries(void); 261 243 #else 262 244 static inline void radix_init_pseries(void) { } 263 245 #endif ··· 275 255 void cleanup_cpu_mmu_context(void); 276 256 #endif 277 257 258 + #ifdef CONFIG_PPC_64S_HASH_MMU 278 259 static inline int get_user_context(mm_context_t *ctx, unsigned long ea) 279 260 { 280 261 int index = ea >> MAX_EA_BITS_PER_CONTEXT; ··· 295 274 296 275 return get_vsid(context, ea, ssize); 297 276 } 277 + #endif 298 278 299 279 #endif /* __ASSEMBLY__ */ 300 280 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
+6
arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
··· 112 112 113 113 struct mmu_gather; 114 114 extern void hash__tlb_flush(struct mmu_gather *tlb); 115 + void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); 116 + 117 + #ifdef CONFIG_PPC_64S_HASH_MMU 115 118 /* Private function for use by PCI IO mapping code */ 116 119 extern void __flush_hash_table_range(unsigned long start, unsigned long end); 117 120 extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, 118 121 unsigned long addr); 122 + #else 123 + static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { } 124 + #endif 119 125 #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
-4
arch/powerpc/include/asm/book3s/64/tlbflush.h
··· 14 14 TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ 15 15 }; 16 16 17 - #ifdef CONFIG_PPC_NATIVE 18 17 static inline void tlbiel_all(void) 19 18 { 20 19 /* ··· 29 30 else 30 31 hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); 31 32 } 32 - #else 33 - static inline void tlbiel_all(void) { BUG(); } 34 - #endif 35 33 36 34 static inline void tlbiel_all_lpid(bool radix) 37 35 {
+4
arch/powerpc/include/asm/book3s/pgtable.h
··· 25 25 unsigned long size, pgprot_t vma_prot); 26 26 #define __HAVE_PHYS_MEM_ACCESS_PROT 27 27 28 + #if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU) 28 29 /* 29 30 * This gets called at the end of handling a page fault, when 30 31 * the kernel has put a new PTE into the page table for the process. ··· 36 35 * waiting for the inevitable extra hash-table miss exception. 37 36 */ 38 37 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); 38 + #else 39 + static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {} 40 + #endif 39 41 40 42 #endif /* __ASSEMBLY__ */ 41 43 #endif
+5 -5
arch/powerpc/include/asm/btext.h
··· 23 23 24 24 extern void btext_drawchar(char c); 25 25 extern void btext_drawstring(const char *str); 26 - extern void btext_drawhex(unsigned long v); 27 - extern void btext_drawtext(const char *c, unsigned int len); 26 + void __init btext_drawhex(unsigned long v); 27 + void __init btext_drawtext(const char *c, unsigned int len); 28 28 29 - extern void btext_clearscreen(void); 30 - extern void btext_flushscreen(void); 31 - extern void btext_flushline(void); 29 + void __init btext_clearscreen(void); 30 + void __init btext_flushscreen(void); 31 + void __init btext_flushline(void); 32 32 33 33 #endif /* __KERNEL__ */ 34 34 #endif /* __PPC_BTEXT_H */
+24 -16
arch/powerpc/include/asm/code-patching.h
··· 24 24 25 25 bool is_offset_in_branch_range(long offset); 26 26 bool is_offset_in_cond_branch_range(long offset); 27 - int create_branch(struct ppc_inst *instr, const u32 *addr, 27 + int create_branch(ppc_inst_t *instr, const u32 *addr, 28 28 unsigned long target, int flags); 29 - int create_cond_branch(struct ppc_inst *instr, const u32 *addr, 29 + int create_cond_branch(ppc_inst_t *instr, const u32 *addr, 30 30 unsigned long target, int flags); 31 31 int patch_branch(u32 *addr, unsigned long target, int flags); 32 - int patch_instruction(u32 *addr, struct ppc_inst instr); 33 - int raw_patch_instruction(u32 *addr, struct ppc_inst instr); 32 + int patch_instruction(u32 *addr, ppc_inst_t instr); 33 + int raw_patch_instruction(u32 *addr, ppc_inst_t instr); 34 34 35 35 static inline unsigned long patch_site_addr(s32 *site) 36 36 { 37 37 return (unsigned long)site + *site; 38 38 } 39 39 40 - static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) 40 + static inline int patch_instruction_site(s32 *site, ppc_inst_t instr) 41 41 { 42 42 return patch_instruction((u32 *)patch_site_addr(site), instr); 43 43 } ··· 58 58 return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); 59 59 } 60 60 61 - int instr_is_relative_branch(struct ppc_inst instr); 62 - int instr_is_relative_link_branch(struct ppc_inst instr); 61 + static inline unsigned int branch_opcode(ppc_inst_t instr) 62 + { 63 + return ppc_inst_primary_opcode(instr) & 0x3F; 64 + } 65 + 66 + static inline int instr_is_branch_iform(ppc_inst_t instr) 67 + { 68 + return branch_opcode(instr) == 18; 69 + } 70 + 71 + static inline int instr_is_branch_bform(ppc_inst_t instr) 72 + { 73 + return branch_opcode(instr) == 16; 74 + } 75 + 76 + int instr_is_relative_branch(ppc_inst_t instr); 77 + int instr_is_relative_link_branch(ppc_inst_t instr); 63 78 unsigned long branch_target(const u32 *instr); 64 - int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); 65 - extern 
bool is_conditional_branch(struct ppc_inst instr); 66 - #ifdef CONFIG_PPC_BOOK3E_64 67 - void __patch_exception(int exc, unsigned long addr); 68 - #define patch_exception(exc, name) do { \ 69 - extern unsigned int name; \ 70 - __patch_exception((exc), (unsigned long)&name); \ 71 - } while (0) 72 - #endif 79 + int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src); 80 + bool is_conditional_branch(ppc_inst_t instr); 73 81 74 82 #define OP_RT_RA_MASK 0xffff0000UL 75 83 #define LIS_R2 (PPC_RAW_LIS(_R2, 0))
+3 -3
arch/powerpc/include/asm/cpm2.h
··· 1133 1133 CPM_CLK_DUMMY 1134 1134 }; 1135 1135 1136 - extern int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); 1137 - extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); 1136 + int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); 1137 + int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); 1138 1138 1139 1139 #define CPM_PIN_INPUT 0 1140 1140 #define CPM_PIN_OUTPUT 1 ··· 1143 1143 #define CPM_PIN_GPIO 4 1144 1144 #define CPM_PIN_OPENDRAIN 8 1145 1145 1146 - void cpm2_set_pin(int port, int pin, int flags); 1146 + void __init cpm2_set_pin(int port, int pin, int flags); 1147 1147 1148 1148 #endif /* __CPM2__ */ 1149 1149 #endif /* __KERNEL__ */
+1 -1
arch/powerpc/include/asm/cpuidle.h
··· 85 85 extern int nr_pnv_idle_states; 86 86 87 87 unsigned long pnv_cpu_offline(unsigned int cpu); 88 - int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); 88 + int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); 89 89 static inline void report_invalid_psscr_val(u64 psscr_val, int err) 90 90 { 91 91 switch (err) {
-33
arch/powerpc/include/asm/cputhreads.h
··· 32 32 #define threads_core_mask (*get_cpu_mask(0)) 33 33 #endif 34 34 35 - /* cpu_thread_mask_to_cores - Return a cpumask of one per cores 36 - * hit by the argument 37 - * 38 - * @threads: a cpumask of online threads 39 - * 40 - * This function returns a cpumask which will have one online cpu's 41 - * bit set for each core that has at least one thread set in the argument. 42 - * 43 - * This can typically be used for things like IPI for tlb invalidations 44 - * since those need to be done only once per core/TLB 45 - */ 46 - static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) 47 - { 48 - cpumask_t tmp, res; 49 - int i, cpu; 50 - 51 - cpumask_clear(&res); 52 - for (i = 0; i < NR_CPUS; i += threads_per_core) { 53 - cpumask_shift_left(&tmp, &threads_core_mask, i); 54 - if (cpumask_intersects(threads, &tmp)) { 55 - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); 56 - if (cpu < nr_cpu_ids) 57 - cpumask_set_cpu(cpu, &res); 58 - } 59 - } 60 - return res; 61 - } 62 - 63 35 static inline int cpu_nr_cores(void) 64 36 { 65 37 return nr_cpu_ids >> threads_shift; 66 - } 67 - 68 - static inline cpumask_t cpu_online_cores_map(void) 69 - { 70 - return cpu_thread_mask_to_cores(cpu_online_mask); 71 38 } 72 39 73 40 #ifdef CONFIG_SMP
+1 -1
arch/powerpc/include/asm/eeh.h
··· 460 460 } 461 461 462 462 463 - void eeh_cache_debugfs_init(void); 463 + void __init eeh_cache_debugfs_init(void); 464 464 465 465 #endif /* CONFIG_PPC64 */ 466 466 #endif /* __KERNEL__ */
+4
arch/powerpc/include/asm/exception-64e.h
··· 149 149 addi r11,r13,PACA_EXTLB; \ 150 150 TLB_MISS_RESTORE(r11) 151 151 152 + #ifndef __ASSEMBLY__ 153 + extern unsigned int interrupt_base_book3e; 154 + #endif 155 + 152 156 #define SET_IVOR(vector_number, vector_offset) \ 153 157 LOAD_REG_ADDR(r3,interrupt_base_book3e);\ 154 158 ori r3,r3,vector_offset@l; \
+3 -3
arch/powerpc/include/asm/fadump-internal.h
··· 137 137 }; 138 138 139 139 /* Helper functions */ 140 - s32 fadump_setup_cpu_notes_buf(u32 num_cpus); 140 + s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus); 141 141 void fadump_free_cpu_notes_buf(void); 142 - u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); 143 - void fadump_update_elfcore_header(char *bufp); 142 + u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); 143 + void __init fadump_update_elfcore_header(char *bufp); 144 144 bool is_fadump_boot_mem_contiguous(void); 145 145 bool is_fadump_reserved_mem_contiguous(void); 146 146
-8
arch/powerpc/include/asm/firmware.h
··· 80 80 FW_FEATURE_POWERNV_ALWAYS = 0, 81 81 FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, 82 82 FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, 83 - FW_FEATURE_NATIVE_POSSIBLE = 0, 84 - FW_FEATURE_NATIVE_ALWAYS = 0, 85 83 FW_FEATURE_POSSIBLE = 86 84 #ifdef CONFIG_PPC_PSERIES 87 85 FW_FEATURE_PSERIES_POSSIBLE | ··· 89 91 #endif 90 92 #ifdef CONFIG_PPC_PS3 91 93 FW_FEATURE_PS3_POSSIBLE | 92 - #endif 93 - #ifdef CONFIG_PPC_NATIVE 94 - FW_FEATURE_NATIVE_ALWAYS | 95 94 #endif 96 95 0, 97 96 FW_FEATURE_ALWAYS = ··· 100 105 #endif 101 106 #ifdef CONFIG_PPC_PS3 102 107 FW_FEATURE_PS3_ALWAYS & 103 - #endif 104 - #ifdef CONFIG_PPC_NATIVE 105 - FW_FEATURE_NATIVE_ALWAYS & 106 108 #endif 107 109 FW_FEATURE_POSSIBLE, 108 110
+5 -3
arch/powerpc/include/asm/floppy.h
··· 134 134 int dir; 135 135 136 136 doing_vdma = 0; 137 - dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; 137 + dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; 138 138 139 139 if (bus_addr 140 140 && (addr != prev_addr || size != prev_size || dir != prev_dir)) { 141 141 /* different from last time -- unmap prev */ 142 - pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir); 142 + dma_unmap_single(&isa_bridge_pcidev->dev, bus_addr, prev_size, 143 + prev_dir); 143 144 bus_addr = 0; 144 145 } 145 146 146 147 if (!bus_addr) /* need to map it */ 147 - bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir); 148 + bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size, 149 + dir); 148 150 149 151 /* remember this one as prev */ 150 152 prev_addr = addr;
+5 -7
arch/powerpc/include/asm/head-64.h
··· 98 98 . = sname##_len; 99 99 100 100 #define USE_FIXED_SECTION(sname) \ 101 - fs_label = start_##sname; \ 102 - fs_start = sname##_start; \ 103 101 use_ftsec sname; 104 102 105 103 #define USE_TEXT_SECTION() \ 106 - fs_label = start_text; \ 107 - fs_start = text_start; \ 108 104 .text 109 105 110 106 #define CLOSE_FIXED_SECTION(sname) \ ··· 157 161 * - ABS_ADDR is used to find the absolute address of any symbol, from within 158 162 * a fixed section. 159 163 */ 160 - #define DEFINE_FIXED_SYMBOL(label) \ 161 - label##_absolute = (label - fs_label + fs_start) 164 + // define label as being _in_ sname 165 + #define DEFINE_FIXED_SYMBOL(label, sname) \ 166 + label##_absolute = (label - start_ ## sname + sname ## _start) 162 167 163 168 #define FIXED_SYMBOL_ABS_ADDR(label) \ 164 169 (label##_absolute) 165 170 166 - #define ABS_ADDR(label) (label - fs_label + fs_start) 171 + // find label from _within_ sname 172 + #define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start) 167 173 168 174 #endif /* __ASSEMBLY__ */ 169 175
+1 -1
arch/powerpc/include/asm/hugetlb.h
··· 15 15 16 16 extern bool hugetlb_disabled; 17 17 18 - void hugetlbpage_init_default(void); 18 + void __init hugetlbpage_init_default(void); 19 19 20 20 int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 21 21 unsigned long len);
+2 -3
arch/powerpc/include/asm/hw_breakpoint.h
··· 10 10 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H 11 11 12 12 #include <asm/cpu_has_feature.h> 13 - #include <asm/inst.h> 14 13 15 14 #ifdef __KERNEL__ 16 15 struct arch_hw_breakpoint { ··· 55 56 return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; 56 57 } 57 58 58 - bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, 59 + bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, 59 60 unsigned long ea, int type, int size, 60 61 struct arch_hw_breakpoint *info); 61 62 62 - void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, 63 + void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, 63 64 int *type, int *size, unsigned long *ea); 64 65 65 66 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+93 -14
arch/powerpc/include/asm/hw_irq.h
··· 61 61 62 62 static inline void __hard_irq_enable(void) 63 63 { 64 - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) 64 + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) 65 65 wrtee(MSR_EE); 66 66 else if (IS_ENABLED(CONFIG_PPC_8xx)) 67 67 wrtspr(SPRN_EIE); ··· 73 73 74 74 static inline void __hard_irq_disable(void) 75 75 { 76 - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) 76 + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) 77 77 wrtee(0); 78 78 else if (IS_ENABLED(CONFIG_PPC_8xx)) 79 79 wrtspr(SPRN_EID); ··· 85 85 86 86 static inline void __hard_EE_RI_disable(void) 87 87 { 88 - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) 88 + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) 89 89 wrtee(0); 90 90 else if (IS_ENABLED(CONFIG_PPC_8xx)) 91 91 wrtspr(SPRN_NRI); ··· 97 97 98 98 static inline void __hard_RI_enable(void) 99 99 { 100 - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) 100 + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) 101 101 return; 102 102 103 103 if (IS_ENABLED(CONFIG_PPC_8xx)) ··· 224 224 return arch_irqs_disabled_flags(arch_local_save_flags()); 225 225 } 226 226 227 + static inline void set_pmi_irq_pending(void) 228 + { 229 + /* 230 + * Invoked from PMU callback functions to set PMI bit in the paca. 231 + * This has to be called with irq's disabled (via hard_irq_disable()). 232 + */ 233 + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 234 + WARN_ON_ONCE(mfmsr() & MSR_EE); 235 + 236 + get_paca()->irq_happened |= PACA_IRQ_PMI; 237 + } 238 + 239 + static inline void clear_pmi_irq_pending(void) 240 + { 241 + /* 242 + * Invoked from PMU callback functions to clear the pending PMI bit 243 + * in the paca. 244 + */ 245 + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 246 + WARN_ON_ONCE(mfmsr() & MSR_EE); 247 + 248 + get_paca()->irq_happened &= ~PACA_IRQ_PMI; 249 + } 250 + 251 + static inline bool pmi_irq_pending(void) 252 + { 253 + /* 254 + * Invoked from PMU callback functions to check if there is a pending 255 + * PMI bit in the paca. 
256 + */ 257 + if (get_paca()->irq_happened & PACA_IRQ_PMI) 258 + return true; 259 + 260 + return false; 261 + } 262 + 227 263 #ifdef CONFIG_PPC_BOOK3S 228 264 /* 229 265 * To support disabling and enabling of irq with PMI, set of ··· 342 306 return __lazy_irq_pending(local_paca->irq_happened); 343 307 } 344 308 309 + bool power_pmu_wants_prompt_pmi(void); 310 + 345 311 /* 346 - * This is called by asynchronous interrupts to conditionally 347 - * re-enable hard interrupts after having cleared the source 348 - * of the interrupt. They are kept disabled if there is a different 349 - * soft-masked interrupt pending that requires hard masking. 312 + * This is called by asynchronous interrupts to check whether to 313 + * conditionally re-enable hard interrupts after having cleared 314 + * the source of the interrupt. They are kept disabled if there 315 + * is a different soft-masked interrupt pending that requires hard 316 + * masking. 350 317 */ 351 - static inline void may_hard_irq_enable(void) 318 + static inline bool should_hard_irq_enable(void) 352 319 { 353 - if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) { 354 - get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; 355 - __hard_irq_enable(); 356 - } 320 + #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG 321 + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); 322 + WARN_ON(mfmsr() & MSR_EE); 323 + #endif 324 + #ifdef CONFIG_PERF_EVENTS 325 + /* 326 + * If the PMU is not running, there is not much reason to enable 327 + * MSR[EE] in irq handlers because any interrupts would just be 328 + * soft-masked. 329 + * 330 + * TODO: Add test for 64e 331 + */ 332 + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi()) 333 + return false; 334 + 335 + if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK) 336 + return false; 337 + 338 + return true; 339 + #else 340 + return false; 341 + #endif 342 + } 343 + 344 + /* 345 + * Do the hard enabling, only call this if should_hard_irq_enable is true. 
346 + */ 347 + static inline void do_hard_irq_enable(void) 348 + { 349 + #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG 350 + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); 351 + WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); 352 + WARN_ON(mfmsr() & MSR_EE); 353 + #endif 354 + /* 355 + * This allows PMI interrupts (and watchdog soft-NMIs) through. 356 + * There is no other reason to enable this way. 357 + */ 358 + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; 359 + __hard_irq_enable(); 357 360 } 358 361 359 362 static inline bool arch_irq_disabled_regs(struct pt_regs *regs) ··· 473 398 return !(regs->msr & MSR_EE); 474 399 } 475 400 476 - static inline bool may_hard_irq_enable(void) 401 + static inline bool should_hard_irq_enable(void) 477 402 { 478 403 return false; 479 404 } ··· 482 407 { 483 408 BUILD_BUG(); 484 409 } 410 + 411 + static inline void clear_pmi_irq_pending(void) { } 412 + static inline void set_pmi_irq_pending(void) { } 413 + static inline bool pmi_irq_pending(void) { return false; } 485 414 486 415 static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) 487 416 {
+1 -1
arch/powerpc/include/asm/i8259.h
··· 7 7 8 8 extern void i8259_init(struct device_node *node, unsigned long intack_addr); 9 9 extern unsigned int i8259_irq(void); 10 - extern struct irq_domain *i8259_get_host(void); 10 + struct irq_domain *__init i8259_get_host(void); 11 11 12 12 #endif /* __KERNEL__ */ 13 13 #endif /* _ASM_POWERPC_I8259_H */
+60 -35
arch/powerpc/include/asm/inst.h
··· 3 3 #define _ASM_POWERPC_INST_H 4 4 5 5 #include <asm/ppc-opcode.h> 6 - 7 - #ifdef CONFIG_PPC64 6 + #include <asm/reg.h> 7 + #include <asm/disassemble.h> 8 + #include <asm/uaccess.h> 8 9 9 10 #define ___get_user_instr(gu_op, dest, ptr) \ 10 11 ({ \ 11 12 long __gui_ret; \ 12 13 u32 __user *__gui_ptr = (u32 __user *)ptr; \ 13 - struct ppc_inst __gui_inst; \ 14 + ppc_inst_t __gui_inst; \ 14 15 unsigned int __prefix, __suffix; \ 15 16 \ 16 17 __chk_user_ptr(ptr); \ 17 18 __gui_ret = gu_op(__prefix, __gui_ptr); \ 18 19 if (__gui_ret == 0) { \ 19 - if ((__prefix >> 26) == OP_PREFIX) { \ 20 + if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \ 20 21 __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ 21 22 __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ 22 23 } else { \ ··· 28 27 } \ 29 28 __gui_ret; \ 30 29 }) 31 - #else /* !CONFIG_PPC64 */ 32 - #define ___get_user_instr(gu_op, dest, ptr) \ 33 - ({ \ 34 - __chk_user_ptr(ptr); \ 35 - gu_op((dest).val, (u32 __user *)(ptr)); \ 36 - }) 37 - #endif /* CONFIG_PPC64 */ 38 30 39 31 #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) 40 32 ··· 37 43 * Instruction data type for POWER 38 44 */ 39 45 40 - struct ppc_inst { 41 - u32 val; 42 - #ifdef CONFIG_PPC64 43 - u32 suffix; 44 - #endif 45 - } __packed; 46 - 47 - static inline u32 ppc_inst_val(struct ppc_inst x) 46 + #if defined(CONFIG_PPC64) || defined(__CHECKER__) 47 + static inline u32 ppc_inst_val(ppc_inst_t x) 48 48 { 49 49 return x.val; 50 50 } 51 51 52 - static inline int ppc_inst_primary_opcode(struct ppc_inst x) 52 + #define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) 53 + 54 + #else 55 + static inline u32 ppc_inst_val(ppc_inst_t x) 56 + { 57 + return x; 58 + } 59 + #define ppc_inst(x) (x) 60 + #endif 61 + 62 + static inline int ppc_inst_primary_opcode(ppc_inst_t x) 53 63 { 54 64 return ppc_inst_val(x) >> 26; 55 65 } 56 66 57 - #define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) 58 - 59 67 #ifdef CONFIG_PPC64 60 - #define 
ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) 68 + #define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) }) 61 69 62 - static inline u32 ppc_inst_suffix(struct ppc_inst x) 70 + static inline u32 ppc_inst_suffix(ppc_inst_t x) 63 71 { 64 72 return x.suffix; 65 73 } 66 74 67 75 #else 68 - #define ppc_inst_prefix(x, y) ppc_inst(x) 76 + #define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x)) 69 77 70 - static inline u32 ppc_inst_suffix(struct ppc_inst x) 78 + static inline u32 ppc_inst_suffix(ppc_inst_t x) 71 79 { 72 80 return 0; 73 81 } 74 82 75 83 #endif /* CONFIG_PPC64 */ 76 84 77 - static inline struct ppc_inst ppc_inst_read(const u32 *ptr) 85 + static inline ppc_inst_t ppc_inst_read(const u32 *ptr) 78 86 { 79 87 if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) 80 88 return ppc_inst_prefix(*ptr, *(ptr + 1)); ··· 84 88 return ppc_inst(*ptr); 85 89 } 86 90 87 - static inline bool ppc_inst_prefixed(struct ppc_inst x) 91 + static inline bool ppc_inst_prefixed(ppc_inst_t x) 88 92 { 89 93 return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; 90 94 } 91 95 92 - static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) 96 + static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x) 93 97 { 94 98 return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); 95 99 } 96 100 97 - static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) 101 + static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y) 98 102 { 99 103 if (ppc_inst_val(x) != ppc_inst_val(y)) 100 104 return false; ··· 103 107 return ppc_inst_suffix(x) == ppc_inst_suffix(y); 104 108 } 105 109 106 - static inline int ppc_inst_len(struct ppc_inst x) 110 + static inline int ppc_inst_len(ppc_inst_t x) 107 111 { 108 112 return ppc_inst_prefixed(x) ? 
8 : 4; 109 113 } ··· 114 118 */ 115 119 static inline u32 *ppc_inst_next(u32 *location, u32 *value) 116 120 { 117 - struct ppc_inst tmp; 121 + ppc_inst_t tmp; 118 122 119 123 tmp = ppc_inst_read(value); 120 124 121 125 return (void *)location + ppc_inst_len(tmp); 122 126 } 123 127 124 - static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) 128 + static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x) 125 129 { 126 130 if (IS_ENABLED(CONFIG_PPC32)) 127 131 return ppc_inst_val(x); ··· 131 135 return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); 132 136 } 133 137 138 + static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) 139 + { 140 + if (!ppc_inst_prefixed(x)) 141 + *ptr = ppc_inst_val(x); 142 + else 143 + *(u64 *)ptr = ppc_inst_as_ulong(x); 144 + } 145 + 134 146 #define PPC_INST_STR_LEN sizeof("00000000 00000000") 135 147 136 - static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) 148 + static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) 137 149 { 138 150 if (ppc_inst_prefixed(x)) 139 151 sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); ··· 158 154 __str; \ 159 155 }) 160 156 161 - int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); 157 + static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) 158 + { 159 + unsigned int val, suffix; 160 + 161 + if (unlikely(!is_kernel_addr((unsigned long)src))) 162 + return -ERANGE; 163 + 164 + /* See https://github.com/ClangBuiltLinux/linux/issues/1521 */ 165 + #if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000 166 + val = suffix = 0; 167 + #endif 168 + __get_kernel_nofault(&val, src, u32, Efault); 169 + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { 170 + __get_kernel_nofault(&suffix, src + 1, u32, Efault); 171 + *inst = ppc_inst_prefix(val, suffix); 172 + } else { 173 + *inst = ppc_inst(val); 174 + } 175 + return 0; 176 + Efault: 177 + return -EFAULT; 178 + } 162 179 163 
180 #endif /* _ASM_POWERPC_INST_H */
+59 -14
arch/powerpc/include/asm/interrupt.h
··· 97 97 local_paca->hsrr_valid = 0; 98 98 } 99 99 #else 100 + static inline unsigned long search_kernel_restart_table(unsigned long addr) 101 + { 102 + return 0; 103 + } 104 + 100 105 static inline bool is_implicit_soft_masked(struct pt_regs *regs) 101 106 { 102 107 return false; ··· 144 139 if (!arch_irq_disabled_regs(regs)) 145 140 trace_hardirqs_off(); 146 141 147 - if (user_mode(regs)) { 148 - kuep_lock(); 149 - account_cpu_user_entry(); 150 - } else { 142 + if (user_mode(regs)) 143 + kuap_lock(); 144 + else 151 145 kuap_save_and_lock(regs); 152 - } 146 + 147 + if (user_mode(regs)) 148 + account_cpu_user_entry(); 153 149 #endif 154 150 155 151 #ifdef CONFIG_PPC64 156 - if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) 152 + bool trace_enable = false; 153 + 154 + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) { 155 + if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) 156 + trace_enable = true; 157 + } else { 158 + irq_soft_mask_set(IRQS_ALL_DISABLED); 159 + } 160 + 161 + /* 162 + * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. 163 + * Asynchronous interrupts get here with HARD_DIS set (see below), so 164 + * this enables MSR[EE] for synchronous interrupts. IRQs remain 165 + * soft-masked. The interrupt handler may later call 166 + * interrupt_cond_local_irq_enable() to achieve a regular process 167 + * context. 
168 + */ 169 + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { 170 + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 171 + BUG_ON(!(regs->msr & MSR_EE)); 172 + __hard_irq_enable(); 173 + } else { 174 + __hard_RI_enable(); 175 + } 176 + 177 + /* Do this when RI=1 because it can cause SLB faults */ 178 + if (trace_enable) 157 179 trace_hardirqs_off(); 158 - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 159 180 160 181 if (user_mode(regs)) { 182 + kuap_lock(); 161 183 CT_WARN_ON(ct_state() != CONTEXT_USER); 162 184 user_exit_irqoff(); 163 185 164 186 account_cpu_user_entry(); 165 187 account_stolen_time(); 166 188 } else { 189 + kuap_save_and_lock(regs); 167 190 /* 168 191 * CT_WARN_ON comes here via program_check_exception, 169 192 * so avoid recursion. 170 193 */ 171 194 if (TRAP(regs) != INTERRUPT_PROGRAM) { 172 195 CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 173 - BUG_ON(is_implicit_soft_masked(regs)); 196 + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 197 + BUG_ON(is_implicit_soft_masked(regs)); 174 198 } 175 - #ifdef CONFIG_PPC_BOOK3S 199 + 176 200 /* Move this under a debugging check */ 177 - if (arch_irq_disabled_regs(regs)) 201 + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && 202 + arch_irq_disabled_regs(regs)) 178 203 BUG_ON(search_kernel_restart_table(regs->nip)); 179 - #endif 180 204 } 181 205 if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) 182 206 BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); ··· 234 200 235 201 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) 236 202 { 203 + #ifdef CONFIG_PPC64 204 + /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ 205 + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 206 + #endif 207 + interrupt_enter_prepare(regs, state); 237 208 #ifdef CONFIG_PPC_BOOK3S_64 209 + /* 210 + * RI=1 is set by interrupt_enter_prepare, so this thread flags access 211 + * has to come afterward (it can cause SLB faults). 
212 + */ 238 213 if (cpu_has_feature(CPU_FTR_CTRL) && 239 214 !test_thread_local_flags(_TLF_RUNLATCH)) 240 215 __ppc64_runlatch_on(); 241 216 #endif 242 - 243 - interrupt_enter_prepare(regs, state); 244 217 irq_enter(); 245 218 } 246 219 ··· 316 275 */ 317 276 regs->softe = IRQS_ALL_DISABLED; 318 277 } 278 + 279 + __hard_RI_enable(); 319 280 320 281 /* Don't do any per-CPU operations until interrupt state is fixed */ 321 282 ··· 415 372 interrupt_handler long func(struct pt_regs *regs) \ 416 373 { \ 417 374 long ret; \ 375 + \ 376 + __hard_RI_enable(); \ 418 377 \ 419 378 ret = ____##func (regs); \ 420 379 \ ··· 609 564 610 565 /* slb.c */ 611 566 DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); 612 - DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); 567 + DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); 613 568 614 569 /* hash_utils.c */ 615 570 DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault);
+1 -1
arch/powerpc/include/asm/iommu.h
··· 275 275 size_t size, enum dma_data_direction direction, 276 276 unsigned long attrs); 277 277 278 - extern void iommu_init_early_pSeries(void); 278 + void __init iommu_init_early_pSeries(void); 279 279 extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); 280 280 extern void iommu_init_early_pasemi(void); 281 281
+1 -1
arch/powerpc/include/asm/ipic.h
··· 65 65 IPIC_MCP_MU = 7, 66 66 }; 67 67 68 - extern void ipic_set_default_priority(void); 68 + void __init ipic_set_default_priority(void); 69 69 extern u32 ipic_get_mcp_status(void); 70 70 extern void ipic_clear_mcp_status(u32 mask); 71 71
+1 -1
arch/powerpc/include/asm/irq.h
··· 36 36 37 37 struct pt_regs; 38 38 39 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 39 + #ifdef CONFIG_BOOKE_OR_40x 40 40 /* 41 41 * Per-cpu stacks for handling critical, debug and machine check 42 42 * level interrupts.
+1 -1
arch/powerpc/include/asm/kexec.h
··· 84 84 extern int crash_shutdown_unregister(crash_shutdown_t handler); 85 85 86 86 extern void crash_kexec_secondary(struct pt_regs *regs); 87 - extern int overlaps_crashkernel(unsigned long start, unsigned long size); 87 + int __init overlaps_crashkernel(unsigned long start, unsigned long size); 88 88 extern void reserve_crashkernel(void); 89 89 extern void machine_kexec_mask_interrupts(void); 90 90
+100 -22
arch/powerpc/include/asm/kup.h
··· 14 14 #include <asm/nohash/32/kup-8xx.h> 15 15 #endif 16 16 17 + #ifdef CONFIG_BOOKE_OR_40x 18 + #include <asm/nohash/kup-booke.h> 19 + #endif 20 + 17 21 #ifdef CONFIG_PPC_BOOK3S_32 18 22 #include <asm/book3s/32/kup.h> 19 23 #endif ··· 36 32 37 33 #include <linux/pgtable.h> 38 34 39 - #ifdef CONFIG_PPC_KUEP 35 + void setup_kup(void); 40 36 void setup_kuep(bool disabled); 41 - #else 42 - static inline void setup_kuep(bool disabled) { } 43 - #endif /* CONFIG_PPC_KUEP */ 44 - 45 - #ifndef CONFIG_PPC_BOOK3S_32 46 - static inline void kuep_lock(void) { } 47 - static inline void kuep_unlock(void) { } 48 - #endif 49 37 50 38 #ifdef CONFIG_PPC_KUAP 51 39 void setup_kuap(bool disabled); 52 40 #else 53 41 static inline void setup_kuap(bool disabled) { } 54 42 43 + static __always_inline bool kuap_is_disabled(void) { return true; } 44 + 55 45 static inline bool 56 - bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 46 + __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 57 47 { 58 48 return false; 59 49 } 60 50 61 - static inline void kuap_assert_locked(void) { } 62 - static inline void kuap_save_and_lock(struct pt_regs *regs) { } 51 + static inline void __kuap_assert_locked(void) { } 52 + static inline void __kuap_lock(void) { } 53 + static inline void __kuap_save_and_lock(struct pt_regs *regs) { } 63 54 static inline void kuap_user_restore(struct pt_regs *regs) { } 64 - static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } 55 + static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } 65 56 66 - static inline unsigned long kuap_get_and_assert_locked(void) 57 + static inline unsigned long __kuap_get_and_assert_locked(void) 67 58 { 68 59 return 0; 69 60 } ··· 69 70 * platforms. 
70 71 */ 71 72 #ifndef CONFIG_PPC_BOOK3S_64 72 - static inline void allow_user_access(void __user *to, const void __user *from, 73 - unsigned long size, unsigned long dir) { } 74 - static inline void prevent_user_access(unsigned long dir) { } 75 - static inline unsigned long prevent_user_access_return(void) { return 0UL; } 76 - static inline void restore_user_access(unsigned long flags) { } 73 + static inline void __allow_user_access(void __user *to, const void __user *from, 74 + unsigned long size, unsigned long dir) { } 75 + static inline void __prevent_user_access(unsigned long dir) { } 76 + static inline unsigned long __prevent_user_access_return(void) { return 0UL; } 77 + static inline void __restore_user_access(unsigned long flags) { } 77 78 #endif /* CONFIG_PPC_BOOK3S_64 */ 78 79 #endif /* CONFIG_PPC_KUAP */ 79 80 80 - static __always_inline void setup_kup(void) 81 + static __always_inline bool 82 + bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 81 83 { 82 - setup_kuep(disable_kuep); 83 - setup_kuap(disable_kuap); 84 + if (kuap_is_disabled()) 85 + return false; 86 + 87 + return __bad_kuap_fault(regs, address, is_write); 84 88 } 89 + 90 + static __always_inline void kuap_assert_locked(void) 91 + { 92 + if (kuap_is_disabled()) 93 + return; 94 + 95 + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) 96 + __kuap_get_and_assert_locked(); 97 + } 98 + 99 + static __always_inline void kuap_lock(void) 100 + { 101 + if (kuap_is_disabled()) 102 + return; 103 + 104 + __kuap_lock(); 105 + } 106 + 107 + static __always_inline void kuap_save_and_lock(struct pt_regs *regs) 108 + { 109 + if (kuap_is_disabled()) 110 + return; 111 + 112 + __kuap_save_and_lock(regs); 113 + } 114 + 115 + static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) 116 + { 117 + if (kuap_is_disabled()) 118 + return; 119 + 120 + __kuap_kernel_restore(regs, amr); 121 + } 122 + 123 + static __always_inline unsigned long kuap_get_and_assert_locked(void) 
124 + { 125 + if (kuap_is_disabled()) 126 + return 0; 127 + 128 + return __kuap_get_and_assert_locked(); 129 + } 130 + 131 + #ifndef CONFIG_PPC_BOOK3S_64 132 + static __always_inline void allow_user_access(void __user *to, const void __user *from, 133 + unsigned long size, unsigned long dir) 134 + { 135 + if (kuap_is_disabled()) 136 + return; 137 + 138 + __allow_user_access(to, from, size, dir); 139 + } 140 + 141 + static __always_inline void prevent_user_access(unsigned long dir) 142 + { 143 + if (kuap_is_disabled()) 144 + return; 145 + 146 + __prevent_user_access(dir); 147 + } 148 + 149 + static __always_inline unsigned long prevent_user_access_return(void) 150 + { 151 + if (kuap_is_disabled()) 152 + return 0; 153 + 154 + return __prevent_user_access_return(); 155 + } 156 + 157 + static __always_inline void restore_user_access(unsigned long flags) 158 + { 159 + if (kuap_is_disabled()) 160 + return; 161 + 162 + __restore_user_access(flags); 163 + } 164 + #endif /* CONFIG_PPC_BOOK3S_64 */ 85 165 86 166 static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) 87 167 {
+1
arch/powerpc/include/asm/kvm_asm.h
··· 79 79 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 80 80 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 81 81 #define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 82 + #define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980 82 83 #define BOOK3S_INTERRUPT_DOORBELL 0xa00 83 84 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 84 85 #define BOOK3S_INTERRUPT_TRACE 0xd00
+6
arch/powerpc/include/asm/kvm_book3s.h
··· 406 406 return vcpu->arch.fault_dar; 407 407 } 408 408 409 + /* Expiry time of vcpu DEC relative to host TB */ 410 + static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu) 411 + { 412 + return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset; 413 + } 414 + 409 415 static inline bool is_kvmppc_resume_guest(int r) 410 416 { 411 417 return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
+3 -2
arch/powerpc/include/asm/kvm_book3s_64.h
··· 44 44 struct mutex tlb_lock; /* serialize page faults and tlbies */ 45 45 struct kvm_nested_guest *next; 46 46 cpumask_t need_tlb_flush; 47 - cpumask_t cpu_in_guest; 48 47 short prev_cpu[NR_CPUS]; 49 48 u8 radix; /* is this nested guest radix */ 50 49 }; ··· 153 154 return radix; 154 155 } 155 156 156 - int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); 157 + unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr); 158 + 159 + int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb); 157 160 158 161 #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ 159 162 #endif
+1 -1
arch/powerpc/include/asm/kvm_guest.h
··· 16 16 return static_branch_unlikely(&kvm_guest); 17 17 } 18 18 19 - int check_kvm_guest(void); 19 + int __init check_kvm_guest(void); 20 20 #else 21 21 static inline bool is_kvm_guest(void) { return false; } 22 22 static inline int check_kvm_guest(void) { return 0; }
+5 -2
arch/powerpc/include/asm/kvm_host.h
··· 287 287 u32 online_vcores; 288 288 atomic_t hpte_mod_interest; 289 289 cpumask_t need_tlb_flush; 290 - cpumask_t cpu_in_guest; 291 290 u8 radix; 292 291 u8 fwnmi_enabled; 293 292 u8 secure_guest; ··· 578 579 ulong cfar; 579 580 ulong ppr; 580 581 u32 pspb; 582 + u8 load_ebb; 583 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 584 + u8 load_tm; 585 + #endif 581 586 ulong fscr; 582 587 ulong shadow_fscr; 583 588 ulong ebbhr; ··· 744 741 745 742 struct hrtimer dec_timer; 746 743 u64 dec_jiffies; 747 - u64 dec_expires; 744 + u64 dec_expires; /* Relative to guest timebase. */ 748 745 unsigned long pending_exceptions; 749 746 u8 ceded; 750 747 u8 prodded;
+2 -2
arch/powerpc/include/asm/kvm_ppc.h
··· 552 552 extern void kvm_hv_vm_deactivated(void); 553 553 extern bool kvm_hv_mode_active(void); 554 554 555 - extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, 556 - struct kvm_nested_guest *nested); 555 + extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu); 557 556 558 557 #else 559 558 static inline void __init kvm_cma_reserve(void) ··· 759 760 void kvmppc_subcore_enter_guest(void); 760 761 void kvmppc_subcore_exit_guest(void); 761 762 long kvmppc_realmode_hmi_handler(void); 763 + long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu); 762 764 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 763 765 long pte_index, unsigned long pteh, unsigned long ptel); 764 766 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
-2
arch/powerpc/include/asm/machdep.h
··· 235 235 machine_id == &mach_##name; \ 236 236 }) 237 237 238 - extern void probe_machine(void); 239 - 240 238 #ifdef CONFIG_PPC_PMAC 241 239 /* 242 240 * Power macintoshes have either a CUDA, PMU or SMU controlling
+13 -3
arch/powerpc/include/asm/mmu.h
··· 157 157 158 158 enum { 159 159 MMU_FTRS_POSSIBLE = 160 - #if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) 160 + #if defined(CONFIG_PPC_BOOK3S_604) 161 161 MMU_FTR_HPTE_TABLE | 162 162 #endif 163 163 #ifdef CONFIG_PPC_8xx ··· 184 184 MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | 185 185 #endif 186 186 #ifdef CONFIG_PPC_BOOK3S_64 187 + MMU_FTR_KERNEL_RO | 188 + #ifdef CONFIG_PPC_64S_HASH_MMU 187 189 MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | 188 190 MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | 189 191 MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | 190 - MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA | 192 + MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE | 191 193 #endif 192 194 #ifdef CONFIG_PPC_RADIX_MMU 193 195 MMU_FTR_TYPE_RADIX | 194 196 MMU_FTR_GTSE | 195 197 #endif /* CONFIG_PPC_RADIX_MMU */ 198 + #endif 196 199 #ifdef CONFIG_PPC_KUAP 197 200 MMU_FTR_BOOK3S_KUAP | 198 201 #endif /* CONFIG_PPC_KUAP */ ··· 225 222 #endif 226 223 #ifdef CONFIG_E500 227 224 #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E 225 + #endif 226 + 227 + /* BOOK3S_64 options */ 228 + #if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU) 229 + #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX 230 + #elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU) 231 + #define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE 228 232 #endif 229 233 230 234 #ifndef MMU_FTRS_ALWAYS ··· 339 329 return mmu_has_feature(MMU_FTR_TYPE_RADIX); 340 330 } 341 331 342 - static inline bool early_radix_enabled(void) 332 + static __always_inline bool early_radix_enabled(void) 343 333 { 344 334 return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); 345 335 }
+3 -1
arch/powerpc/include/asm/mmu_context.h
··· 71 71 } 72 72 73 73 extern int hash__alloc_context_id(void); 74 - extern void hash__reserve_context_id(int id); 74 + void __init hash__reserve_context_id(int id); 75 75 extern void __destroy_context(int context_id); 76 76 static inline void mmu_context_init(void) { } 77 77 78 + #ifdef CONFIG_PPC_64S_HASH_MMU 78 79 static inline int alloc_extended_context(struct mm_struct *mm, 79 80 unsigned long ea) 80 81 { ··· 101 100 return true; 102 101 return false; 103 102 } 103 + #endif 104 104 105 105 #else 106 106 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+1 -1
arch/powerpc/include/asm/mpic.h
··· 472 472 extern void mpic_cpu_set_priority(int prio); 473 473 474 474 /* Request IPIs on primary mpic */ 475 - extern void mpic_request_ipis(void); 475 + void __init mpic_request_ipis(void); 476 476 477 477 /* Send a message (IPI) to a given target (cpu number or MSG_*) */ 478 478 void smp_mpic_message_pass(int target, int msg);
+12 -38
arch/powerpc/include/asm/nohash/32/kup-8xx.h
··· 20 20 return static_branch_unlikely(&disable_kuap_key); 21 21 } 22 22 23 - static inline void kuap_save_and_lock(struct pt_regs *regs) 23 + static inline void __kuap_lock(void) 24 24 { 25 - if (kuap_is_disabled()) 26 - return; 25 + } 27 26 27 + static inline void __kuap_save_and_lock(struct pt_regs *regs) 28 + { 28 29 regs->kuap = mfspr(SPRN_MD_AP); 29 30 mtspr(SPRN_MD_AP, MD_APG_KUAP); 30 31 } ··· 34 33 { 35 34 } 36 35 37 - static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) 36 + static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) 38 37 { 39 - if (kuap_is_disabled()) 40 - return; 41 - 42 38 mtspr(SPRN_MD_AP, regs->kuap); 43 39 } 44 40 45 - static inline unsigned long kuap_get_and_assert_locked(void) 41 + static inline unsigned long __kuap_get_and_assert_locked(void) 46 42 { 47 43 unsigned long kuap; 48 - 49 - if (kuap_is_disabled()) 50 - return MD_APG_INIT; 51 44 52 45 kuap = mfspr(SPRN_MD_AP); 53 46 ··· 51 56 return kuap; 52 57 } 53 58 54 - static inline void kuap_assert_locked(void) 59 + static inline void __allow_user_access(void __user *to, const void __user *from, 60 + unsigned long size, unsigned long dir) 55 61 { 56 - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) 57 - kuap_get_and_assert_locked(); 58 - } 59 - 60 - static inline void allow_user_access(void __user *to, const void __user *from, 61 - unsigned long size, unsigned long dir) 62 - { 63 - if (kuap_is_disabled()) 64 - return; 65 - 66 62 mtspr(SPRN_MD_AP, MD_APG_INIT); 67 63 } 68 64 69 - static inline void prevent_user_access(unsigned long dir) 65 + static inline void __prevent_user_access(unsigned long dir) 70 66 { 71 - if (kuap_is_disabled()) 72 - return; 73 - 74 67 mtspr(SPRN_MD_AP, MD_APG_KUAP); 75 68 } 76 69 77 - static inline unsigned long prevent_user_access_return(void) 70 + static inline unsigned long __prevent_user_access_return(void) 78 71 { 79 72 unsigned long flags; 80 - 81 - if (kuap_is_disabled()) 82 - 
return MD_APG_INIT; 83 73 84 74 flags = mfspr(SPRN_MD_AP); 85 75 ··· 73 93 return flags; 74 94 } 75 95 76 - static inline void restore_user_access(unsigned long flags) 96 + static inline void __restore_user_access(unsigned long flags) 77 97 { 78 - if (kuap_is_disabled()) 79 - return; 80 - 81 98 mtspr(SPRN_MD_AP, flags); 82 99 } 83 100 84 101 static inline bool 85 - bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 102 + __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 86 103 { 87 - if (kuap_is_disabled()) 88 - return false; 89 - 90 104 return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); 91 105 } 92 106
-1
arch/powerpc/include/asm/nohash/32/mmu-44x.h
··· 113 113 114 114 /* patch sites */ 115 115 extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; 116 - extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; 117 116 118 117 #endif /* !__ASSEMBLY__ */ 119 118
+2 -4
arch/powerpc/include/asm/nohash/32/mmu-8xx.h
··· 39 39 * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) 40 40 * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) 41 41 * 2 => User => 11 (all accesses performed according as user iaw page definition) 42 - * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT 43 - * => 10 (all accesses performed according to swaped page definition) for KUEP 42 + * 3 => User+Accessed => 10 (all accesses performed according to swaped page definition) for KUEP 44 43 * 4-15 => Not Used 45 44 */ 46 - #define MI_APG_INIT 0xdc000000 47 - #define MI_APG_KUEP 0xde000000 45 + #define MI_APG_INIT 0xde000000 48 46 49 47 /* The effective page number register. When read, contains the information 50 48 * about the last instruction TLB miss. When MI_RPN is written, bits in
+6
arch/powerpc/include/asm/nohash/64/pgtable.h
··· 313 313 unsigned long phys); 314 314 extern void vmemmap_remove_mapping(unsigned long start, 315 315 unsigned long page_size); 316 + void __patch_exception(int exc, unsigned long addr); 317 + #define patch_exception(exc, name) do { \ 318 + extern unsigned int name; \ 319 + __patch_exception((exc), (unsigned long)&name); \ 320 + } while (0) 321 + 316 322 #endif /* __ASSEMBLY__ */ 317 323 318 324 #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
+110
arch/powerpc/include/asm/nohash/kup-booke.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_POWERPC_KUP_BOOKE_H_ 3 + #define _ASM_POWERPC_KUP_BOOKE_H_ 4 + 5 + #include <asm/bug.h> 6 + 7 + #ifdef CONFIG_PPC_KUAP 8 + 9 + #ifdef __ASSEMBLY__ 10 + 11 + .macro kuap_check_amr gpr1, gpr2 12 + .endm 13 + 14 + #else 15 + 16 + #include <linux/jump_label.h> 17 + #include <linux/sched.h> 18 + 19 + #include <asm/reg.h> 20 + 21 + extern struct static_key_false disable_kuap_key; 22 + 23 + static __always_inline bool kuap_is_disabled(void) 24 + { 25 + return static_branch_unlikely(&disable_kuap_key); 26 + } 27 + 28 + static inline void __kuap_lock(void) 29 + { 30 + mtspr(SPRN_PID, 0); 31 + isync(); 32 + } 33 + 34 + static inline void __kuap_save_and_lock(struct pt_regs *regs) 35 + { 36 + regs->kuap = mfspr(SPRN_PID); 37 + mtspr(SPRN_PID, 0); 38 + isync(); 39 + } 40 + 41 + static inline void kuap_user_restore(struct pt_regs *regs) 42 + { 43 + if (kuap_is_disabled()) 44 + return; 45 + 46 + mtspr(SPRN_PID, current->thread.pid); 47 + 48 + /* Context synchronisation is performed by rfi */ 49 + } 50 + 51 + static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) 52 + { 53 + if (regs->kuap) 54 + mtspr(SPRN_PID, current->thread.pid); 55 + 56 + /* Context synchronisation is performed by rfi */ 57 + } 58 + 59 + static inline unsigned long __kuap_get_and_assert_locked(void) 60 + { 61 + unsigned long kuap = mfspr(SPRN_PID); 62 + 63 + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) 64 + WARN_ON_ONCE(kuap); 65 + 66 + return kuap; 67 + } 68 + 69 + static inline void __allow_user_access(void __user *to, const void __user *from, 70 + unsigned long size, unsigned long dir) 71 + { 72 + mtspr(SPRN_PID, current->thread.pid); 73 + isync(); 74 + } 75 + 76 + static inline void __prevent_user_access(unsigned long dir) 77 + { 78 + mtspr(SPRN_PID, 0); 79 + isync(); 80 + } 81 + 82 + static inline unsigned long __prevent_user_access_return(void) 83 + { 84 + unsigned long flags = mfspr(SPRN_PID); 85 + 86 + 
mtspr(SPRN_PID, 0); 87 + isync(); 88 + 89 + return flags; 90 + } 91 + 92 + static inline void __restore_user_access(unsigned long flags) 93 + { 94 + if (flags) { 95 + mtspr(SPRN_PID, current->thread.pid); 96 + isync(); 97 + } 98 + } 99 + 100 + static inline bool 101 + __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) 102 + { 103 + return !regs->kuap; 104 + } 105 + 106 + #endif /* !__ASSEMBLY__ */ 107 + 108 + #endif /* CONFIG_PPC_KUAP */ 109 + 110 + #endif /* _ASM_POWERPC_KUP_BOOKE_H_ */
+1
arch/powerpc/include/asm/opal-api.h
··· 1094 1094 OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ 1095 1095 OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ 1096 1096 OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ 1097 + OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040, 1097 1098 }; 1098 1099 1099 1100 /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
+1 -1
arch/powerpc/include/asm/opal.h
··· 314 314 int depth, void *data); 315 315 extern int early_init_dt_scan_recoverable_ranges(unsigned long node, 316 316 const char *uname, int depth, void *data); 317 - extern void opal_configure_cores(void); 317 + void __init opal_configure_cores(void); 318 318 319 319 extern int opal_get_chars(uint32_t vtermno, char *buf, int count); 320 320 extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
+8
arch/powerpc/include/asm/paca.h
··· 97 97 /* this becomes non-zero. */ 98 98 u8 kexec_state; /* set when kexec down has irqs off */ 99 99 #ifdef CONFIG_PPC_BOOK3S_64 100 + #ifdef CONFIG_PPC_64S_HASH_MMU 100 101 struct slb_shadow *slb_shadow_ptr; 102 + #endif 101 103 struct dtl_entry *dispatch_log; 102 104 struct dtl_entry *dispatch_log_end; 103 105 #endif ··· 112 110 /* used for most interrupts/exceptions */ 113 111 u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); 114 112 113 + #ifdef CONFIG_PPC_64S_HASH_MMU 115 114 /* SLB related definitions */ 116 115 u16 vmalloc_sllp; 117 116 u8 slb_cache_ptr; ··· 123 120 u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */ 124 121 u32 slb_kern_bitmap; 125 122 u32 slb_cache[SLB_CACHE_ENTRIES]; 123 + #endif 126 124 #endif /* CONFIG_PPC_BOOK3S_64 */ 127 125 128 126 #ifdef CONFIG_PPC_BOOK3E ··· 153 149 #endif /* CONFIG_PPC_BOOK3E */ 154 150 155 151 #ifdef CONFIG_PPC_BOOK3S 152 + #ifdef CONFIG_PPC_64S_HASH_MMU 156 153 #ifdef CONFIG_PPC_MM_SLICES 157 154 unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; 158 155 unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; 159 156 #else 160 157 u16 mm_ctx_user_psize; 161 158 u16 mm_ctx_sllp; 159 + #endif 162 160 #endif 163 161 #endif 164 162 ··· 274 268 #endif /* CONFIG_PPC_PSERIES */ 275 269 276 270 #ifdef CONFIG_PPC_BOOK3S_64 271 + #ifdef CONFIG_PPC_64S_HASH_MMU 277 272 /* Capture SLB related old contents in MCE handler. */ 278 273 struct slb_entry *mce_faulty_slbs; 279 274 u16 slb_save_cache_ptr; 275 + #endif 280 276 #endif /* CONFIG_PPC_BOOK3S_64 */ 281 277 #ifdef CONFIG_STACKPROTECTOR 282 278 unsigned long canary;
+1 -1
arch/powerpc/include/asm/pci.h
··· 48 48 } 49 49 50 50 #ifdef CONFIG_PCI 51 - extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); 51 + void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops); 52 52 #else /* CONFIG_PCI */ 53 53 #define set_pci_dma_ops(d) 54 54 #endif
+1 -1
arch/powerpc/include/asm/perf_event_server.h
··· 98 98 #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ 99 99 #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ 100 100 101 - extern int register_power_pmu(struct power_pmu *); 101 + int __init register_power_pmu(struct power_pmu *pmu); 102 102 103 103 struct pt_regs; 104 104 extern unsigned long perf_misc_flags(struct pt_regs *regs);
+6
arch/powerpc/include/asm/ppc-opcode.h
··· 249 249 #define PPC_INST_COPY 0x7c20060c 250 250 #define PPC_INST_DCBA 0x7c0005ec 251 251 #define PPC_INST_DCBA_MASK 0xfc0007fe 252 + #define PPC_INST_DSSALL 0x7e00066c 252 253 #define PPC_INST_ISEL 0x7c00001e 253 254 #define PPC_INST_ISEL_MASK 0xfc00003e 254 255 #define PPC_INST_LSWI 0x7c0004aa ··· 394 393 (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) 395 394 #define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ 396 395 (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) 396 + #define PPC_RAW_TLBIEL_v205(rb, l) (0x7c000224 | ___PPC_RB(rb) | (l << 21)) 397 397 #define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) 398 398 #define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b)) 399 399 #define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) ··· 568 566 #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) 569 567 #define PPC_RAW_EIEIO() (0x7c0006ac) 570 568 569 + #define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc)) 570 + 571 571 /* Deal with instructions that older assemblers aren't aware of */ 572 572 #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) 573 573 #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) ··· 579 575 #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) 580 576 #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) 581 577 #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) 578 + #define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) 582 579 #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) 583 580 #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) 584 581 #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) ··· 607 602 stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) 608 603 #define 
PPC_TLBIEL(rb,rs,ric,prs,r) \ 609 604 stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) 605 + #define PPC_TLBIEL_v205(rb, l) stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l)) 610 606 #define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) 611 607 #define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) 612 608
+27 -16
arch/powerpc/include/asm/ppc_asm.h
··· 17 17 #define SZL (BITS_PER_LONG/8) 18 18 19 19 /* 20 + * This expands to a sequence of operations with reg incrementing from 21 + * start to end inclusive, of this form: 22 + * 23 + * op reg, (offset + (width * reg))(base) 24 + * 25 + * Note that offset is not the offset of the first operation unless start 26 + * is zero (or width is zero). 27 + */ 28 + .macro OP_REGS op, width, start, end, base, offset 29 + .Lreg=\start 30 + .rept (\end - \start + 1) 31 + \op .Lreg, \offset + \width * .Lreg(\base) 32 + .Lreg=.Lreg+1 33 + .endr 34 + .endm 35 + 36 + /* 20 37 * Macros for storing registers into and loading registers from 21 38 * exception frames. 22 39 */ 23 40 #ifdef __powerpc64__ 24 - #define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) 25 - #define REST_GPR(n, base) ld n,GPR0+8*(n)(base) 26 - #define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) 27 - #define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) 41 + #define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0 42 + #define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0 43 + #define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base) 44 + #define REST_NVGPRS(base) REST_GPRS(14, 31, base) 28 45 #else 29 - #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) 30 - #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) 31 - #define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) 32 - #define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) 46 + #define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0 47 + #define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0 48 + #define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base) 49 + #define REST_NVGPRS(base) REST_GPRS(13, 31, base) 33 50 #endif 34 51 35 - #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) 36 - #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) 37 - #define SAVE_8GPRS(n, base) 
SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) 38 - #define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) 39 - #define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) 40 - #define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) 41 - #define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) 42 - #define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) 52 + #define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) 53 + #define REST_GPR(n, base) REST_GPRS(n, n, base) 43 54 44 55 #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) 45 56 #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
+16 -2
arch/powerpc/include/asm/processor.h
··· 157 157 #ifdef CONFIG_PPC_BOOK3S_32 158 158 unsigned long r0, r3, r4, r5, r6, r8, r9, r11; 159 159 unsigned long lr, ctr; 160 + unsigned long sr0; 160 161 #endif 161 162 #endif /* CONFIG_PPC32 */ 163 + #if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) 164 + unsigned long pid; /* value written in PID reg. at interrupt exit */ 165 + #endif 162 166 /* Debug Registers */ 163 167 struct debug_reg debug; 164 168 #ifdef CONFIG_PPC_FPU_REGS ··· 195 191 int used_vsr; /* set if process has used VSX */ 196 192 #endif /* CONFIG_VSX */ 197 193 #ifdef CONFIG_SPE 198 - unsigned long evr[32]; /* upper 32-bits of SPE regs */ 199 - u64 acc; /* Accumulator */ 194 + struct_group(spe, 195 + unsigned long evr[32]; /* upper 32-bits of SPE regs */ 196 + u64 acc; /* Accumulator */ 197 + ); 200 198 unsigned long spefscr; /* SPE & eFP status */ 201 199 unsigned long spefscr_last; /* SPEFSCR value on last prctl 202 200 call or trap return */ ··· 282 276 #define SPEFSCR_INIT 283 277 #endif 284 278 279 + #ifdef CONFIG_PPC_BOOK3S_32 280 + #define SR0_INIT .sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0, 281 + #else 282 + #define SR0_INIT 283 + #endif 284 + 285 285 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) 286 286 #define INIT_THREAD { \ 287 287 .ksp = INIT_SP, \ ··· 295 283 .kuap = ~0UL, /* KUAP_NONE */ \ 296 284 .fpexc_mode = MSR_FE0 | MSR_FE1, \ 297 285 SPEFSCR_INIT \ 286 + SR0_INIT \ 298 287 } 299 288 #elif defined(CONFIG_PPC32) 300 289 #define INIT_THREAD { \ ··· 303 290 .pgdir = swapper_pg_dir, \ 304 291 .fpexc_mode = MSR_FE0 | MSR_FE1, \ 305 292 SPEFSCR_INIT \ 293 + SR0_INIT \ 306 294 } 307 295 #else 308 296 #define INIT_THREAD { \
+1 -1
arch/powerpc/include/asm/ptrace.h
··· 291 291 292 292 static inline bool cpu_has_msr_ri(void) 293 293 { 294 - return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x); 294 + return !IS_ENABLED(CONFIG_BOOKE_OR_40x); 295 295 } 296 296 297 297 static inline bool regs_is_unrecoverable(struct pt_regs *regs)
+14 -2
arch/powerpc/include/asm/reg.h
··· 18 18 #include <asm/feature-fixups.h> 19 19 20 20 /* Pickup Book E specific registers. */ 21 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 21 + #ifdef CONFIG_BOOKE_OR_40x 22 22 #include <asm/reg_booke.h> 23 - #endif /* CONFIG_BOOKE || CONFIG_40x */ 23 + #endif 24 24 25 25 #ifdef CONFIG_FSL_EMB_PERFMON 26 26 #include <asm/reg_fsl_emb.h> ··· 1366 1366 1367 1367 /* Macros for setting and retrieving special purpose registers */ 1368 1368 #ifndef __ASSEMBLY__ 1369 + 1370 + #if defined(CONFIG_PPC64) || defined(__CHECKER__) 1371 + typedef struct { 1372 + u32 val; 1373 + #ifdef CONFIG_PPC64 1374 + u32 suffix; 1375 + #endif 1376 + } __packed ppc_inst_t; 1377 + #else 1378 + typedef u32 ppc_inst_t; 1379 + #endif 1380 + 1369 1381 #define mfmsr() ({unsigned long rval; \ 1370 1382 asm volatile("mfmsr %0" : "=r" (rval) : \ 1371 1383 : "memory"); rval;})
+1 -1
arch/powerpc/include/asm/rtas.h
··· 264 264 extern int rtas_set_rtc_time(struct rtc_time *rtc_time); 265 265 266 266 extern unsigned int rtas_busy_delay_time(int status); 267 - extern unsigned int rtas_busy_delay(int status); 267 + bool rtas_busy_delay(int status); 268 268 269 269 extern int early_init_dt_scan_rtas(unsigned long node, 270 270 const char *uname, int depth, void *data);
+7 -7
arch/powerpc/include/asm/sections.h
··· 25 25 extern char end_virt_trampolines[]; 26 26 #endif 27 27 28 + /* 29 + * This assumes the kernel is never compiled -mcmodel=small or 30 + * the total .toc is always less than 64k. 31 + */ 28 32 static inline unsigned long kernel_toc_addr(void) 29 33 { 30 - /* Defined by the linker, see vmlinux.lds.S */ 31 - extern unsigned long __toc_start; 34 + unsigned long toc_ptr; 32 35 33 - /* 34 - * The TOC register (r2) points 32kB into the TOC, so that 64kB of 35 - * the TOC can be addressed using a single machine instruction. 36 - */ 37 - return (unsigned long)(&__toc_start) + 0x8000UL; 36 + asm volatile("mr %0, 2" : "=r" (toc_ptr)); 37 + return toc_ptr; 38 38 } 39 39 40 40 static inline int overlaps_interrupt_vector_text(unsigned long start,
+4 -5
arch/powerpc/include/asm/setup.h
··· 9 9 10 10 extern unsigned int rtas_data; 11 11 extern unsigned long long memory_limit; 12 - extern bool init_mem_is_free; 13 12 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); 14 13 15 14 struct device_node; ··· 31 32 extern bool pseries_enable_reloc_on_exc(void); 32 33 extern void pseries_disable_reloc_on_exc(void); 33 34 extern void pseries_big_endian_exceptions(void); 34 - extern void pseries_little_endian_exceptions(void); 35 + void __init pseries_little_endian_exceptions(void); 35 36 #else 36 37 static inline bool pseries_enable_reloc_on_exc(void) { return false; } 37 38 static inline void pseries_disable_reloc_on_exc(void) {} ··· 54 55 void setup_uaccess_flush(bool enable); 55 56 void do_rfi_flush_fixups(enum l1d_flush_type types); 56 57 #ifdef CONFIG_PPC_BARRIER_NOSPEC 57 - void setup_barrier_nospec(void); 58 + void __init setup_barrier_nospec(void); 58 59 #else 59 60 static inline void setup_barrier_nospec(void) { } 60 61 #endif ··· 70 71 #endif 71 72 72 73 #ifdef CONFIG_PPC_FSL_BOOK3E 73 - void setup_spectre_v2(void); 74 + void __init setup_spectre_v2(void); 74 75 #else 75 76 static inline void setup_spectre_v2(void) {} 76 77 #endif 77 - void do_btb_flush_fixups(void); 78 + void __init do_btb_flush_fixups(void); 78 79 79 80 #endif /* !__ASSEMBLY__ */ 80 81
+1 -1
arch/powerpc/include/asm/smu.h
··· 456 456 /* 457 457 * Init routine, presence check.... 458 458 */ 459 - extern int smu_init(void); 459 + int __init smu_init(void); 460 460 extern int smu_present(void); 461 461 struct platform_device; 462 462 extern struct platform_device *smu_get_ofdev(void);
+2 -2
arch/powerpc/include/asm/sstep.h
··· 145 145 * otherwise. 146 146 */ 147 147 extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, 148 - struct ppc_inst instr); 148 + ppc_inst_t instr); 149 149 150 150 /* 151 151 * Emulate an instruction that can be executed just by updating ··· 162 162 * 0 if it could not be emulated, or -1 for an instruction that 163 163 * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). 164 164 */ 165 - extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr); 165 + int emulate_step(struct pt_regs *regs, ppc_inst_t instr); 166 166 167 167 /* 168 168 * Emulate a load or store instruction by reading/writing the
+3
arch/powerpc/include/asm/switch_to.h
··· 112 112 #endif 113 113 } 114 114 115 + void kvmppc_save_user_regs(void); 116 + void kvmppc_save_current_sprs(void); 117 + 115 118 extern int set_thread_tidr(struct task_struct *t); 116 119 117 120 #endif /* _ASM_POWERPC_SWITCH_TO_H */
+1 -5
arch/powerpc/include/asm/task_size_64.h
··· 44 44 */ 45 45 #define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE)) 46 46 47 - #define TASK_SIZE_OF(tsk) \ 48 - (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \ 49 - TASK_SIZE_USER64) 50 - 51 - #define TASK_SIZE TASK_SIZE_OF(current) 47 + #define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64) 52 48 53 49 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) 54 50 #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+11 -12
arch/powerpc/include/asm/time.h
··· 18 18 #include <asm/vdso/timebase.h> 19 19 20 20 /* time.c */ 21 + extern u64 decrementer_max; 22 + 21 23 extern unsigned long tb_ticks_per_jiffy; 22 24 extern unsigned long tb_ticks_per_usec; 23 25 extern unsigned long tb_ticks_per_sec; ··· 99 97 extern void secondary_cpu_time_init(void); 100 98 extern void __init time_init(void); 101 99 102 - #ifdef CONFIG_PPC64 103 - static inline unsigned long test_irq_work_pending(void) 104 - { 105 - unsigned long x; 106 - 107 - asm volatile("lbz %0,%1(13)" 108 - : "=r" (x) 109 - : "i" (offsetof(struct paca_struct, irq_work_pending))); 110 - return x; 111 - } 112 - #endif 113 - 114 100 DECLARE_PER_CPU(u64, decrementers_next_tb); 101 + 102 + static inline u64 timer_get_next_tb(void) 103 + { 104 + return __this_cpu_read(decrementers_next_tb); 105 + } 106 + 107 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 108 + void timer_rearm_host_dec(u64 now); 109 + #endif 115 110 116 111 /* Convert timebase ticks to nanoseconds */ 117 112 unsigned long long tb_to_ns(unsigned long long tb_ticks);
+5 -5
arch/powerpc/include/asm/udbg.h
··· 23 23 __attribute__ ((format (printf, 1, 2))); 24 24 extern void udbg_progress(char *s, unsigned short hex); 25 25 26 - extern void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); 27 - extern void udbg_uart_init_pio(unsigned long port, unsigned int stride); 26 + void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); 27 + void __init udbg_uart_init_pio(unsigned long port, unsigned int stride); 28 28 29 - extern void udbg_uart_setup(unsigned int speed, unsigned int clock); 30 - extern unsigned int udbg_probe_uart_speed(unsigned int clock); 29 + void __init udbg_uart_setup(unsigned int speed, unsigned int clock); 30 + unsigned int __init udbg_probe_uart_speed(unsigned int clock); 31 31 32 32 struct device_node; 33 - extern void udbg_scc_init(int force_scc); 33 + void __init udbg_scc_init(int force_scc); 34 34 extern int udbg_adb_init(int force_btext); 35 35 extern void udbg_adb_init_early(void); 36 36
-1
arch/powerpc/include/asm/uprobes.h
··· 11 11 12 12 #include <linux/notifier.h> 13 13 #include <asm/probes.h> 14 - #include <asm/inst.h> 15 14 16 15 typedef ppc_opcode_t uprobe_opcode_t; 17 16
+2 -2
arch/powerpc/include/asm/xics.h
··· 38 38 39 39 /* PAPR ICP */ 40 40 #ifdef CONFIG_PPC_ICP_HV 41 - extern int icp_hv_init(void); 41 + int __init icp_hv_init(void); 42 42 #else 43 43 static inline int icp_hv_init(void) { return -ENODEV; } 44 44 #endif 45 45 46 46 #ifdef CONFIG_PPC_POWERNV 47 - extern int icp_opal_init(void); 47 + int __init icp_opal_init(void); 48 48 extern void icp_opal_flush_interrupt(void); 49 49 #else 50 50 static inline int icp_opal_init(void) { return -ENODEV; }
+1 -1
arch/powerpc/include/asm/xmon.h
··· 12 12 13 13 #ifdef CONFIG_XMON 14 14 extern void xmon_setup(void); 15 - extern void xmon_register_spus(struct list_head *list); 15 + void __init xmon_register_spus(struct list_head *list); 16 16 struct pt_regs; 17 17 extern int xmon(struct pt_regs *excp); 18 18 extern irqreturn_t xmon_irq(int, void *);
+1
arch/powerpc/kernel/Makefile
··· 11 11 CFLAGS_btext.o += -fPIC 12 12 endif 13 13 14 + CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 14 15 CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 15 16 CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 16 17 CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+2 -2
arch/powerpc/kernel/align.c
··· 105 105 * so we don't need the address swizzling. 106 106 */ 107 107 static int emulate_spe(struct pt_regs *regs, unsigned int reg, 108 - struct ppc_inst ppc_instr) 108 + ppc_inst_t ppc_instr) 109 109 { 110 110 union { 111 111 u64 ll; ··· 300 300 301 301 int fix_alignment(struct pt_regs *regs) 302 302 { 303 - struct ppc_inst instr; 303 + ppc_inst_t instr; 304 304 struct instruction_op op; 305 305 int r, type; 306 306
+4 -1
arch/powerpc/kernel/asm-offsets.c
··· 54 54 #endif 55 55 56 56 #ifdef CONFIG_PPC32 57 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 57 + #ifdef CONFIG_BOOKE_OR_40x 58 58 #include "head_booke.h" 59 59 #endif 60 60 #endif ··· 139 139 OFFSET(THR11, thread_struct, r11); 140 140 OFFSET(THLR, thread_struct, lr); 141 141 OFFSET(THCTR, thread_struct, ctr); 142 + OFFSET(THSR0, thread_struct, sr0); 142 143 #endif 143 144 #ifdef CONFIG_SPE 144 145 OFFSET(THREAD_EVR0, thread_struct, evr[0]); ··· 219 218 OFFSET(PACA_EXGEN, paca_struct, exgen); 220 219 OFFSET(PACA_EXMC, paca_struct, exmc); 221 220 OFFSET(PACA_EXNMI, paca_struct, exnmi); 221 + #ifdef CONFIG_PPC_64S_HASH_MMU 222 222 OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); 223 223 OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); 224 224 OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); 225 225 OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); 226 + #endif 226 227 OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); 227 228 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 228 229 OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+9 -7
arch/powerpc/kernel/btext.c
··· 161 161 boot_text_mapped = 1; 162 162 } 163 163 164 - static int btext_initialize(struct device_node *np) 164 + static int __init btext_initialize(struct device_node *np) 165 165 { 166 166 unsigned int width, height, depth, pitch; 167 167 unsigned long address = 0; ··· 241 241 rc = btext_initialize(np); 242 242 printk("result: %d\n", rc); 243 243 } 244 - if (rc == 0) 244 + if (rc == 0) { 245 + of_node_put(np); 245 246 break; 247 + } 246 248 } 247 249 return rc; 248 250 } ··· 292 290 } 293 291 EXPORT_SYMBOL(btext_update_display); 294 292 295 - void btext_clearscreen(void) 293 + void __init btext_clearscreen(void) 296 294 { 297 295 unsigned int *base = (unsigned int *)calc_base(0, 0); 298 296 unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * ··· 310 308 rmci_maybe_off(); 311 309 } 312 310 313 - void btext_flushscreen(void) 311 + void __init btext_flushscreen(void) 314 312 { 315 313 unsigned int *base = (unsigned int *)calc_base(0, 0); 316 314 unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * ··· 329 327 __asm__ __volatile__ ("sync" ::: "memory"); 330 328 } 331 329 332 - void btext_flushline(void) 330 + void __init btext_flushline(void) 333 331 { 334 332 unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); 335 333 unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * ··· 544 542 btext_drawchar(*c++); 545 543 } 546 544 547 - void btext_drawtext(const char *c, unsigned int len) 545 + void __init btext_drawtext(const char *c, unsigned int len) 548 546 { 549 547 if (!boot_text_mapped) 550 548 return; ··· 552 550 btext_drawchar(*c++); 553 551 } 554 552 555 - void btext_drawhex(unsigned long v) 553 + void __init btext_drawhex(unsigned long v) 556 554 { 557 555 if (!boot_text_mapped) 558 556 return;
+3 -2
arch/powerpc/kernel/cacheinfo.c
··· 710 710 __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); 711 711 712 712 /* Attributes which should always be created -- the kobject/sysfs core 713 - * does this automatically via kobj_type->default_attrs. This is the 713 + * does this automatically via kobj_type->default_groups. This is the 714 714 * minimum data required to uniquely identify a cache. 715 715 */ 716 716 static struct attribute *cache_index_default_attrs[] = { ··· 720 720 &cache_shared_cpu_list_attr.attr, 721 721 NULL, 722 722 }; 723 + ATTRIBUTE_GROUPS(cache_index_default); 723 724 724 725 /* Attributes which should be created if the cache device node has the 725 726 * right properties -- see cacheinfo_create_index_opt_attrs ··· 739 738 static struct kobj_type cache_index_type = { 740 739 .release = cache_index_release, 741 740 .sysfs_ops = &cache_index_ops, 742 - .default_attrs = cache_index_default_attrs, 741 + .default_groups = cache_index_default_groups, 743 742 }; 744 743 745 744 static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+10 -2
arch/powerpc/kernel/cpu_setup_power.c
··· 109 109 static void init_PMU(void) 110 110 { 111 111 mtspr(SPRN_MMCRA, 0); 112 - mtspr(SPRN_MMCR0, 0); 112 + mtspr(SPRN_MMCR0, MMCR0_FC); 113 113 mtspr(SPRN_MMCR1, 0); 114 114 mtspr(SPRN_MMCR2, 0); 115 115 } ··· 123 123 { 124 124 mtspr(SPRN_MMCR3, 0); 125 125 mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); 126 - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); 126 + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); 127 127 } 128 128 129 129 /* ··· 137 137 return; 138 138 139 139 mtspr(SPRN_LPID, 0); 140 + mtspr(SPRN_AMOR, ~0); 140 141 mtspr(SPRN_PCR, PCR_MASK); 141 142 init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); 142 143 } ··· 151 150 return; 152 151 153 152 mtspr(SPRN_LPID, 0); 153 + mtspr(SPRN_AMOR, ~0); 154 154 mtspr(SPRN_PCR, PCR_MASK); 155 155 init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); 156 156 } ··· 166 164 return; 167 165 168 166 mtspr(SPRN_LPID, 0); 167 + mtspr(SPRN_AMOR, ~0); 169 168 mtspr(SPRN_PCR, PCR_MASK); 170 169 init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ 171 170 init_HFSCR(); ··· 187 184 return; 188 185 189 186 mtspr(SPRN_LPID, 0); 187 + mtspr(SPRN_AMOR, ~0); 190 188 mtspr(SPRN_PCR, PCR_MASK); 191 189 init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ 192 190 init_HFSCR(); ··· 206 202 mtspr(SPRN_PSSCR, 0); 207 203 mtspr(SPRN_LPID, 0); 208 204 mtspr(SPRN_PID, 0); 205 + mtspr(SPRN_AMOR, ~0); 209 206 mtspr(SPRN_PCR, PCR_MASK); 210 207 init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ 211 208 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); ··· 228 223 mtspr(SPRN_PSSCR, 0); 229 224 mtspr(SPRN_LPID, 0); 230 225 mtspr(SPRN_PID, 0); 226 + mtspr(SPRN_AMOR, ~0); 231 227 mtspr(SPRN_PCR, PCR_MASK); 232 228 init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ 233 229 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); ··· 248 242 mtspr(SPRN_PSSCR, 0); 249 243 mtspr(SPRN_LPID, 0); 250 244 mtspr(SPRN_PID, 0); 245 + mtspr(SPRN_AMOR, ~0); 251 246 mtspr(SPRN_PCR, 
PCR_MASK); 252 247 init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ 253 248 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); ··· 271 264 mtspr(SPRN_PSSCR, 0); 272 265 mtspr(SPRN_LPID, 0); 273 266 mtspr(SPRN_PID, 0); 267 + mtspr(SPRN_AMOR, ~0); 274 268 mtspr(SPRN_PCR, PCR_MASK); 275 269 init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ 276 270 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+2 -1
arch/powerpc/kernel/dbell.c
··· 27 27 28 28 ppc_msgsync(); 29 29 30 - may_hard_irq_enable(); 30 + if (should_hard_irq_enable()) 31 + do_hard_irq_enable(); 31 32 32 33 kvmppc_clear_host_ipi(smp_processor_id()); 33 34 __this_cpu_inc(irq_stat.doorbell_irqs);
+16 -8
arch/powerpc/kernel/dt_cpu_ftrs.c
··· 80 80 mtspr(SPRN_LPCR, system_registers.lpcr); 81 81 if (hv_mode) { 82 82 mtspr(SPRN_LPID, 0); 83 + mtspr(SPRN_AMOR, ~0); 83 84 mtspr(SPRN_HFSCR, system_registers.hfscr); 84 85 mtspr(SPRN_PCR, system_registers.pcr); 85 86 } ··· 217 216 } 218 217 219 218 mtspr(SPRN_LPID, 0); 219 + mtspr(SPRN_AMOR, ~0); 220 220 221 221 lpcr = mfspr(SPRN_LPCR); 222 222 lpcr &= ~LPCR_LPES0; /* HV external interrupts */ ··· 273 271 { 274 272 u64 lpcr; 275 273 274 + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) 275 + return 0; 276 + 276 277 lpcr = mfspr(SPRN_LPCR); 277 278 lpcr &= ~LPCR_ISL; 278 279 ··· 295 290 { 296 291 u64 lpcr; 297 292 293 + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) 294 + return 0; 295 + 298 296 lpcr = mfspr(SPRN_LPCR); 299 297 lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); 300 298 mtspr(SPRN_LPCR, lpcr); ··· 311 303 312 304 static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) 313 305 { 314 - #ifdef CONFIG_PPC_RADIX_MMU 306 + if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU)) 307 + return 0; 308 + 309 + cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO; 315 310 cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; 316 - cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; 317 311 cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; 318 312 cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; 319 313 320 314 return 1; 321 - #endif 322 - return 0; 323 315 } 324 316 325 317 static int __init feat_enable_dscr(struct dt_cpu_feature *f) ··· 344 336 return 1; 345 337 } 346 338 347 - static void hfscr_pmu_enable(void) 339 + static void __init hfscr_pmu_enable(void) 348 340 { 349 341 u64 hfscr = mfspr(SPRN_HFSCR); 350 342 hfscr |= PPC_BIT(60); ··· 359 351 } 360 352 361 353 mtspr(SPRN_MMCRA, 0); 362 - mtspr(SPRN_MMCR0, 0); 354 + mtspr(SPRN_MMCR0, MMCR0_FC); 363 355 mtspr(SPRN_MMCR1, 0); 364 356 mtspr(SPRN_MMCR2, 0); 365 357 mtspr(SPRN_MMCRS, 0); ··· 398 390 mtspr(SPRN_MMCRC, 0); 399 391 400 392 mtspr(SPRN_MMCRA, 0); 401 - mtspr(SPRN_MMCR0, 0); 393 + mtspr(SPRN_MMCR0, MMCR0_FC); 402 394 
mtspr(SPRN_MMCR1, 0); 403 395 mtspr(SPRN_MMCR2, 0); 404 396 } ··· 434 426 435 427 mtspr(SPRN_MMCR3, 0); 436 428 mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); 437 - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); 429 + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); 438 430 } 439 431 440 432 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+1 -1
arch/powerpc/kernel/eeh_cache.c
··· 280 280 } 281 281 DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); 282 282 283 - void eeh_cache_debugfs_init(void) 283 + void __init eeh_cache_debugfs_init(void) 284 284 { 285 285 debugfs_create_file_unsafe("eeh_address_cache", 0400, 286 286 arch_debugfs_dir, NULL,
+80 -82
arch/powerpc/kernel/eeh_driver.c
··· 905 905 } 906 906 #endif /* CONFIG_STACKTRACE */ 907 907 908 + eeh_for_each_pe(pe, tmp_pe) 909 + eeh_pe_for_each_dev(tmp_pe, edev, tmp) 910 + edev->mode &= ~EEH_DEV_NO_HANDLER; 911 + 908 912 eeh_pe_update_time_stamp(pe); 909 913 pe->freeze_count++; 910 914 if (pe->freeze_count > eeh_max_freezes) { 911 915 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", 912 916 pe->phb->global_number, pe->addr, 913 917 pe->freeze_count); 914 - result = PCI_ERS_RESULT_DISCONNECT; 915 - } 916 918 917 - eeh_for_each_pe(pe, tmp_pe) 918 - eeh_pe_for_each_dev(tmp_pe, edev, tmp) 919 - edev->mode &= ~EEH_DEV_NO_HANDLER; 919 + goto recover_failed; 920 + } 920 921 921 922 /* Walk the various device drivers attached to this slot through 922 923 * a reset sequence, giving each an opportunity to do what it needs ··· 929 928 * the error. Override the result if necessary to have partially 930 929 * hotplug for this case. 931 930 */ 932 - if (result != PCI_ERS_RESULT_DISCONNECT) { 933 - pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", 934 - pe->freeze_count, eeh_max_freezes); 935 - pr_info("EEH: Notify device drivers to shutdown\n"); 936 - eeh_set_channel_state(pe, pci_channel_io_frozen); 937 - eeh_set_irq_state(pe, false); 938 - eeh_pe_report("error_detected(IO frozen)", pe, 939 - eeh_report_error, &result); 940 - if ((pe->type & EEH_PE_PHB) && 941 - result != PCI_ERS_RESULT_NONE && 942 - result != PCI_ERS_RESULT_NEED_RESET) 943 - result = PCI_ERS_RESULT_NEED_RESET; 944 - } 931 + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", 932 + pe->freeze_count, eeh_max_freezes); 933 + pr_info("EEH: Notify device drivers to shutdown\n"); 934 + eeh_set_channel_state(pe, pci_channel_io_frozen); 935 + eeh_set_irq_state(pe, false); 936 + eeh_pe_report("error_detected(IO frozen)", pe, 937 + eeh_report_error, 
&result); 938 + if (result == PCI_ERS_RESULT_DISCONNECT) 939 + goto recover_failed; 940 + 941 + /* 942 + * Error logged on a PHB are always fences which need a full 943 + * PHB reset to clear so force that to happen. 944 + */ 945 + if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE) 946 + result = PCI_ERS_RESULT_NEED_RESET; 945 947 946 948 /* Get the current PCI slot state. This can take a long time, 947 949 * sometimes over 300 seconds for certain systems. 948 950 */ 949 - if (result != PCI_ERS_RESULT_DISCONNECT) { 950 - rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); 951 - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 952 - pr_warn("EEH: Permanent failure\n"); 953 - result = PCI_ERS_RESULT_DISCONNECT; 954 - } 951 + rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000); 952 + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 953 + pr_warn("EEH: Permanent failure\n"); 954 + goto recover_failed; 955 955 } 956 956 957 957 /* Since rtas may enable MMIO when posting the error log, 958 958 * don't post the error log until after all dev drivers 959 959 * have been informed. 
960 960 */ 961 - if (result != PCI_ERS_RESULT_DISCONNECT) { 962 - pr_info("EEH: Collect temporary log\n"); 963 - eeh_slot_error_detail(pe, EEH_LOG_TEMP); 964 - } 961 + pr_info("EEH: Collect temporary log\n"); 962 + eeh_slot_error_detail(pe, EEH_LOG_TEMP); 965 963 966 964 /* If all device drivers were EEH-unaware, then shut 967 965 * down all of the device drivers, and hope they ··· 970 970 pr_info("EEH: Reset with hotplug activity\n"); 971 971 rc = eeh_reset_device(pe, bus, NULL, false); 972 972 if (rc) { 973 - pr_warn("%s: Unable to reset, err=%d\n", 974 - __func__, rc); 975 - result = PCI_ERS_RESULT_DISCONNECT; 973 + pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); 974 + goto recover_failed; 976 975 } 977 976 } 978 977 ··· 979 980 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 980 981 pr_info("EEH: Enable I/O for affected devices\n"); 981 982 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 983 + if (rc < 0) 984 + goto recover_failed; 982 985 983 - if (rc < 0) { 984 - result = PCI_ERS_RESULT_DISCONNECT; 985 - } else if (rc) { 986 + if (rc) { 986 987 result = PCI_ERS_RESULT_NEED_RESET; 987 988 } else { 988 989 pr_info("EEH: Notify device drivers to resume I/O\n"); ··· 990 991 eeh_report_mmio_enabled, &result); 991 992 } 992 993 } 993 - 994 - /* If all devices reported they can proceed, then re-enable DMA */ 995 994 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 996 995 pr_info("EEH: Enabled DMA for affected devices\n"); 997 996 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 997 + if (rc < 0) 998 + goto recover_failed; 998 999 999 - if (rc < 0) { 1000 - result = PCI_ERS_RESULT_DISCONNECT; 1001 - } else if (rc) { 1000 + if (rc) { 1002 1001 result = PCI_ERS_RESULT_NEED_RESET; 1003 1002 } else { 1004 1003 /* ··· 1014 1017 pr_info("EEH: Reset without hotplug activity\n"); 1015 1018 rc = eeh_reset_device(pe, bus, &rmv_data, true); 1016 1019 if (rc) { 1017 - pr_warn("%s: Cannot reset, err=%d\n", 1018 - __func__, rc); 1019 - result = PCI_ERS_RESULT_DISCONNECT; 1020 - } else { 
1021 - result = PCI_ERS_RESULT_NONE; 1022 - eeh_set_channel_state(pe, pci_channel_io_normal); 1023 - eeh_set_irq_state(pe, true); 1024 - eeh_pe_report("slot_reset", pe, eeh_report_reset, 1025 - &result); 1020 + pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); 1021 + goto recover_failed; 1026 1022 } 1023 + 1024 + result = PCI_ERS_RESULT_NONE; 1025 + eeh_set_channel_state(pe, pci_channel_io_normal); 1026 + eeh_set_irq_state(pe, true); 1027 + eeh_pe_report("slot_reset", pe, eeh_report_reset, 1028 + &result); 1027 1029 } 1028 1030 1029 1031 if ((result == PCI_ERS_RESULT_RECOVERED) || ··· 1050 1054 } 1051 1055 1052 1056 pr_info("EEH: Recovery successful.\n"); 1053 - } else { 1054 - /* 1055 - * About 90% of all real-life EEH failures in the field 1056 - * are due to poorly seated PCI cards. Only 10% or so are 1057 - * due to actual, failed cards. 1058 - */ 1059 - pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" 1060 - "Please try reseating or replacing it\n", 1061 - pe->phb->global_number, pe->addr); 1057 + goto out; 1058 + } 1062 1059 1063 - eeh_slot_error_detail(pe, EEH_LOG_PERM); 1060 + recover_failed: 1061 + /* 1062 + * About 90% of all real-life EEH failures in the field 1063 + * are due to poorly seated PCI cards. Only 10% or so are 1064 + * due to actual, failed cards. 1065 + */ 1066 + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" 1067 + "Please try reseating or replacing it\n", 1068 + pe->phb->global_number, pe->addr); 1064 1069 1065 - /* Notify all devices that they're about to go down. */ 1066 - eeh_set_channel_state(pe, pci_channel_io_perm_failure); 1067 - eeh_set_irq_state(pe, false); 1068 - eeh_pe_report("error_detected(permanent failure)", pe, 1069 - eeh_report_failure, NULL); 1070 + eeh_slot_error_detail(pe, EEH_LOG_PERM); 1070 1071 1071 - /* Mark the PE to be removed permanently */ 1072 - eeh_pe_state_mark(pe, EEH_PE_REMOVED); 1072 + /* Notify all devices that they're about to go down. 
*/ 1073 + eeh_set_channel_state(pe, pci_channel_io_perm_failure); 1074 + eeh_set_irq_state(pe, false); 1075 + eeh_pe_report("error_detected(permanent failure)", pe, 1076 + eeh_report_failure, NULL); 1073 1077 1074 - /* 1075 - * Shut down the device drivers for good. We mark 1076 - * all removed devices correctly to avoid access 1077 - * the their PCI config any more. 1078 - */ 1079 - if (pe->type & EEH_PE_VF) { 1080 - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 1081 - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 1082 - } else { 1083 - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); 1084 - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 1078 + /* Mark the PE to be removed permanently */ 1079 + eeh_pe_state_mark(pe, EEH_PE_REMOVED); 1085 1080 1086 - pci_lock_rescan_remove(); 1087 - pci_hp_remove_devices(bus); 1088 - pci_unlock_rescan_remove(); 1089 - /* The passed PE should no longer be used */ 1090 - return; 1091 - } 1081 + /* 1082 + * Shut down the device drivers for good. We mark 1083 + * all removed devices correctly to avoid access 1084 + * the their PCI config any more. 1085 + */ 1086 + if (pe->type & EEH_PE_VF) { 1087 + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 1088 + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 1089 + } else { 1090 + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); 1091 + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 1092 + 1093 + pci_lock_rescan_remove(); 1094 + pci_hp_remove_devices(bus); 1095 + pci_unlock_rescan_remove(); 1096 + /* The passed PE should no longer be used */ 1097 + return; 1092 1098 } 1093 1099 1094 1100 out:
+39 -15
arch/powerpc/kernel/entry_32.S
··· 73 73 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) 74 74 #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ 75 75 76 + #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) 77 + .globl __kuep_lock 78 + __kuep_lock: 79 + lwz r9, THREAD+THSR0(r2) 80 + update_user_segments_by_4 r9, r10, r11, r12 81 + blr 82 + 83 + __kuep_unlock: 84 + lwz r9, THREAD+THSR0(r2) 85 + rlwinm r9,r9,0,~SR_NX 86 + update_user_segments_by_4 r9, r10, r11, r12 87 + blr 88 + 89 + .macro kuep_lock 90 + bl __kuep_lock 91 + .endm 92 + .macro kuep_unlock 93 + bl __kuep_unlock 94 + .endm 95 + #else 96 + .macro kuep_lock 97 + .endm 98 + .macro kuep_unlock 99 + .endm 100 + #endif 101 + 76 102 .globl transfer_to_syscall 77 103 transfer_to_syscall: 78 104 stw r11, GPR1(r1) 79 105 stw r11, 0(r1) 80 106 mflr r12 81 107 stw r12, _LINK(r1) 82 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 108 + #ifdef CONFIG_BOOKE_OR_40x 83 109 rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ 84 110 #endif 85 111 lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ ··· 116 90 stw r12,8(r1) 117 91 stw r2,_TRAP(r1) 118 92 SAVE_GPR(0, r1) 119 - SAVE_4GPRS(3, r1) 120 - SAVE_2GPRS(7, r1) 93 + SAVE_GPRS(3, 8, r1) 121 94 addi r2,r10,-THREAD 122 95 SAVE_NVGPRS(r1) 96 + kuep_lock 123 97 124 98 /* Calling convention has r9 = orig r0, r10 = regs */ 125 99 addi r10,r1,STACK_FRAME_OVERHEAD ··· 136 110 cmplwi cr0,r5,0 137 111 bne- 2f 138 112 #endif /* CONFIG_PPC_47x */ 113 + kuep_unlock 139 114 lwz r4,_LINK(r1) 140 115 lwz r5,_CCR(r1) 141 116 mtlr r4 ··· 166 139 mtxer r5 167 140 lwz r0,GPR0(r1) 168 141 lwz r3,GPR3(r1) 169 - REST_8GPRS(4,r1) 142 + REST_GPRS(4, 11, r1) 170 143 lwz r12,GPR12(r1) 171 144 b 1b 172 145 ··· 259 232 beq 3f /* if not, we've got problems */ 260 233 #endif 261 234 262 - 2: REST_4GPRS(3, r11) 235 + 2: REST_GPRS(3, 6, r11) 263 236 lwz r10,_CCR(r11) 264 - REST_2GPRS(1, r11) 237 + REST_GPRS(1, 2, r11) 265 238 mtcr r10 266 239 lwz r10,_LINK(r11) 267 240 mtlr r10 ··· 300 273 beq 
.Lkernel_interrupt_return 301 274 bl interrupt_exit_user_prepare 302 275 cmpwi r3,0 276 + kuep_unlock 303 277 bne- .Lrestore_nvgprs 304 278 305 279 .Lfast_user_interrupt_return: ··· 326 298 * the reliable stack unwinder later on. Clear it. 327 299 */ 328 300 stw r0,8(r1) 329 - REST_4GPRS(7, r1) 330 - REST_2GPRS(11, r1) 301 + REST_GPRS(7, 12, r1) 331 302 332 303 mtcr r3 333 304 mtlr r4 334 305 mtctr r5 335 306 mtspr SPRN_XER,r6 336 307 337 - REST_4GPRS(2, r1) 338 - REST_GPR(6, r1) 308 + REST_GPRS(2, 6, r1) 339 309 REST_GPR(0, r1) 340 310 REST_GPR(1, r1) 341 311 rfi ··· 367 341 lwz r6,_CCR(r1) 368 342 li r0,0 369 343 370 - REST_4GPRS(7, r1) 371 - REST_2GPRS(11, r1) 344 + REST_GPRS(7, 12, r1) 372 345 373 346 mtlr r3 374 347 mtctr r4 ··· 379 354 */ 380 355 stw r0,8(r1) 381 356 382 - REST_4GPRS(2, r1) 357 + REST_GPRS(2, 5, r1) 383 358 384 359 bne- cr1,1f /* emulate stack store */ 385 360 mtcr r6 ··· 455 430 bne interrupt_return; \ 456 431 lwz r0,GPR0(r1); \ 457 432 lwz r2,GPR2(r1); \ 458 - REST_4GPRS(3, r1); \ 459 - REST_2GPRS(7, r1); \ 433 + REST_GPRS(3, 8, r1); \ 460 434 lwz r10,_XER(r1); \ 461 435 lwz r11,_CTR(r1); \ 462 436 mtspr SPRN_XER,r10; \
+2 -2
arch/powerpc/kernel/entry_64.S
··· 180 180 #endif 181 181 182 182 ld r8,KSP(r4) /* new stack pointer */ 183 - #ifdef CONFIG_PPC_BOOK3S_64 183 + #ifdef CONFIG_PPC_64S_HASH_MMU 184 184 BEGIN_MMU_FTR_SECTION 185 185 b 2f 186 186 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ··· 232 232 slbmte r7,r0 233 233 isync 234 234 2: 235 - #endif /* CONFIG_PPC_BOOK3S_64 */ 235 + #endif /* CONFIG_PPC_64S_HASH_MMU */ 236 236 237 237 clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ 238 238 /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
+1 -1
arch/powerpc/kernel/epapr_paravirt.c
··· 37 37 return -1; 38 38 39 39 for (i = 0; i < (len / 4); i++) { 40 - struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); 40 + ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i])); 41 41 patch_instruction(epapr_hypercall_start + i, inst); 42 42 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) 43 43 patch_instruction(epapr_ev_idle_start + i, inst);
+4 -10
arch/powerpc/kernel/exceptions-64e.S
··· 198 198 199 199 stdcx. r0,0,r1 /* to clear the reservation */ 200 200 201 - REST_4GPRS(2, r1) 202 - REST_4GPRS(6, r1) 201 + REST_GPRS(2, 9, r1) 203 202 204 203 ld r10,_CTR(r1) 205 204 ld r11,_XER(r1) ··· 374 375 exc_##n##_common: \ 375 376 std r0,GPR0(r1); /* save r0 in stackframe */ \ 376 377 std r2,GPR2(r1); /* save r2 in stackframe */ \ 377 - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ 378 - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ 379 - std r9,GPR9(r1); /* save r9 in stackframe */ \ 378 + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ 380 379 std r10,_NIP(r1); /* save SRR0 to stackframe */ \ 381 380 std r11,_MSR(r1); /* save SRR1 to stackframe */ \ 382 381 beq 2f; /* if from kernel mode */ \ ··· 1058 1061 std r11,_ESR(r1) 1059 1062 std r0,GPR0(r1); /* save r0 in stackframe */ \ 1060 1063 std r2,GPR2(r1); /* save r2 in stackframe */ \ 1061 - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ 1062 - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ 1063 - std r9,GPR9(r1); /* save r9 in stackframe */ \ 1064 + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ 1064 1065 ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ 1065 1066 ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ 1066 1067 mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ ··· 1072 1077 std r10,_LINK(r1) 1073 1078 std r11,_CTR(r1) 1074 1079 std r12,_XER(r1) 1075 - SAVE_10GPRS(14,r1) 1076 - SAVE_8GPRS(24,r1) 1080 + SAVE_GPRS(14, 31, r1) 1077 1081 lhz r12,PACA_TRAP_SAVE(r13) 1078 1082 std r12,_TRAP(r1) 1079 1083 addi r11,r1,INT_FRAME_SIZE
+105 -69
arch/powerpc/kernel/exceptions-64s.S
··· 48 48 .balign IFETCH_ALIGN_BYTES; \ 49 49 .global name; \ 50 50 _ASM_NOKPROBE_SYMBOL(name); \ 51 - DEFINE_FIXED_SYMBOL(name); \ 51 + DEFINE_FIXED_SYMBOL(name, text); \ 52 52 name: 53 53 54 54 #define TRAMP_REAL_BEGIN(name) \ ··· 76 76 ld reg,PACAKBASE(r13); /* get high part of &label */ \ 77 77 ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) 78 78 79 - #define __LOAD_HANDLER(reg, label) \ 79 + #define __LOAD_HANDLER(reg, label, section) \ 80 80 ld reg,PACAKBASE(r13); \ 81 - ori reg,reg,(ABS_ADDR(label))@l 81 + ori reg,reg,(ABS_ADDR(label, section))@l 82 82 83 83 /* 84 84 * Branches from unrelocated code (e.g., interrupts) to labels outside 85 85 * head-y require >64K offsets. 86 86 */ 87 - #define __LOAD_FAR_HANDLER(reg, label) \ 87 + #define __LOAD_FAR_HANDLER(reg, label, section) \ 88 88 ld reg,PACAKBASE(r13); \ 89 - ori reg,reg,(ABS_ADDR(label))@l; \ 90 - addis reg,reg,(ABS_ADDR(label))@h 91 - 92 - /* 93 - * Branch to label using its 0xC000 address. This results in instruction 94 - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned 95 - * on using mtmsr rather than rfid. 96 - * 97 - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than 98 - * load KBASE for a slight optimisation. 
99 - */ 100 - #define BRANCH_TO_C000(reg, label) \ 101 - __LOAD_FAR_HANDLER(reg, label); \ 102 - mtctr reg; \ 103 - bctr 89 + ori reg,reg,(ABS_ADDR(label, section))@l; \ 90 + addis reg,reg,(ABS_ADDR(label, section))@h 104 91 105 92 /* 106 93 * Interrupt code generation macros ··· 98 111 #define IAREA .L_IAREA_\name\() /* PACA save area */ 99 112 #define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ 100 113 #define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ 114 + #define ICFAR .L_ICFAR_\name\() /* Uses CFAR */ 115 + #define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */ 101 116 #define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ 102 117 #define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ 103 - #define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ 104 118 #define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ 105 119 #define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ 106 120 #define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ ··· 139 151 .ifndef IISIDE 140 152 IISIDE=0 141 153 .endif 154 + .ifndef ICFAR 155 + ICFAR=1 156 + .endif 157 + .ifndef ICFAR_IF_HVMODE 158 + ICFAR_IF_HVMODE=0 159 + .endif 142 160 .ifndef IDAR 143 161 IDAR=0 144 162 .endif 145 163 .ifndef IDSISR 146 164 IDSISR=0 147 - .endif 148 - .ifndef ISET_RI 149 - ISET_RI=1 150 165 .endif 151 166 .ifndef IBRANCH_TO_COMMON 152 167 IBRANCH_TO_COMMON=1 ··· 282 291 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 283 292 HMT_MEDIUM 284 293 std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ 294 + .if ICFAR 285 295 BEGIN_FTR_SECTION 286 296 mfspr r10,SPRN_CFAR 287 297 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 298 + .elseif ICFAR_IF_HVMODE 299 + BEGIN_FTR_SECTION 300 + BEGIN_FTR_SECTION_NESTED(69) 301 + mfspr r10,SPRN_CFAR 302 + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) 303 + FTR_SECTION_ELSE 304 + BEGIN_FTR_SECTION_NESTED(69) 305 + li r10,0 306 + 
END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) 307 + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) 308 + .endif 288 309 .if \ool 289 310 .if !\virt 290 311 b tramp_real_\name ··· 312 309 BEGIN_FTR_SECTION 313 310 std r9,IAREA+EX_PPR(r13) 314 311 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 312 + .if ICFAR || ICFAR_IF_HVMODE 315 313 BEGIN_FTR_SECTION 316 314 std r10,IAREA+EX_CFAR(r13) 317 315 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 316 + .endif 318 317 INTERRUPT_TO_KERNEL 319 318 mfctr r10 320 319 std r10,IAREA+EX_CTR(r13) ··· 381 376 * This switches to virtual mode and sets MSR[RI]. 382 377 */ 383 378 .macro __GEN_COMMON_ENTRY name 384 - DEFINE_FIXED_SYMBOL(\name\()_common_real) 379 + DEFINE_FIXED_SYMBOL(\name\()_common_real, text) 385 380 \name\()_common_real: 386 381 .if IKVM_REAL 387 382 KVMTEST \name kvm_interrupt ··· 404 399 .endif 405 400 406 401 .balign IFETCH_ALIGN_BYTES 407 - DEFINE_FIXED_SYMBOL(\name\()_common_virt) 402 + DEFINE_FIXED_SYMBOL(\name\()_common_virt, text) 408 403 \name\()_common_virt: 409 404 .if IKVM_VIRT 410 405 KVMTEST \name kvm_interrupt ··· 418 413 * want to run in real mode. 
419 414 */ 420 415 .macro __GEN_REALMODE_COMMON_ENTRY name 421 - DEFINE_FIXED_SYMBOL(\name\()_common_real) 416 + DEFINE_FIXED_SYMBOL(\name\()_common_real, text) 422 417 \name\()_common_real: 423 418 .if IKVM_REAL 424 419 KVMTEST \name kvm_interrupt ··· 517 512 stb r10,PACASRR_VALID(r13) 518 513 .endif 519 514 520 - .if ISET_RI 521 - li r10,MSR_RI 522 - mtmsrd r10,1 /* Set MSR_RI */ 523 - .endif 524 - 525 515 .if ISTACK 526 516 .if IKUAP 527 517 kuap_save_amr_and_lock r9, r10, cr1, cr0 ··· 568 568 .endif 569 569 570 570 BEGIN_FTR_SECTION 571 + .if ICFAR || ICFAR_IF_HVMODE 571 572 ld r10,IAREA+EX_CFAR(r13) 573 + .else 574 + li r10,0 575 + .endif 572 576 std r10,ORIG_GPR3(r1) 573 577 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 574 578 ld r10,IAREA+EX_CTR(r13) 575 579 std r10,_CTR(r1) 576 580 std r2,GPR2(r1) /* save r2 in stackframe */ 577 - SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ 578 - SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */ 581 + SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ 579 582 mflr r9 /* Get LR, later save to stack */ 580 583 ld r2,PACATOC(r13) /* get kernel TOC into r2 */ 581 584 std r9,_LINK(r1) ··· 696 693 mtlr r9 697 694 ld r9,_CCR(r1) 698 695 mtcr r9 699 - REST_8GPRS(2, r1) 700 - REST_4GPRS(10, r1) 696 + REST_GPRS(2, 13, r1) 701 697 REST_GPR(0, r1) 702 698 /* restore original r1. 
*/ 703 699 ld r1,GPR1(r1) ··· 852 850 853 851 #ifdef CONFIG_RELOCATABLE 854 852 TRAMP_VIRT_BEGIN(system_call_vectored_tramp) 855 - __LOAD_HANDLER(r10, system_call_vectored_common) 853 + __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines) 856 854 mtctr r10 857 855 bctr 858 856 859 857 TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) 860 - __LOAD_HANDLER(r10, system_call_vectored_sigill) 858 + __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines) 861 859 mtctr r10 862 860 bctr 863 861 #endif ··· 904 902 IVEC=0x100 905 903 IAREA=PACA_EXNMI 906 904 IVIRT=0 /* no virt entry point */ 907 - /* 908 - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is 909 - * being used, so a nested NMI exception would corrupt it. 910 - */ 911 - ISET_RI=0 912 905 ISTACK=0 913 906 IKVM_REAL=1 914 907 INT_DEFINE_END(system_reset) ··· 961 964 /* We are waking up from idle, so may clobber any volatile register */ 962 965 cmpwi cr1,r5,2 963 966 bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ 964 - BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) 967 + __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines) 968 + mtctr r12 969 + bctr 965 970 #endif 966 971 967 972 #ifdef CONFIG_PPC_PSERIES ··· 978 979 EXC_COMMON_BEGIN(system_reset_common) 979 980 __GEN_COMMON_ENTRY system_reset 980 981 /* 981 - * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able 982 - * to recover, but nested NMI will notice in_nmi and not recover 983 - * because of the use of the NMI stack. in_nmi reentrancy is tested in 984 - * system_reset_exception. 982 + * Increment paca->in_nmi. When the interrupt entry wrapper later 983 + * enable MSR_RI, then SLB or MCE will be able to recover, but a nested 984 + * NMI will notice in_nmi and not recover because of the use of the NMI 985 + * stack. in_nmi reentrancy is tested in system_reset_exception. 
985 986 */ 986 987 lhz r10,PACA_IN_NMI(r13) 987 988 addi r10,r10,1 988 989 sth r10,PACA_IN_NMI(r13) 989 - li r10,MSR_RI 990 - mtmsrd r10,1 991 990 992 991 mr r10,r1 993 992 ld r1,PACA_NMI_EMERG_SP(r13) ··· 1059 1062 IAREA=PACA_EXMC 1060 1063 IVIRT=0 /* no virt entry point */ 1061 1064 IREALMODE_COMMON=1 1062 - /* 1063 - * MSR_RI is not enabled, because PACA_EXMC is being used, so a 1064 - * nested machine check corrupts it. machine_check_common enables 1065 - * MSR_RI. 1066 - */ 1067 - ISET_RI=0 1068 1065 ISTACK=0 1069 1066 IDAR=1 1070 1067 IDSISR=1 ··· 1069 1078 IVEC=0x200 1070 1079 IAREA=PACA_EXMC 1071 1080 IVIRT=0 /* no virt entry point */ 1072 - ISET_RI=0 1073 1081 IDAR=1 1074 1082 IDSISR=1 1075 1083 IKVM_REAL=1 ··· 1138 1148 BEGIN_FTR_SECTION 1139 1149 bl enable_machine_check 1140 1150 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 1141 - li r10,MSR_RI 1142 - mtmsrd r10,1 1143 - 1144 1151 addi r3,r1,STACK_FRAME_OVERHEAD 1145 1152 bl machine_check_early 1146 1153 std r3,RESULT(r1) /* Save result */ ··· 1225 1238 * save area: PACA_EXMC instead of PACA_EXGEN. 1226 1239 */ 1227 1240 GEN_COMMON machine_check 1228 - 1229 - /* Enable MSR_RI when finished with PACA_EXMC */ 1230 - li r10,MSR_RI 1231 - mtmsrd r10,1 1232 1241 addi r3,r1,STACK_FRAME_OVERHEAD 1233 1242 bl machine_check_exception_async 1234 1243 b interrupt_return_srr ··· 1352 1369 addi r3,r1,STACK_FRAME_OVERHEAD 1353 1370 andis. 
r0,r4,DSISR_DABRMATCH@h 1354 1371 bne- 1f 1372 + #ifdef CONFIG_PPC_64S_HASH_MMU 1355 1373 BEGIN_MMU_FTR_SECTION 1356 1374 bl do_hash_fault 1357 1375 MMU_FTR_SECTION_ELSE 1358 1376 bl do_page_fault 1359 1377 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 1378 + #else 1379 + bl do_page_fault 1380 + #endif 1360 1381 b interrupt_return_srr 1361 1382 1362 1383 1: bl do_break ··· 1403 1416 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) 1404 1417 EXC_COMMON_BEGIN(data_access_slb_common) 1405 1418 GEN_COMMON data_access_slb 1419 + #ifdef CONFIG_PPC_64S_HASH_MMU 1406 1420 BEGIN_MMU_FTR_SECTION 1407 1421 /* HPT case, do SLB fault */ 1408 1422 addi r3,r1,STACK_FRAME_OVERHEAD ··· 1416 1428 /* Radix case, access is outside page table range */ 1417 1429 li r3,-EFAULT 1418 1430 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 1431 + #else 1432 + li r3,-EFAULT 1433 + #endif 1419 1434 std r3,RESULT(r1) 1420 1435 addi r3,r1,STACK_FRAME_OVERHEAD 1421 - bl do_bad_slb_fault 1436 + bl do_bad_segment_interrupt 1422 1437 b interrupt_return_srr 1423 1438 1424 1439 ··· 1453 1462 EXC_COMMON_BEGIN(instruction_access_common) 1454 1463 GEN_COMMON instruction_access 1455 1464 addi r3,r1,STACK_FRAME_OVERHEAD 1465 + #ifdef CONFIG_PPC_64S_HASH_MMU 1456 1466 BEGIN_MMU_FTR_SECTION 1457 1467 bl do_hash_fault 1458 1468 MMU_FTR_SECTION_ELSE 1459 1469 bl do_page_fault 1460 1470 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 1471 + #else 1472 + bl do_page_fault 1473 + #endif 1461 1474 b interrupt_return_srr 1462 1475 1463 1476 ··· 1491 1496 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) 1492 1497 EXC_COMMON_BEGIN(instruction_access_slb_common) 1493 1498 GEN_COMMON instruction_access_slb 1499 + #ifdef CONFIG_PPC_64S_HASH_MMU 1494 1500 BEGIN_MMU_FTR_SECTION 1495 1501 /* HPT case, do SLB fault */ 1496 1502 addi r3,r1,STACK_FRAME_OVERHEAD ··· 1504 1508 /* Radix case, access is outside page table range */ 1505 1509 li r3,-EFAULT 1506 1510 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) 1511 + #else 
1512 + li r3,-EFAULT 1513 + #endif 1507 1514 std r3,RESULT(r1) 1508 1515 addi r3,r1,STACK_FRAME_OVERHEAD 1509 - bl do_bad_slb_fault 1516 + bl do_bad_segment_interrupt 1510 1517 b interrupt_return_srr 1511 1518 1512 1519 ··· 1535 1536 * 1536 1537 * If soft masked, the masked handler will note the pending interrupt for 1537 1538 * replay, and clear MSR[EE] in the interrupted context. 1539 + * 1540 + * CFAR is not required because this is an asynchronous interrupt that in 1541 + * general won't have much bearing on the state of the CPU, with the possible 1542 + * exception of crash/debug IPIs, but those are generally moving to use SRESET 1543 + * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case 1544 + * it may be exiting the guest and need CFAR to be saved. 1538 1545 */ 1539 1546 INT_DEFINE_BEGIN(hardware_interrupt) 1540 1547 IVEC=0x500 ··· 1548 1543 IMASK=IRQS_DISABLED 1549 1544 IKVM_REAL=1 1550 1545 IKVM_VIRT=1 1546 + ICFAR=0 1547 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1548 + ICFAR_IF_HVMODE=1 1549 + #endif 1551 1550 INT_DEFINE_END(hardware_interrupt) 1552 1551 1553 1552 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) ··· 1773 1764 * If PPC_WATCHDOG is configured, the soft masked handler will actually set 1774 1765 * things back up to run soft_nmi_interrupt as a regular interrupt handler 1775 1766 * on the emergency stack. 1767 + * 1768 + * CFAR is not required because this is asynchronous (see hardware_interrupt). 1769 + * A watchdog interrupt may like to have CFAR, but usually the interesting 1770 + * branch is long gone by that point (e.g., infinite loop). 
1776 1771 */ 1777 1772 INT_DEFINE_BEGIN(decrementer) 1778 1773 IVEC=0x900 ··· 1784 1771 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 1785 1772 IKVM_REAL=1 1786 1773 #endif 1774 + ICFAR=0 1787 1775 INT_DEFINE_END(decrementer) 1788 1776 1789 1777 EXC_REAL_BEGIN(decrementer, 0x900, 0x80) ··· 1860 1846 * If soft masked, the masked handler will note the pending interrupt for 1861 1847 * replay, leaving MSR[EE] enabled in the interrupted context because the 1862 1848 * doorbells are edge triggered. 1849 + * 1850 + * CFAR is not required, similarly to hardware_interrupt. 1863 1851 */ 1864 1852 INT_DEFINE_BEGIN(doorbell_super) 1865 1853 IVEC=0xa00 ··· 1869 1853 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 1870 1854 IKVM_REAL=1 1871 1855 #endif 1856 + ICFAR=0 1872 1857 INT_DEFINE_END(doorbell_super) 1873 1858 1874 1859 EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) ··· 1921 1904 IVEC=0xc00 1922 1905 IKVM_REAL=1 1923 1906 IKVM_VIRT=1 1907 + ICFAR=0 1924 1908 INT_DEFINE_END(system_call) 1925 1909 1926 1910 .macro SYSTEM_CALL virt ··· 1960 1942 HMT_MEDIUM 1961 1943 1962 1944 .if ! \virt 1963 - __LOAD_HANDLER(r10, system_call_common_real) 1945 + __LOAD_HANDLER(r10, system_call_common_real, real_vectors) 1964 1946 mtctr r10 1965 1947 bctr 1966 1948 .else 1967 1949 #ifdef CONFIG_RELOCATABLE 1968 - __LOAD_HANDLER(r10, system_call_common) 1950 + __LOAD_HANDLER(r10, system_call_common, virt_vectors) 1969 1951 mtctr r10 1970 1952 bctr 1971 1953 #else ··· 2019 2001 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives 2020 2002 * outside the head section. 2021 2003 */ 2022 - __LOAD_FAR_HANDLER(r10, kvmppc_hcall) 2004 + __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines) 2023 2005 mtctr r10 2024 2006 bctr 2025 2007 #else ··· 2220 2202 * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. 2221 2203 * This is an asynchronous interrupt in response to a msgsnd doorbell. 2222 2204 * Similar to the 0xa00 doorbell but for host rather than guest. 
2205 + * 2206 + * CFAR is not required (similar to doorbell_interrupt), unless KVM HV 2207 + * is enabled, in which case it may be a guest exit. Most PowerNV kernels 2208 + * include KVM support so it would be nice if this could be dynamically 2209 + * patched out if KVM was not currently running any guests. 2223 2210 */ 2224 2211 INT_DEFINE_BEGIN(h_doorbell) 2225 2212 IVEC=0xe80 ··· 2232 2209 IMASK=IRQS_DISABLED 2233 2210 IKVM_REAL=1 2234 2211 IKVM_VIRT=1 2212 + #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE 2213 + ICFAR=0 2214 + #endif 2235 2215 INT_DEFINE_END(h_doorbell) 2236 2216 2237 2217 EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) ··· 2258 2232 * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. 2259 2233 * This is an asynchronous interrupt in response to an "external exception". 2260 2234 * Similar to 0x500 but for host only. 2235 + * 2236 + * Like h_doorbell, CFAR is only required for KVM HV because this can be 2237 + * a guest exit. 2261 2238 */ 2262 2239 INT_DEFINE_BEGIN(h_virt_irq) 2263 2240 IVEC=0xea0 ··· 2268 2239 IMASK=IRQS_DISABLED 2269 2240 IKVM_REAL=1 2270 2241 IKVM_VIRT=1 2242 + #ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE 2243 + ICFAR=0 2244 + #endif 2271 2245 INT_DEFINE_END(h_virt_irq) 2272 2246 2273 2247 EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) ··· 2307 2275 * 2308 2276 * If soft masked, the masked handler will note the pending interrupt for 2309 2277 * replay, and clear MSR[EE] in the interrupted context. 2278 + * 2279 + * CFAR is not used by perf interrupts so not required. 
2310 2280 */ 2311 2281 INT_DEFINE_BEGIN(performance_monitor) 2312 2282 IVEC=0xf00 ··· 2316 2282 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 2317 2283 IKVM_REAL=1 2318 2284 #endif 2285 + ICFAR=0 2319 2286 INT_DEFINE_END(performance_monitor) 2320 2287 2321 2288 EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) ··· 2741 2706 INT_DEFINE_BEGIN(soft_nmi) 2742 2707 IVEC=0x900 2743 2708 ISTACK=0 2709 + ICFAR=0 2744 2710 INT_DEFINE_END(soft_nmi) 2745 2711 2746 2712 /* ··· 3061 3025 .align 7 3062 3026 .globl __end_interrupts 3063 3027 __end_interrupts: 3064 - DEFINE_FIXED_SYMBOL(__end_interrupts) 3028 + DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines) 3065 3029 3066 3030 CLOSE_FIXED_SECTION(real_vectors); 3067 3031 CLOSE_FIXED_SECTION(real_trampolines);
+17 -9
arch/powerpc/kernel/fadump.c
··· 251 251 } 252 252 253 253 /* Print firmware assisted dump configurations for debugging purpose. */ 254 - static void fadump_show_config(void) 254 + static void __init fadump_show_config(void) 255 255 { 256 256 int i; 257 257 ··· 353 353 * Calculate the total memory size required to be reserved for 354 354 * firmware-assisted dump registration. 355 355 */ 356 - static unsigned long get_fadump_area_size(void) 356 + static unsigned long __init get_fadump_area_size(void) 357 357 { 358 358 unsigned long size = 0; 359 359 ··· 462 462 * with the given memory range. 463 463 * False, otherwise. 464 464 */ 465 - static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) 465 + static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) 466 466 { 467 467 bool ret = false; 468 468 int i; ··· 737 737 fw_dump.ops->fadump_trigger(fdh, str); 738 738 } 739 739 740 - u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) 740 + u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) 741 741 { 742 742 struct elf_prstatus prstatus; 743 743 ··· 752 752 return buf; 753 753 } 754 754 755 - void fadump_update_elfcore_header(char *bufp) 755 + void __init fadump_update_elfcore_header(char *bufp) 756 756 { 757 757 struct elf_phdr *phdr; 758 758 ··· 770 770 return; 771 771 } 772 772 773 - static void *fadump_alloc_buffer(unsigned long size) 773 + static void *__init fadump_alloc_buffer(unsigned long size) 774 774 { 775 775 unsigned long count, i; 776 776 struct page *page; ··· 792 792 free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); 793 793 } 794 794 795 - s32 fadump_setup_cpu_notes_buf(u32 num_cpus) 795 + s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) 796 796 { 797 797 /* Allocate buffer to hold cpu crash notes. 
*/ 798 798 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); ··· 1447 1447 } 1448 1448 1449 1449 /* Release the reserved memory and disable the FADump */ 1450 - static void unregister_fadump(void) 1450 + static void __init unregister_fadump(void) 1451 1451 { 1452 1452 fadump_cleanup(); 1453 1453 fadump_release_memory(fw_dump.reserve_dump_area_start, ··· 1547 1547 1548 1548 DEFINE_SHOW_ATTRIBUTE(fadump_region); 1549 1549 1550 - static void fadump_init_files(void) 1550 + static void __init fadump_init_files(void) 1551 1551 { 1552 1552 int rc = 0; 1553 1553 ··· 1640 1640 /* Initialize the kernel dump memory structure for FAD registration. */ 1641 1641 else if (fw_dump.reserve_dump_area_size) 1642 1642 fw_dump.ops->fadump_init_mem_struct(&fw_dump); 1643 + 1644 + /* 1645 + * In case of panic, fadump is triggered via ppc_panic_event() 1646 + * panic notifier. Setting crash_kexec_post_notifiers to 'true' 1647 + * lets panic() function take crash friendly path before panic 1648 + * notifiers are invoked. 1649 + */ 1650 + crash_kexec_post_notifiers = true; 1643 1651 1644 1652 return 1; 1645 1653 }
+5
arch/powerpc/kernel/fpu.S
··· 81 81 */ 82 82 _GLOBAL(load_up_fpu) 83 83 mfmsr r5 84 + #ifdef CONFIG_PPC_BOOK3S_64 85 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ 86 + ori r5,r5,MSR_FP|MSR_RI 87 + #else 84 88 ori r5,r5,MSR_FP 89 + #endif 85 90 #ifdef CONFIG_VSX 86 91 BEGIN_FTR_SECTION 87 92 oris r5,r5,MSR_VSX@h
+7 -2
arch/powerpc/kernel/head_32.h
··· 115 115 stw r10,8(r1) 116 116 li r10, \trapno 117 117 stw r10,_TRAP(r1) 118 - SAVE_4GPRS(3, r1) 119 - SAVE_2GPRS(7, r1) 118 + SAVE_GPRS(3, 8, r1) 120 119 SAVE_NVGPRS(r1) 121 120 stw r2,GPR2(r1) 122 121 stw r12,_NIP(r1) ··· 135 136 andi. r12,r9,MSR_PR 136 137 bne 777f 137 138 bl prepare_transfer_to_handler 139 + #ifdef CONFIG_PPC_KUEP 140 + b 778f 141 + 777: 142 + bl __kuep_lock 143 + 778: 144 + #endif 138 145 777: 139 146 #endif 140 147 .endm
+16 -1
arch/powerpc/kernel/head_40x.S
··· 27 27 28 28 #include <linux/init.h> 29 29 #include <linux/pgtable.h> 30 + #include <linux/sizes.h> 30 31 #include <asm/processor.h> 31 32 #include <asm/page.h> 32 33 #include <asm/mmu.h> ··· 298 297 3: 299 298 mfspr r11,SPRN_SPRG_THREAD 300 299 lwz r11,PGDIR(r11) 300 + #ifdef CONFIG_PPC_KUAP 301 + rlwinm. r9, r9, 0, 0xff 302 + beq 5f /* Kuap fault */ 303 + #endif 301 304 4: 302 305 tophys(r11, r11) 303 306 rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ ··· 382 377 3: 383 378 mfspr r11,SPRN_SPRG_THREAD 384 379 lwz r11,PGDIR(r11) 380 + #ifdef CONFIG_PPC_KUAP 381 + rlwinm. r9, r9, 0, 0xff 382 + beq 5f /* Kuap fault */ 383 + #endif 385 384 4: 386 385 tophys(r11, r11) 387 386 rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ ··· 659 650 b . /* prevent prefetch past rfi */ 660 651 661 652 /* Set up the initial MMU state so we can do the first level of 662 - * kernel initialization. This maps the first 16 MBytes of memory 1:1 653 + * kernel initialization. This maps the first 32 MBytes of memory 1:1 663 654 * virtual to physical and more importantly sets the cache mode. 664 655 */ 665 656 initial_mmu: ··· 693 684 694 685 li r0,63 /* TLB slot 63 */ 695 686 687 + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ 688 + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ 689 + 690 + li r0,62 /* TLB slot 62 */ 691 + addis r4,r4,SZ_16M@h 692 + addis r3,r3,SZ_16M@h 696 693 tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ 697 694 tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ 698 695
+18 -8
arch/powerpc/kernel/head_44x.S
··· 334 334 mfspr r12,SPRN_MMUCR 335 335 mfspr r13,SPRN_PID /* Get PID */ 336 336 rlwimi r12,r13,0,24,31 /* Set TID */ 337 + #ifdef CONFIG_PPC_KUAP 338 + cmpwi r13,0 339 + beq 2f /* KUAP Fault */ 340 + #endif 337 341 338 342 4: 339 343 mtspr SPRN_MMUCR,r12 ··· 448 444 mfspr r12,SPRN_MMUCR 449 445 mfspr r13,SPRN_PID /* Get PID */ 450 446 rlwimi r12,r13,0,24,31 /* Set TID */ 447 + #ifdef CONFIG_PPC_KUAP 448 + cmpwi r13,0 449 + beq 2f /* KUAP Fault */ 450 + #endif 451 451 452 452 4: 453 453 mtspr SPRN_MMUCR,r12 ··· 540 532 andi. r10,r12,_PAGE_USER /* User page ? */ 541 533 beq 1f /* nope, leave U bits empty */ 542 534 rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ 543 - #ifdef CONFIG_PPC_KUEP 544 - 0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 545 - patch_site 0b, patch__tlb_44x_kuep 546 - #endif 535 + rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 547 536 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ 548 537 549 538 /* Done...restore registers and get out of here. ··· 580 575 3: mfspr r11,SPRN_SPRG3 581 576 lwz r11,PGDIR(r11) 582 577 mfspr r12,SPRN_PID /* Get PID */ 578 + #ifdef CONFIG_PPC_KUAP 579 + cmpwi r12,0 580 + beq 2f /* KUAP Fault */ 581 + #endif 583 582 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ 584 583 585 584 /* Mask of required permission bits. Note that while we ··· 681 672 3: mfspr r11,SPRN_SPRG_THREAD 682 673 lwz r11,PGDIR(r11) 683 674 mfspr r12,SPRN_PID /* Get PID */ 675 + #ifdef CONFIG_PPC_KUAP 676 + cmpwi r12,0 677 + beq 2f /* KUAP Fault */ 678 + #endif 684 679 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ 685 680 686 681 /* Make up the required permissions */ ··· 760 747 andi. r10,r12,_PAGE_USER /* User page ? 
*/ 761 748 beq 1f /* nope, leave U bits empty */ 762 749 rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ 763 - #ifdef CONFIG_PPC_KUEP 764 - 0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 765 - patch_site 0b, patch__tlb_47x_kuep 766 - #endif 750 + rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 767 751 1: tlbwe r11,r13,2 768 752 769 753 /* Done...restore registers and get out of here.
+10 -10
arch/powerpc/kernel/head_64.S
··· 126 126 . = 0x5c 127 127 .globl __run_at_load 128 128 __run_at_load: 129 - DEFINE_FIXED_SYMBOL(__run_at_load) 129 + DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) 130 130 .long RUN_AT_LOAD_DEFAULT 131 131 #endif 132 132 ··· 156 156 /* Tell the master cpu we're here */ 157 157 /* Relocation is off & we are located at an address less */ 158 158 /* than 0x100, so only need to grab low order offset. */ 159 - std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) 159 + std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) 160 160 sync 161 161 162 162 li r26,0 ··· 164 164 tovirt(r26,r26) 165 165 #endif 166 166 /* All secondary cpus wait here until told to start. */ 167 - 100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) 167 + 100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) 168 168 cmpdi 0,r12,0 169 169 beq 100b 170 170 ··· 649 649 3: 650 650 #endif 651 651 /* # bytes of memory to copy */ 652 - lis r5,(ABS_ADDR(copy_to_here))@ha 653 - addi r5,r5,(ABS_ADDR(copy_to_here))@l 652 + lis r5,(ABS_ADDR(copy_to_here, text))@ha 653 + addi r5,r5,(ABS_ADDR(copy_to_here, text))@l 654 654 655 655 bl copy_and_flush /* copy the first n bytes */ 656 656 /* this includes the code being */ 657 657 /* executed here. */ 658 658 /* Jump to the copy of this code that we just made */ 659 - addis r8,r3,(ABS_ADDR(4f))@ha 660 - addi r12,r8,(ABS_ADDR(4f))@l 659 + addis r8,r3,(ABS_ADDR(4f, text))@ha 660 + addi r12,r8,(ABS_ADDR(4f, text))@l 661 661 mtctr r12 662 662 bctr 663 663 ··· 669 669 * Now copy the rest of the kernel up to _end, add 670 670 * _end - copy_to_here to the copy limit and run again. 671 671 */ 672 - addis r8,r26,(ABS_ADDR(p_end))@ha 673 - ld r8,(ABS_ADDR(p_end))@l(r8) 672 + addis r8,r26,(ABS_ADDR(p_end, text))@ha 673 + ld r8,(ABS_ADDR(p_end, text))@l(r8) 674 674 add r5,r5,r8 675 675 5: bl copy_and_flush /* copy the rest */ 676 676 ··· 904 904 blr 905 905 906 906 .balign 8 907 - p_toc: .8byte __toc_start + 0x8000 - 0b 907 + p_toc: .8byte .TOC. 
- 0b 908 908 909 909 /* 910 910 * This is where the main kernel code starts.
+4
arch/powerpc/kernel/head_book3s_32.S
··· 931 931 _GLOBAL(load_segment_registers) 932 932 li r0, NUM_USER_SEGMENTS /* load up user segment register values */ 933 933 mtctr r0 /* for context 0 */ 934 + #ifdef CONFIG_PPC_KUEP 935 + lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */ 936 + #else 934 937 li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ 938 + #endif 935 939 li r4, 0 936 940 3: mtsrin r3, r4 937 941 addi r3, r3, 0x111 /* increment VSID */
+1 -2
arch/powerpc/kernel/head_booke.h
··· 87 87 stw r10, 8(r1) 88 88 li r10, \trapno 89 89 stw r10,_TRAP(r1) 90 - SAVE_4GPRS(3, r1) 91 - SAVE_2GPRS(7, r1) 90 + SAVE_GPRS(3, 8, r1) 92 91 SAVE_NVGPRS(r1) 93 92 stw r2,GPR2(r1) 94 93 stw r12,_NIP(r1)
+13
arch/powerpc/kernel/head_fsl_booke.S
··· 462 462 mfspr r11,SPRN_SPRG_THREAD 463 463 lwz r11,PGDIR(r11) 464 464 465 + #ifdef CONFIG_PPC_KUAP 466 + mfspr r12, SPRN_MAS1 467 + rlwinm. r12,r12,0,0x3fff0000 468 + beq 2f /* KUAP fault */ 469 + #endif 470 + 465 471 4: 466 472 /* Mask of required permission bits. Note that while we 467 473 * do copy ESR:ST to _PAGE_RW position as trying to write ··· 576 570 3: 577 571 mfspr r11,SPRN_SPRG_THREAD 578 572 lwz r11,PGDIR(r11) 573 + 574 + #ifdef CONFIG_PPC_KUAP 575 + mfspr r12, SPRN_MAS1 576 + rlwinm. r12,r12,0,0x3fff0000 577 + beq 2f /* KUAP fault */ 578 + #endif 579 579 580 580 /* Make up the required permissions for user code */ 581 581 #ifdef CONFIG_PTE_64BIT ··· 789 777 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ 790 778 slwi r10, r12, 1 791 779 or r10, r10, r12 780 + rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ 792 781 iseleq r12, r12, r10 793 782 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ 794 783 mtspr SPRN_MAS3, r13
+2 -2
arch/powerpc/kernel/hw_breakpoint.c
··· 523 523 524 524 static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, 525 525 struct arch_hw_breakpoint **info, int *hit, 526 - struct ppc_inst instr) 526 + ppc_inst_t instr) 527 527 { 528 528 int i; 529 529 int stepped; ··· 616 616 int hit[HBP_NUM_MAX] = {0}; 617 617 int nr_hit = 0; 618 618 bool ptrace_bp = false; 619 - struct ppc_inst instr = ppc_inst(0); 619 + ppc_inst_t instr = ppc_inst(0); 620 620 int type = 0; 621 621 int size = 0; 622 622 unsigned long ea;
+2 -2
arch/powerpc/kernel/hw_breakpoint_constraints.c
··· 80 80 * Return true if the event is valid wrt dawr configuration, 81 81 * including extraneous exception. Otherwise return false. 82 82 */ 83 - bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, 83 + bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, 84 84 unsigned long ea, int type, int size, 85 85 struct arch_hw_breakpoint *info) 86 86 { ··· 127 127 return false; 128 128 } 129 129 130 - void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, 130 + void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, 131 131 int *type, int *size, unsigned long *ea) 132 132 { 133 133 struct instruction_op op;
+1 -1
arch/powerpc/kernel/idle.c
··· 82 82 return; 83 83 84 84 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 85 - asm volatile("DSSALL ; sync" ::: "memory"); 85 + asm volatile(PPC_DSSALL " ; sync" ::: "memory"); 86 86 87 87 power4_idle_nap(); 88 88
+1 -1
arch/powerpc/kernel/idle_6xx.S
··· 129 129 END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) 130 130 mtspr SPRN_HID0,r4 131 131 BEGIN_FTR_SECTION 132 - DSSALL 132 + PPC_DSSALL 133 133 sync 134 134 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 135 135 lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
+1 -2
arch/powerpc/kernel/interrupt.c
··· 81 81 { 82 82 syscall_fn f; 83 83 84 - kuep_lock(); 84 + kuap_lock(); 85 85 86 86 regs->orig_gpr3 = r3; 87 87 ··· 406 406 407 407 /* Restore user access locks last */ 408 408 kuap_user_restore(regs); 409 - kuep_unlock(); 410 409 411 410 return ret; 412 411 }
+20 -26
arch/powerpc/kernel/interrupt_64.S
··· 30 30 .ifc \srr,srr 31 31 mfspr r11,SPRN_SRR0 32 32 ld r12,_NIP(r1) 33 + clrrdi r12,r12,2 33 34 100: tdne r11,r12 34 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 35 + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 35 36 mfspr r11,SPRN_SRR1 36 37 ld r12,_MSR(r1) 37 38 100: tdne r11,r12 38 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 39 + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 39 40 .else 40 41 mfspr r11,SPRN_HSRR0 41 42 ld r12,_NIP(r1) 43 + clrrdi r12,r12,2 42 44 100: tdne r11,r12 43 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 45 + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 44 46 mfspr r11,SPRN_HSRR1 45 47 ld r12,_MSR(r1) 46 48 100: tdne r11,r12 47 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 49 + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) 48 50 .endif 49 51 #endif 50 52 .endm ··· 164 162 * The value of AMR only matters while we're in the kernel. 165 163 */ 166 164 mtcr r2 167 - ld r2,GPR2(r1) 168 - ld r3,GPR3(r1) 169 - ld r13,GPR13(r1) 170 - ld r1,GPR1(r1) 165 + REST_GPRS(2, 3, r1) 166 + REST_GPR(13, r1) 167 + REST_GPR(1, r1) 171 168 RFSCV_TO_USER 172 169 b . /* prevent speculative execution */ 173 170 ··· 184 183 mtctr r3 185 184 mtlr r4 186 185 mtspr SPRN_XER,r5 187 - REST_10GPRS(2, r1) 188 - REST_2GPRS(12, r1) 189 - ld r1,GPR1(r1) 186 + REST_GPRS(2, 13, r1) 187 + REST_GPR(1, r1) 190 188 RFI_TO_USER 191 189 .Lsyscall_vectored_\name\()_rst_end: 192 190 ··· 374 374 * The value of AMR only matters while we're in the kernel. 375 375 */ 376 376 mtcr r2 377 - ld r2,GPR2(r1) 378 - ld r3,GPR3(r1) 379 - ld r13,GPR13(r1) 380 - ld r1,GPR1(r1) 377 + REST_GPRS(2, 3, r1) 378 + REST_GPR(13, r1) 379 + REST_GPR(1, r1) 381 380 RFI_TO_USER 382 381 b . 
/* prevent speculative execution */ 383 382 ··· 387 388 mtctr r3 388 389 mtspr SPRN_XER,r4 389 390 ld r0,GPR0(r1) 390 - REST_8GPRS(4, r1) 391 - ld r12,GPR12(r1) 391 + REST_GPRS(4, 12, r1) 392 392 b .Lsyscall_restore_regs_cont 393 393 .Lsyscall_rst_end: 394 394 ··· 516 518 ld r6,_XER(r1) 517 519 li r0,0 518 520 519 - REST_4GPRS(7, r1) 520 - REST_2GPRS(11, r1) 521 - REST_GPR(13, r1) 521 + REST_GPRS(7, 13, r1) 522 522 523 523 mtcr r3 524 524 mtlr r4 525 525 mtctr r5 526 526 mtspr SPRN_XER,r6 527 527 528 - REST_4GPRS(2, r1) 529 - REST_GPR(6, r1) 528 + REST_GPRS(2, 6, r1) 530 529 REST_GPR(0, r1) 531 530 REST_GPR(1, r1) 532 531 .ifc \srr,srr ··· 620 625 ld r6,_CCR(r1) 621 626 li r0,0 622 627 623 - REST_4GPRS(7, r1) 624 - REST_2GPRS(11, r1) 628 + REST_GPRS(7, 12, r1) 625 629 626 630 mtlr r3 627 631 mtctr r4 ··· 632 638 */ 633 639 std r0,STACK_FRAME_OVERHEAD-16(r1) 634 640 635 - REST_4GPRS(2, r1) 641 + REST_GPRS(2, 5, r1) 636 642 637 643 bne- cr1,1f /* emulate stack store */ 638 644 mtcr r6 ··· 697 703 698 704 .globl __end_soft_masked 699 705 __end_soft_masked: 700 - DEFINE_FIXED_SYMBOL(__end_soft_masked) 706 + DEFINE_FIXED_SYMBOL(__end_soft_masked, text) 701 707 #endif /* CONFIG_PPC_BOOK3S */ 702 708 703 709 #ifdef CONFIG_PPC_BOOK3S
+3 -2
arch/powerpc/kernel/irq.c
··· 745 745 irq = ppc_md.get_irq(); 746 746 747 747 /* We can hard enable interrupts now to allow perf interrupts */ 748 - may_hard_irq_enable(); 748 + if (should_hard_irq_enable()) 749 + do_hard_irq_enable(); 749 750 750 751 /* And finally process it */ 751 752 if (unlikely(!irq)) ··· 812 811 ppc_md.init_IRQ(); 813 812 } 814 813 815 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 814 + #ifdef CONFIG_BOOKE_OR_40x 816 815 void *critirq_ctx[NR_CPUS] __read_mostly; 817 816 void *dbgirq_ctx[NR_CPUS] __read_mostly; 818 817 void *mcheckirq_ctx[NR_CPUS] __read_mostly;
+2 -2
arch/powerpc/kernel/kgdb.c
··· 48 48 { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ 49 49 { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ 50 50 { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ 51 - #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) 51 + #ifdef CONFIG_BOOKE_OR_40x 52 52 { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ 53 53 #if defined(CONFIG_FSL_BOOKE) 54 54 { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ ··· 67 67 { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ 68 68 { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ 69 69 #endif 70 - #else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ 70 + #else /* !CONFIG_BOOKE_OR_40x */ 71 71 { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ 72 72 #if defined(CONFIG_PPC_8xx) 73 73 { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
+2 -2
arch/powerpc/kernel/kprobes.c
··· 124 124 { 125 125 int ret = 0; 126 126 struct kprobe *prev; 127 - struct ppc_inst insn = ppc_inst_read(p->addr); 127 + ppc_inst_t insn = ppc_inst_read(p->addr); 128 128 129 129 if ((unsigned long)p->addr & 0x03) { 130 130 printk("Attempt to register kprobe at an unaligned address\n"); ··· 244 244 static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) 245 245 { 246 246 int ret; 247 - struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); 247 + ppc_inst_t insn = ppc_inst_read(p->ainsn.insn); 248 248 249 249 /* regs->nip is also adjusted if emulate_step returns 1 */ 250 250 ret = emulate_step(regs, insn);
+3 -3
arch/powerpc/kernel/l2cr_6xx.S
··· 96 96 97 97 /* Stop DST streams */ 98 98 BEGIN_FTR_SECTION 99 - DSSALL 99 + PPC_DSSALL 100 100 sync 101 101 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 102 102 ··· 292 292 isync 293 293 294 294 /* Stop DST streams */ 295 - DSSALL 295 + PPC_DSSALL 296 296 sync 297 297 298 298 /* Get the current enable bit of the L3CR into r4 */ ··· 401 401 _GLOBAL(__flush_disable_L1) 402 402 /* Stop pending alitvec streams and memory accesses */ 403 403 BEGIN_FTR_SECTION 404 - DSSALL 404 + PPC_DSSALL 405 405 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 406 406 sync 407 407
+1 -1
arch/powerpc/kernel/mce.c
··· 586 586 mc_error_class[evt->error_class] : "Unknown"; 587 587 printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); 588 588 589 - #ifdef CONFIG_PPC_BOOK3S_64 589 + #ifdef CONFIG_PPC_64S_HASH_MMU 590 590 /* Display faulty slb contents for SLB errors. */ 591 591 if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) 592 592 slb_dump_contents(local_paca->mce_faulty_slbs);
+11 -7
arch/powerpc/kernel/mce_power.c
··· 77 77 } 78 78 79 79 /* flush SLBs and reload */ 80 - #ifdef CONFIG_PPC_BOOK3S_64 80 + #ifdef CONFIG_PPC_64S_HASH_MMU 81 81 void flush_and_reload_slb(void) 82 82 { 83 - /* Invalidate all SLBs */ 84 - slb_flush_all_realmode(); 85 - 86 83 if (early_radix_enabled()) 87 84 return; 85 + 86 + /* Invalidate all SLBs */ 87 + slb_flush_all_realmode(); 88 88 89 89 /* 90 90 * This probably shouldn't happen, but it may be possible it's ··· 99 99 100 100 void flush_erat(void) 101 101 { 102 - #ifdef CONFIG_PPC_BOOK3S_64 102 + #ifdef CONFIG_PPC_64S_HASH_MMU 103 103 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { 104 104 flush_and_reload_slb(); 105 105 return; ··· 114 114 115 115 static int mce_flush(int what) 116 116 { 117 - #ifdef CONFIG_PPC_BOOK3S_64 117 + #ifdef CONFIG_PPC_64S_HASH_MMU 118 118 if (what == MCE_FLUSH_SLB) { 119 119 flush_and_reload_slb(); 120 120 return 1; ··· 455 455 * in real-mode is tricky and can lead to recursive 456 456 * faults 457 457 */ 458 - struct ppc_inst instr; 458 + ppc_inst_t instr; 459 459 unsigned long pfn, instr_addr; 460 460 struct instruction_op op; 461 461 struct pt_regs tmp = *regs; ··· 499 499 /* attempt to correct the error */ 500 500 switch (table[i].error_type) { 501 501 case MCE_ERROR_TYPE_SLB: 502 + #ifdef CONFIG_PPC_64S_HASH_MMU 502 503 if (local_paca->in_mce == 1) 503 504 slb_save_contents(local_paca->mce_faulty_slbs); 505 + #endif 504 506 handled = mce_flush(MCE_FLUSH_SLB); 505 507 break; 506 508 case MCE_ERROR_TYPE_ERAT: ··· 590 588 /* attempt to correct the error */ 591 589 switch (table[i].error_type) { 592 590 case MCE_ERROR_TYPE_SLB: 591 + #ifdef CONFIG_PPC_64S_HASH_MMU 593 592 if (local_paca->in_mce == 1) 594 593 slb_save_contents(local_paca->mce_faulty_slbs); 594 + #endif 595 595 if (mce_flush(MCE_FLUSH_SLB)) 596 596 handled = 1; 597 597 break;
+6 -5
arch/powerpc/kernel/module.c
··· 90 90 } 91 91 92 92 static __always_inline void * 93 - __module_alloc(unsigned long size, unsigned long start, unsigned long end) 93 + __module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) 94 94 { 95 95 pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; 96 + gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); 96 97 97 98 /* 98 99 * Don't do huge page allocations for modules yet until more testing 99 100 * is done. STRICT_MODULE_RWX may require extra work to support this 100 101 * too. 101 102 */ 102 - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, 103 + return __vmalloc_node_range(size, 1, start, end, gfp, prot, 103 104 VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, 104 105 NUMA_NO_NODE, __builtin_return_address(0)); 105 106 } ··· 115 114 116 115 /* First try within 32M limit from _etext to avoid branch trampolines */ 117 116 if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) 118 - ptr = __module_alloc(size, limit, MODULES_END); 117 + ptr = __module_alloc(size, limit, MODULES_END, true); 119 118 120 119 if (!ptr) 121 - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); 120 + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); 122 121 123 122 return ptr; 124 123 #else 125 - return __module_alloc(size, VMALLOC_START, VMALLOC_END); 124 + return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); 126 125 #endif 127 126 }
+33
arch/powerpc/kernel/module_32.c
··· 273 273 } 274 274 275 275 #ifdef CONFIG_DYNAMIC_FTRACE 276 + int module_trampoline_target(struct module *mod, unsigned long addr, 277 + unsigned long *target) 278 + { 279 + unsigned int jmp[4]; 280 + 281 + /* Find where the trampoline jumps to */ 282 + if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp))) 283 + return -EFAULT; 284 + 285 + /* verify that this is what we expect it to be */ 286 + if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) || 287 + (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) || 288 + jmp[2] != PPC_RAW_MTCTR(_R12) || 289 + jmp[3] != PPC_RAW_BCTR()) 290 + return -EINVAL; 291 + 292 + addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16); 293 + if (addr & 0x8000) 294 + addr -= 0x10000; 295 + 296 + *target = addr; 297 + 298 + return 0; 299 + } 300 + 276 301 int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) 277 302 { 278 303 module->arch.tramp = do_plt_call(module->core_layout.base, ··· 305 280 sechdrs, module); 306 281 if (!module->arch.tramp) 307 282 return -ENOENT; 283 + 284 + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS 285 + module->arch.tramp_regs = do_plt_call(module->core_layout.base, 286 + (unsigned long)ftrace_regs_caller, 287 + sechdrs, module); 288 + if (!module->arch.tramp_regs) 289 + return -ENOENT; 290 + #endif 308 291 309 292 return 0; 310 293 }
+3 -3
arch/powerpc/kernel/nvram_64.c
··· 540 540 .write = nvram_pstore_write, 541 541 }; 542 542 543 - static int nvram_pstore_init(void) 543 + static int __init nvram_pstore_init(void) 544 544 { 545 545 int rc = 0; 546 546 ··· 562 562 return rc; 563 563 } 564 564 #else 565 - static int nvram_pstore_init(void) 565 + static int __init nvram_pstore_init(void) 566 566 { 567 567 return -1; 568 568 } ··· 755 755 * Per the criteria passed via nvram_remove_partition(), should this 756 756 * partition be removed? 1=remove, 0=keep 757 757 */ 758 - static int nvram_can_remove_partition(struct nvram_partition *part, 758 + static int __init nvram_can_remove_partition(struct nvram_partition *part, 759 759 const char *name, int sig, const char *exceptions[]) 760 760 { 761 761 if (part->header.signature != sig)
+4 -8
arch/powerpc/kernel/optprobes.c
··· 153 153 154 154 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) 155 155 { 156 - struct ppc_inst branch_op_callback, branch_emulate_step, temp; 156 + ppc_inst_t branch_op_callback, branch_emulate_step, temp; 157 157 unsigned long op_callback_addr, emulate_step_addr; 158 158 kprobe_opcode_t *buff; 159 159 long b_offset; ··· 228 228 /* 229 229 * 3. load instruction to be emulated into relevant register, and 230 230 */ 231 - if (IS_ENABLED(CONFIG_PPC64)) { 232 - temp = ppc_inst_read(p->ainsn.insn); 233 - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); 234 - } else { 235 - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); 236 - } 231 + temp = ppc_inst_read(p->ainsn.insn); 232 + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); 237 233 238 234 /* 239 235 * 4. branch back from trampoline ··· 265 269 266 270 void arch_optimize_kprobes(struct list_head *oplist) 267 271 { 268 - struct ppc_inst instr; 272 + ppc_inst_t instr; 269 273 struct optimized_kprobe *op; 270 274 struct optimized_kprobe *tmp; 271 275
+2 -2
arch/powerpc/kernel/optprobes_head.S
··· 10 10 #include <asm/asm-offsets.h> 11 11 12 12 #ifdef CONFIG_PPC64 13 - #define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) 14 - #define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) 13 + #define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) 14 + #define REST_30GPRS(base) REST_GPRS(2, 31, base) 15 15 #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop 16 16 #else 17 17 #define SAVE_30GPRS(base) stmw r2, GPR2(base)
+7 -11
arch/powerpc/kernel/paca.c
··· 139 139 } 140 140 #endif /* CONFIG_PPC_PSERIES */ 141 141 142 - #ifdef CONFIG_PPC_BOOK3S_64 143 - 142 + #ifdef CONFIG_PPC_64S_HASH_MMU 144 143 /* 145 144 * 3 persistent SLBs are allocated here. The buffer will be zero 146 145 * initially, hence will all be invaild until we actually write them. ··· 168 169 169 170 return s; 170 171 } 171 - 172 - #endif /* CONFIG_PPC_BOOK3S_64 */ 172 + #endif /* CONFIG_PPC_64S_HASH_MMU */ 173 173 174 174 #ifdef CONFIG_PPC_PSERIES 175 175 /** ··· 224 226 new_paca->kexec_state = KEXEC_STATE_NONE; 225 227 new_paca->__current = &init_task; 226 228 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; 227 - #ifdef CONFIG_PPC_BOOK3S_64 229 + #ifdef CONFIG_PPC_64S_HASH_MMU 228 230 new_paca->slb_shadow_ptr = NULL; 229 231 #endif 230 232 ··· 305 307 #ifdef CONFIG_PPC_PSERIES 306 308 paca->lppaca_ptr = new_lppaca(cpu, limit); 307 309 #endif 308 - #ifdef CONFIG_PPC_BOOK3S_64 310 + #ifdef CONFIG_PPC_64S_HASH_MMU 309 311 paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); 310 312 #endif 311 313 #ifdef CONFIG_PPC_PSERIES ··· 326 328 paca_nr_cpu_ids = nr_cpu_ids; 327 329 paca_ptrs_size = new_ptrs_size; 328 330 329 - #ifdef CONFIG_PPC_BOOK3S_64 331 + #ifdef CONFIG_PPC_64S_HASH_MMU 330 332 if (early_radix_enabled()) { 331 333 /* Ugly fixup, see new_slb_shadow() */ 332 334 memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), ··· 339 341 paca_ptrs_size + paca_struct_size, nr_cpu_ids); 340 342 } 341 343 344 + #ifdef CONFIG_PPC_64S_HASH_MMU 342 345 void copy_mm_to_paca(struct mm_struct *mm) 343 346 { 344 - #ifdef CONFIG_PPC_BOOK3S 345 347 mm_context_t *context = &mm->context; 346 348 347 349 #ifdef CONFIG_PPC_MM_SLICES ··· 354 356 get_paca()->mm_ctx_user_psize = context->user_psize; 355 357 get_paca()->mm_ctx_sllp = context->sllp; 356 358 #endif 357 - #else /* !CONFIG_PPC_BOOK3S */ 358 - return; 359 - #endif 360 359 } 360 + #endif /* CONFIG_PPC_64S_HASH_MMU */
+1 -1
arch/powerpc/kernel/pci-common.c
··· 62 62 63 63 static const struct dma_map_ops *pci_dma_ops; 64 64 65 - void set_pci_dma_ops(const struct dma_map_ops *dma_ops) 65 + void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) 66 66 { 67 67 pci_dma_ops = dma_ops; 68 68 }
+2 -2
arch/powerpc/kernel/pci_32.c
··· 37 37 EXPORT_SYMBOL(isa_io_base); 38 38 EXPORT_SYMBOL(pci_dram_offset); 39 39 40 - void pcibios_make_OF_bus_map(void); 40 + void __init pcibios_make_OF_bus_map(void); 41 41 42 42 static void fixup_cpc710_pci64(struct pci_dev* dev); 43 43 static u8* pci_to_OF_bus_map; ··· 109 109 } 110 110 } 111 111 112 - void 112 + void __init 113 113 pcibios_make_OF_bus_map(void) 114 114 { 115 115 int i;
+48 -10
arch/powerpc/kernel/process.c
··· 628 628 { 629 629 struct arch_hw_breakpoint null_brk = {0}; 630 630 struct arch_hw_breakpoint *info; 631 - struct ppc_inst instr = ppc_inst(0); 631 + ppc_inst_t instr = ppc_inst(0); 632 632 int type = 0; 633 633 int size = 0; 634 634 unsigned long ea; ··· 1156 1156 #endif 1157 1157 } 1158 1158 1159 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1160 + void kvmppc_save_user_regs(void) 1161 + { 1162 + unsigned long usermsr; 1163 + 1164 + if (!current->thread.regs) 1165 + return; 1166 + 1167 + usermsr = current->thread.regs->msr; 1168 + 1169 + if (usermsr & MSR_FP) 1170 + save_fpu(current); 1171 + 1172 + if (usermsr & MSR_VEC) 1173 + save_altivec(current); 1174 + 1175 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1176 + if (usermsr & MSR_TM) { 1177 + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); 1178 + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); 1179 + current->thread.tm_texasr = mfspr(SPRN_TEXASR); 1180 + current->thread.regs->msr &= ~MSR_TM; 1181 + } 1182 + #endif 1183 + } 1184 + EXPORT_SYMBOL_GPL(kvmppc_save_user_regs); 1185 + 1186 + void kvmppc_save_current_sprs(void) 1187 + { 1188 + save_sprs(&current->thread); 1189 + } 1190 + EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs); 1191 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1192 + 1159 1193 static inline void restore_sprs(struct thread_struct *old_thread, 1160 1194 struct thread_struct *new_thread) 1161 1195 { ··· 1240 1206 { 1241 1207 struct thread_struct *new_thread, *old_thread; 1242 1208 struct task_struct *last; 1243 - #ifdef CONFIG_PPC_BOOK3S_64 1209 + #ifdef CONFIG_PPC_64S_HASH_MMU 1244 1210 struct ppc64_tlb_batch *batch; 1245 1211 #endif 1246 1212 ··· 1249 1215 1250 1216 WARN_ON(!irqs_disabled()); 1251 1217 1252 - #ifdef CONFIG_PPC_BOOK3S_64 1218 + #ifdef CONFIG_PPC_64S_HASH_MMU 1253 1219 batch = this_cpu_ptr(&ppc64_tlb_batch); 1254 1220 if (batch->active) { 1255 1221 current_thread_info()->local_flags |= _TLF_LAZY_MMU; ··· 1315 1281 1316 1282 set_return_regs_changed(); /* _switch changes stack (and regs) */ 1317 1283 
1318 - #ifdef CONFIG_PPC32 1319 - kuap_assert_locked(); 1320 - #endif 1284 + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) 1285 + kuap_assert_locked(); 1286 + 1321 1287 last = _switch(old_thread, new_thread); 1322 1288 1323 1289 /* ··· 1328 1294 */ 1329 1295 1330 1296 #ifdef CONFIG_PPC_BOOK3S_64 1297 + #ifdef CONFIG_PPC_64S_HASH_MMU 1331 1298 /* 1332 1299 * This applies to a process that was context switched while inside 1333 1300 * arch_enter_lazy_mmu_mode(), to re-activate the batch that was ··· 1340 1305 batch = this_cpu_ptr(&ppc64_tlb_batch); 1341 1306 batch->active = 1; 1342 1307 } 1308 + #endif 1343 1309 1344 1310 /* 1345 1311 * Math facilities are masked out of the child MSR in copy_thread. ··· 1691 1655 1692 1656 static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) 1693 1657 { 1694 - #ifdef CONFIG_PPC_BOOK3S_64 1658 + #ifdef CONFIG_PPC_64S_HASH_MMU 1695 1659 unsigned long sp_vsid; 1696 1660 unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; 1697 1661 ··· 1802 1766 #endif 1803 1767 #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) 1804 1768 p->thread.kuap = KUAP_NONE; 1769 + #endif 1770 + #if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) 1771 + p->thread.pid = MMU_NO_CONTEXT; 1805 1772 #endif 1806 1773 1807 1774 setup_ksp_vsid(p, sp); ··· 2338 2299 * the heap, we can put it above 1TB so it is backed by a 1TB 2339 2300 * segment. Otherwise the heap will be in the bottom 1TB 2340 2301 * which always uses 256MB segments and this may result in a 2341 - * performance penalty. We don't need to worry about radix. For 2342 - * radix, mmu_highuser_ssize remains unchanged from 256MB. 2302 + * performance penalty. 2343 2303 */ 2344 - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) 2304 + if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) 2345 2305 base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); 2346 2306 #endif 2347 2307
+3 -3
arch/powerpc/kernel/prom.c
··· 231 231 ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); 232 232 } 233 233 234 - #ifdef CONFIG_PPC_BOOK3S_64 234 + #ifdef CONFIG_PPC_64S_HASH_MMU 235 235 static void __init init_mmu_slb_size(unsigned long node) 236 236 { 237 237 const __be32 *slb_size_ptr; ··· 447 447 */ 448 448 449 449 #ifdef CONFIG_SPARSEMEM 450 - static bool validate_mem_limit(u64 base, u64 *size) 450 + static bool __init validate_mem_limit(u64 base, u64 *size) 451 451 { 452 452 u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); 453 453 ··· 458 458 return true; 459 459 } 460 460 #else 461 - static bool validate_mem_limit(u64 base, u64 *size) 461 + static bool __init validate_mem_limit(u64 base, u64 *size) 462 462 { 463 463 return true; 464 464 }
+7 -7
arch/powerpc/kernel/prom_init.c
··· 672 672 return call_prom("getproplen", 2, 1, node, ADDR(pname)); 673 673 } 674 674 675 - static void add_string(char **str, const char *q) 675 + static void __init add_string(char **str, const char *q) 676 676 { 677 677 char *p = *str; 678 678 ··· 682 682 *str = p; 683 683 } 684 684 685 - static char *tohex(unsigned int x) 685 + static char *__init tohex(unsigned int x) 686 686 { 687 687 static const char digits[] __initconst = "0123456789abcdef"; 688 688 static char result[9] __prombss; ··· 728 728 #define prom_islower(c) ('a' <= (c) && (c) <= 'z') 729 729 #define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) 730 730 731 - static unsigned long prom_strtoul(const char *cp, const char **endp) 731 + static unsigned long __init prom_strtoul(const char *cp, const char **endp) 732 732 { 733 733 unsigned long result = 0, base = 10, value; 734 734 ··· 753 753 return result; 754 754 } 755 755 756 - static unsigned long prom_memparse(const char *ptr, const char **retptr) 756 + static unsigned long __init prom_memparse(const char *ptr, const char **retptr) 757 757 { 758 758 unsigned long ret = prom_strtoul(ptr, retptr); 759 759 int shift = 0; ··· 1786 1786 } 1787 1787 1788 1788 #ifdef CONFIG_PPC_SVM 1789 - static int prom_rtas_hcall(uint64_t args) 1789 + static int __init prom_rtas_hcall(uint64_t args) 1790 1790 { 1791 1791 register uint64_t arg1 asm("r3") = H_RTAS; 1792 1792 register uint64_t arg2 asm("r4") = args; ··· 2991 2991 2992 2992 /* Check if the phy-handle property exists - bail if it does */ 2993 2993 rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); 2994 - if (!rv) 2994 + if (rv <= 0) 2995 2995 return; 2996 2996 2997 2997 /* ··· 3248 3248 /* 3249 3249 * Perform the Enter Secure Mode ultracall. 
3250 3250 */ 3251 - static int enter_secure_mode(unsigned long kbase, unsigned long fdt) 3251 + static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt) 3252 3252 { 3253 3253 register unsigned long r3 asm("r3") = UV_ESM; 3254 3254 register unsigned long r4 asm("r4") = kbase;
+91 -13
arch/powerpc/kernel/rtas.c
··· 492 492 } 493 493 EXPORT_SYMBOL(rtas_call); 494 494 495 - /* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status 496 - * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. 495 + /** 496 + * rtas_busy_delay_time() - From an RTAS status value, calculate the 497 + * suggested delay time in milliseconds. 498 + * 499 + * @status: a value returned from rtas_call() or similar APIs which return 500 + * the status of a RTAS function call. 501 + * 502 + * Context: Any context. 503 + * 504 + * Return: 505 + * * 100000 - If @status is 9905. 506 + * * 10000 - If @status is 9904. 507 + * * 1000 - If @status is 9903. 508 + * * 100 - If @status is 9902. 509 + * * 10 - If @status is 9901. 510 + * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but 511 + * some callers depend on this behavior, and the worst outcome 512 + * is that they will delay for longer than necessary. 513 + * * 0 - If @status is not a busy or extended delay value. 497 514 */ 498 515 unsigned int rtas_busy_delay_time(int status) 499 516 { ··· 530 513 } 531 514 EXPORT_SYMBOL(rtas_busy_delay_time); 532 515 533 - /* For an RTAS busy status code, perform the hinted delay. */ 534 - unsigned int rtas_busy_delay(int status) 516 + /** 517 + * rtas_busy_delay() - helper for RTAS busy and extended delay statuses 518 + * 519 + * @status: a value returned from rtas_call() or similar APIs which return 520 + * the status of a RTAS function call. 521 + * 522 + * Context: Process context. May sleep or schedule. 523 + * 524 + * Return: 525 + * * true - @status is RTAS_BUSY or an extended delay hint. The 526 + * caller may assume that the CPU has been yielded if necessary, 527 + * and that an appropriate delay for @status has elapsed. 528 + * Generally the caller should reattempt the RTAS call which 529 + * yielded @status. 530 + * 531 + * * false - @status is not @RTAS_BUSY nor an extended delay hint. The 532 + * caller is responsible for handling @status. 
533 + */ 534 + bool rtas_busy_delay(int status) 535 535 { 536 536 unsigned int ms; 537 + bool ret; 537 538 538 - might_sleep(); 539 - ms = rtas_busy_delay_time(status); 540 - if (ms && need_resched()) 541 - msleep(ms); 539 + switch (status) { 540 + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: 541 + ret = true; 542 + ms = rtas_busy_delay_time(status); 543 + /* 544 + * The extended delay hint can be as high as 100 seconds. 545 + * Surely any function returning such a status is either 546 + * buggy or isn't going to be significantly slowed by us 547 + * polling at 1HZ. Clamp the sleep time to one second. 548 + */ 549 + ms = clamp(ms, 1U, 1000U); 550 + /* 551 + * The delay hint is an order-of-magnitude suggestion, not 552 + * a minimum. It is fine, possibly even advantageous, for 553 + * us to pause for less time than hinted. For small values, 554 + * use usleep_range() to ensure we don't sleep much longer 555 + * than actually needed. 556 + * 557 + * See Documentation/timers/timers-howto.rst for 558 + * explanation of the threshold used here. In effect we use 559 + * usleep_range() for 9900 and 9901, msleep() for 560 + * 9902-9905. 561 + */ 562 + if (ms <= 20) 563 + usleep_range(ms * 100, ms * 1000); 564 + else 565 + msleep(ms); 566 + break; 567 + case RTAS_BUSY: 568 + ret = true; 569 + /* 570 + * We should call again immediately if there's no other 571 + * work to do. 572 + */ 573 + cond_resched(); 574 + break; 575 + default: 576 + ret = false; 577 + /* 578 + * Not a busy or extended delay status; the caller should 579 + * handle @status itself. Ensure we warn on misuses in 580 + * atomic context regardless. 581 + */ 582 + might_sleep(); 583 + break; 584 + } 542 585 543 - return ms; 586 + return ret; 544 587 } 545 588 EXPORT_SYMBOL(rtas_busy_delay); 546 589 ··· 886 809 /** 887 810 * rtas_activate_firmware() - Activate a new version of firmware. 888 811 * 812 + * Context: This function may sleep. 
813 + * 889 814 * Activate a new version of partition firmware. The OS must call this 890 815 * after resuming from a partition hibernation or migration in order 891 816 * to maintain the ability to perform live firmware updates. It's not 892 817 * catastrophic for this method to be absent or to fail; just log the 893 818 * condition in that case. 894 - * 895 - * Context: This function may sleep. 896 819 */ 897 820 void rtas_activate_firmware(void) 898 821 { ··· 967 890 #endif /* CONFIG_PPC_PSERIES */ 968 891 969 892 /** 970 - * Find a specific pseries error log in an RTAS extended event log. 893 + * get_pseries_errorlog() - Find a specific pseries error log in an RTAS 894 + * extended event log. 971 895 * @log: RTAS error/event log 972 896 * @section_id: two character section identifier 973 897 * 974 - * Returns a pointer to the specified errorlog or NULL if not found. 898 + * Return: A pointer to the specified errorlog or NULL if not found. 975 899 */ 976 900 struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, 977 901 uint16_t section_id)
+3 -3
arch/powerpc/kernel/rtasd.c
··· 455 455 } 456 456 457 457 #ifdef CONFIG_PPC64 458 - static void retrieve_nvram_error_log(void) 458 + static void __init retrieve_nvram_error_log(void) 459 459 { 460 460 unsigned int err_type ; 461 461 int rc ; ··· 473 473 } 474 474 } 475 475 #else /* CONFIG_PPC64 */ 476 - static void retrieve_nvram_error_log(void) 476 + static void __init retrieve_nvram_error_log(void) 477 477 { 478 478 } 479 479 #endif /* CONFIG_PPC64 */ 480 480 481 - static void start_event_scan(void) 481 + static void __init start_event_scan(void) 482 482 { 483 483 printk(KERN_DEBUG "RTAS daemon started\n"); 484 484 pr_debug("rtasd: will sleep for %d milliseconds\n",
+2 -2
arch/powerpc/kernel/security.c
··· 44 44 do_barrier_nospec_fixups(enable); 45 45 } 46 46 47 - void setup_barrier_nospec(void) 47 + void __init setup_barrier_nospec(void) 48 48 { 49 49 bool enable; 50 50 ··· 132 132 #endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ 133 133 134 134 #ifdef CONFIG_PPC_FSL_BOOK3E 135 - void setup_spectre_v2(void) 135 + void __init setup_spectre_v2(void) 136 136 { 137 137 if (no_spectrev2 || cpu_mitigations_off()) 138 138 do_btb_flush_fixups();
+1 -1
arch/powerpc/kernel/setup-common.c
··· 582 582 device_initcall(add_pcspkr); 583 583 #endif /* CONFIG_PCSPKR_PLATFORM */ 584 584 585 - void probe_machine(void) 585 + static __init void probe_machine(void) 586 586 { 587 587 extern struct machdep_calls __machine_desc_start; 588 588 extern struct machdep_calls __machine_desc_end;
+1 -1
arch/powerpc/kernel/setup.h
··· 29 29 static inline void setup_tlb_core_data(void) { } 30 30 #endif 31 31 32 - #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) 32 + #ifdef CONFIG_BOOKE_OR_40x 33 33 void exc_lvl_early_init(void); 34 34 #else 35 35 static inline void exc_lvl_early_init(void) { }
+2 -2
arch/powerpc/kernel/setup_32.c
··· 75 75 notrace void __init machine_init(u64 dt_ptr) 76 76 { 77 77 u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); 78 - struct ppc_inst insn; 78 + ppc_inst_t insn; 79 79 80 80 /* Configure static keys first, now that we're relocated. */ 81 81 setup_feature_keys(); ··· 175 175 } 176 176 #endif 177 177 178 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 178 + #ifdef CONFIG_BOOKE_OR_40x 179 179 void __init exc_lvl_early_init(void) 180 180 { 181 181 unsigned int i, hw_cpu;
+16 -7
arch/powerpc/kernel/setup_64.c
··· 499 499 * routines and/or provided to userland 500 500 */ 501 501 502 - static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, 502 + static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, 503 503 u32 bsize, u32 sets) 504 504 { 505 505 info->size = size; ··· 880 880 int rc = -EINVAL; 881 881 882 882 /* 883 - * Linear mapping is one of 4K, 1M and 16M. For 4K, no need 884 - * to group units. For larger mappings, use 1M atom which 885 - * should be large enough to contain a number of units. 883 + * BookE and BookS radix are historical values and should be revisited. 886 884 */ 887 - if (mmu_linear_psize == MMU_PAGE_4K) 885 + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { 886 + atom_size = SZ_1M; 887 + } else if (radix_enabled()) { 888 888 atom_size = PAGE_SIZE; 889 - else 890 - atom_size = 1 << 20; 889 + } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { 890 + /* 891 + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need 892 + * to group units. For larger mappings, use 1M atom which 893 + * should be large enough to contain a number of units. 894 + */ 895 + if (mmu_linear_psize == MMU_PAGE_4K) 896 + atom_size = PAGE_SIZE; 897 + else 898 + atom_size = SZ_1M; 899 + } 891 900 892 901 if (pcpu_chosen_fc != PCPU_FC_PAGE) { 893 902 rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+9 -5
arch/powerpc/kernel/signal_32.c
··· 527 527 regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); 528 528 529 529 #ifdef CONFIG_SPE 530 - /* force the process to reload the spe registers from 531 - current->thread when it next does spe instructions */ 530 + /* 531 + * Force the process to reload the spe registers from 532 + * current->thread when it next does spe instructions. 533 + * Since this is user ABI, we must enforce the sizing. 534 + */ 535 + BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32)); 532 536 regs_set_return_msr(regs, regs->msr & ~MSR_SPE); 533 537 if (msr & MSR_SPE) { 534 538 /* restore spe registers from the stack */ 535 - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, 536 - ELF_NEVRREG * sizeof(u32), failed); 539 + unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs, 540 + sizeof(current->thread.spe), failed); 537 541 current->thread.used_spe = true; 538 542 } else if (current->thread.used_spe) 539 - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); 543 + memset(&current->thread.spe, 0, sizeof(current->thread.spe)); 540 544 541 545 /* Always get SPEFSCR back */ 542 546 unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
+45 -2
arch/powerpc/kernel/smp.c
··· 61 61 #include <asm/cpu_has_feature.h> 62 62 #include <asm/ftrace.h> 63 63 #include <asm/kup.h> 64 + #include <asm/fadump.h> 64 65 65 66 #ifdef DEBUG 66 67 #include <asm/udbg.h> ··· 622 621 #endif 623 622 624 623 #ifdef CONFIG_NMI_IPI 624 + static void crash_stop_this_cpu(struct pt_regs *regs) 625 + #else 626 + static void crash_stop_this_cpu(void *dummy) 627 + #endif 628 + { 629 + /* 630 + * Just busy wait here and avoid marking CPU as offline to ensure 631 + * register data is captured appropriately. 632 + */ 633 + while (1) 634 + cpu_relax(); 635 + } 636 + 637 + void crash_smp_send_stop(void) 638 + { 639 + static bool stopped = false; 640 + 641 + /* 642 + * In case of fadump, register data for all CPUs is captured by f/w 643 + * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before 644 + * this rtas call to avoid tricky post processing of those CPUs' 645 + * backtraces. 646 + */ 647 + if (should_fadump_crash()) 648 + return; 649 + 650 + if (stopped) 651 + return; 652 + 653 + stopped = true; 654 + 655 + #ifdef CONFIG_NMI_IPI 656 + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); 657 + #else 658 + smp_call_function(crash_stop_this_cpu, NULL, 0); 659 + #endif /* CONFIG_NMI_IPI */ 660 + } 661 + 662 + #ifdef CONFIG_NMI_IPI 625 663 static void nmi_stop_this_cpu(struct pt_regs *regs) 626 664 { 627 665 /* ··· 936 896 return tg; 937 897 } 938 898 939 - static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) 899 + static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, 900 + int cpu, int cpu_group_start) 940 901 { 941 902 int first_thread = cpu_first_thread_sibling(cpu); 942 903 int i; ··· 1676 1635 BUG(); 1677 1636 } 1678 1637 1638 + #ifdef CONFIG_PROFILING 1679 1639 int setup_profiling_timer(unsigned int multiplier) 1680 1640 { 1681 1641 return 0; 1682 1642 } 1643 + #endif 1683 1644 1684 - static void fixup_topology(void) 1645 + static 
void __init fixup_topology(void) 1685 1646 { 1686 1647 int i; 1687 1648
+1 -1
arch/powerpc/kernel/swsusp_32.S
··· 181 181 #ifdef CONFIG_ALTIVEC 182 182 /* Stop pending alitvec streams and memory accesses */ 183 183 BEGIN_FTR_SECTION 184 - DSSALL 184 + PPC_DSSALL 185 185 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 186 186 #endif 187 187 sync
+1 -1
arch/powerpc/kernel/swsusp_asm64.S
··· 141 141 _GLOBAL(swsusp_arch_resume) 142 142 /* Stop pending alitvec streams and memory accesses */ 143 143 BEGIN_FTR_SECTION 144 - DSSALL 144 + PPC_DSSALL 145 145 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 146 146 sync 147 147
+5 -5
arch/powerpc/kernel/sysfs.c
··· 214 214 static DEVICE_ATTR(dscr_default, 0600, 215 215 show_dscr_default, store_dscr_default); 216 216 217 - static void sysfs_create_dscr_default(void) 217 + static void __init sysfs_create_dscr_default(void) 218 218 { 219 219 if (cpu_has_feature(CPU_FTR_DSCR)) { 220 220 int cpu; ··· 744 744 } 745 745 static DEVICE_ATTR(svm, 0444, show_svm, NULL); 746 746 747 - static void create_svm_file(void) 747 + static void __init create_svm_file(void) 748 748 { 749 749 device_create_file(cpu_subsys.dev_root, &dev_attr_svm); 750 750 } 751 751 #else 752 - static void create_svm_file(void) 752 + static void __init create_svm_file(void) 753 753 { 754 754 } 755 755 #endif /* CONFIG_PPC_SVM */ ··· 1110 1110 /* NUMA stuff */ 1111 1111 1112 1112 #ifdef CONFIG_NUMA 1113 - static void register_nodes(void) 1113 + static void __init register_nodes(void) 1114 1114 { 1115 1115 int i; 1116 1116 ··· 1134 1134 EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); 1135 1135 1136 1136 #else 1137 - static void register_nodes(void) 1137 + static void __init register_nodes(void) 1138 1138 { 1139 1139 return; 1140 1140 }
+62 -25
arch/powerpc/kernel/time.c
··· 88 88 89 89 #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF 90 90 u64 decrementer_max = DECREMENTER_DEFAULT_MAX; 91 + EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */ 91 92 92 93 static int decrementer_set_next_event(unsigned long evt, 93 94 struct clock_event_device *dev); ··· 108 107 EXPORT_SYMBOL(decrementer_clockevent); 109 108 110 109 DEFINE_PER_CPU(u64, decrementers_next_tb); 110 + EXPORT_SYMBOL_GPL(decrementers_next_tb); 111 111 static DEFINE_PER_CPU(struct clock_event_device, decrementers); 112 112 113 113 #define XSEC_PER_SEC (1024*1024) ··· 498 496 * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 499 497 */ 500 498 #ifdef CONFIG_PPC64 499 + static inline unsigned long test_irq_work_pending(void) 500 + { 501 + unsigned long x; 502 + 503 + asm volatile("lbz %0,%1(13)" 504 + : "=r" (x) 505 + : "i" (offsetof(struct paca_struct, irq_work_pending))); 506 + return x; 507 + } 508 + 501 509 static inline void set_irq_work_pending_flag(void) 502 510 { 503 511 asm volatile("stb %0,%1(13)" : : ··· 551 539 preempt_enable(); 552 540 } 553 541 542 + static void set_dec_or_work(u64 val) 543 + { 544 + set_dec(val); 545 + /* We may have raced with new irq work */ 546 + if (unlikely(test_irq_work_pending())) 547 + set_dec(1); 548 + } 549 + 554 550 #else /* CONFIG_IRQ_WORK */ 555 551 556 552 #define test_irq_work_pending() 0 557 553 #define clear_irq_work_pending() 558 554 555 + static void set_dec_or_work(u64 val) 556 + { 557 + set_dec(val); 558 + } 559 559 #endif /* CONFIG_IRQ_WORK */ 560 + 561 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 562 + void timer_rearm_host_dec(u64 now) 563 + { 564 + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); 565 + 566 + WARN_ON_ONCE(!arch_irqs_disabled()); 567 + WARN_ON_ONCE(mfmsr() & MSR_EE); 568 + 569 + if (now >= *next_tb) { 570 + local_paca->irq_happened |= PACA_IRQ_DEC; 571 + } else { 572 + now = *next_tb - now; 573 + if (now <= decrementer_max) 574 + set_dec_or_work(now); 575 + } 576 + } 577 + 
EXPORT_SYMBOL_GPL(timer_rearm_host_dec); 578 + #endif 560 579 561 580 /* 562 581 * timer_interrupt - gets called when the decrementer overflows, ··· 609 566 return; 610 567 } 611 568 612 - /* Ensure a positive value is written to the decrementer, or else 613 - * some CPUs will continue to take decrementer exceptions. When the 614 - * PPC_WATCHDOG (decrementer based) is configured, keep this at most 615 - * 31 bits, which is about 4 seconds on most systems, which gives 616 - * the watchdog a chance of catching timer interrupt hard lockups. 617 - */ 618 - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) 619 - set_dec(0x7fffffff); 620 - else 621 - set_dec(decrementer_max); 569 + /* Conditionally hard-enable interrupts. */ 570 + if (should_hard_irq_enable()) { 571 + /* 572 + * Ensure a positive value is written to the decrementer, or 573 + * else some CPUs will continue to take decrementer exceptions. 574 + * When the PPC_WATCHDOG (decrementer based) is configured, 575 + * keep this at most 31 bits, which is about 4 seconds on most 576 + * systems, which gives the watchdog a chance of catching timer 577 + * interrupt hard lockups. 
578 + */ 579 + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) 580 + set_dec(0x7fffffff); 581 + else 582 + set_dec(decrementer_max); 622 583 623 - /* Conditionally hard-enable interrupts now that the DEC has been 624 - * bumped to its maximum value 625 - */ 626 - may_hard_irq_enable(); 627 - 584 + do_hard_irq_enable(); 585 + } 628 586 629 587 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) 630 588 if (atomic_read(&ppc_n_lost_interrupts) != 0) ··· 650 606 } else { 651 607 now = *next_tb - now; 652 608 if (now <= decrementer_max) 653 - set_dec(now); 654 - /* We may have raced with new irq work */ 655 - if (test_irq_work_pending()) 656 - set_dec(1); 609 + set_dec_or_work(now); 657 610 __this_cpu_inc(irq_stat.timer_irqs_others); 658 611 } 659 612 ··· 771 730 772 731 static void start_cpu_decrementer(void) 773 732 { 774 - #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 733 + #ifdef CONFIG_BOOKE_OR_40x 775 734 unsigned int tcr; 776 735 777 736 /* Clear any pending timer interrupts */ ··· 884 843 struct clock_event_device *dev) 885 844 { 886 845 __this_cpu_write(decrementers_next_tb, get_tb() + evt); 887 - set_dec(evt); 888 - 889 - /* We may have raced with new irq work */ 890 - if (test_irq_work_pending()) 891 - set_dec(1); 846 + set_dec_or_work(evt); 892 847 893 848 return 0; 894 849 }
+4 -11
arch/powerpc/kernel/tm.S
··· 226 226 227 227 /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ 228 228 SAVE_GPR(0, r7) /* user r0 */ 229 - SAVE_GPR(2, r7) /* user r2 */ 230 - SAVE_4GPRS(3, r7) /* user r3-r6 */ 231 - SAVE_GPR(8, r7) /* user r8 */ 232 - SAVE_GPR(9, r7) /* user r9 */ 233 - SAVE_GPR(10, r7) /* user r10 */ 229 + SAVE_GPRS(2, 6, r7) /* user r2-r6 */ 230 + SAVE_GPRS(8, 10, r7) /* user r8-r10 */ 234 231 ld r3, GPR1(r1) /* user r1 */ 235 232 ld r4, GPR7(r1) /* user r7 */ 236 233 ld r5, GPR11(r1) /* user r11 */ ··· 442 445 ld r6, THREAD_TM_PPR(r3) 443 446 444 447 REST_GPR(0, r7) /* GPR0 */ 445 - REST_2GPRS(2, r7) /* GPR2-3 */ 446 - REST_GPR(4, r7) /* GPR4 */ 447 - REST_4GPRS(8, r7) /* GPR8-11 */ 448 - REST_2GPRS(12, r7) /* GPR12-13 */ 449 - 450 - REST_NVGPRS(r7) /* GPR14-31 */ 448 + REST_GPRS(2, 4, r7) /* GPR2-4 */ 449 + REST_GPRS(8, 31, r7) /* GPR8-31 */ 451 450 452 451 /* Load up PPR and DSCR here so we don't run with user values for long */ 453 452 mtspr SPRN_DSCR, r5
+45 -62
arch/powerpc/kernel/trace/ftrace.c
··· 41 41 #define NUM_FTRACE_TRAMPS 8 42 42 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; 43 43 44 - static struct ppc_inst 44 + static ppc_inst_t 45 45 ftrace_call_replace(unsigned long ip, unsigned long addr, int link) 46 46 { 47 - struct ppc_inst op; 47 + ppc_inst_t op; 48 48 49 49 addr = ppc_function_entry((void *)addr); 50 50 ··· 55 55 } 56 56 57 57 static int 58 - ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) 58 + ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) 59 59 { 60 - struct ppc_inst replaced; 60 + ppc_inst_t replaced; 61 61 62 62 /* 63 63 * Note: ··· 90 90 */ 91 91 static int test_24bit_addr(unsigned long ip, unsigned long addr) 92 92 { 93 - struct ppc_inst op; 93 + ppc_inst_t op; 94 94 addr = ppc_function_entry((void *)addr); 95 95 96 96 /* use the create_branch to verify that this offset can be branched */ 97 97 return create_branch(&op, (u32 *)ip, addr, 0) == 0; 98 98 } 99 99 100 - static int is_bl_op(struct ppc_inst op) 100 + static int is_bl_op(ppc_inst_t op) 101 101 { 102 102 return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; 103 103 } 104 104 105 - static int is_b_op(struct ppc_inst op) 105 + static int is_b_op(ppc_inst_t op) 106 106 { 107 107 return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; 108 108 } 109 109 110 - static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) 110 + static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) 111 111 { 112 112 int offset; 113 113 ··· 127 127 { 128 128 unsigned long entry, ptr, tramp; 129 129 unsigned long ip = rec->ip; 130 - struct ppc_inst op, pop; 130 + ppc_inst_t op, pop; 131 131 132 132 /* read where this goes */ 133 133 if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { ··· 221 221 __ftrace_make_nop(struct module *mod, 222 222 struct dyn_ftrace *rec, unsigned long addr) 223 223 { 224 - struct ppc_inst op; 225 - unsigned int jmp[4]; 224 + ppc_inst_t op; 226 225 unsigned long ip = rec->ip; 227 - 
unsigned long tramp; 226 + unsigned long tramp, ptr; 228 227 229 228 if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) 230 229 return -EFAULT; ··· 237 238 /* lets find where the pointer goes */ 238 239 tramp = find_bl_target(ip, op); 239 240 240 - /* 241 - * On PPC32 the trampoline looks like: 242 - * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha 243 - * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l 244 - * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 245 - * 0x4e, 0x80, 0x04, 0x20 bctr 246 - */ 247 - 248 - pr_devel("ip:%lx jumps to %lx", ip, tramp); 249 - 250 241 /* Find where the trampoline jumps to */ 251 - if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { 252 - pr_err("Failed to read %lx\n", tramp); 242 + if (module_trampoline_target(mod, tramp, &ptr)) { 243 + pr_err("Failed to get trampoline target\n"); 253 244 return -EFAULT; 254 245 } 255 246 256 - pr_devel(" %08x %08x ", jmp[0], jmp[1]); 257 - 258 - /* verify that this is what we expect it to be */ 259 - if (((jmp[0] & 0xffff0000) != 0x3d800000) || 260 - ((jmp[1] & 0xffff0000) != 0x398c0000) || 261 - (jmp[2] != 0x7d8903a6) || 262 - (jmp[3] != 0x4e800420)) { 263 - pr_err("Not a trampoline\n"); 264 - return -EINVAL; 265 - } 266 - 267 - tramp = (jmp[1] & 0xffff) | 268 - ((jmp[0] & 0xffff) << 16); 269 - if (tramp & 0x8000) 270 - tramp -= 0x10000; 271 - 272 - pr_devel(" %lx ", tramp); 273 - 274 - if (tramp != addr) { 247 + if (ptr != addr) { 275 248 pr_err("Trampoline location %08lx does not match addr\n", 276 249 tramp); 277 250 return -EINVAL; ··· 262 291 static unsigned long find_ftrace_tramp(unsigned long ip) 263 292 { 264 293 int i; 265 - struct ppc_inst instr; 294 + ppc_inst_t instr; 266 295 267 296 /* 268 297 * We have the compiler generated long_branch tramps at the end ··· 300 329 static int setup_mcount_compiler_tramp(unsigned long tramp) 301 330 { 302 331 int i; 303 - struct ppc_inst op; 332 + ppc_inst_t op; 304 333 unsigned long ptr; 305 - struct ppc_inst instr; 334 + ppc_inst_t instr; 306 335 
static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; 307 336 308 337 /* Is this a known long jump tramp? */ ··· 367 396 static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) 368 397 { 369 398 unsigned long tramp, ip = rec->ip; 370 - struct ppc_inst op; 399 + ppc_inst_t op; 371 400 372 401 /* Read where this goes */ 373 402 if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { ··· 407 436 struct dyn_ftrace *rec, unsigned long addr) 408 437 { 409 438 unsigned long ip = rec->ip; 410 - struct ppc_inst old, new; 439 + ppc_inst_t old, new; 411 440 412 441 /* 413 442 * If the calling address is more that 24 bits away, ··· 460 489 */ 461 490 #ifndef CONFIG_MPROFILE_KERNEL 462 491 static int 463 - expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) 492 + expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) 464 493 { 465 494 /* 466 495 * We expect to see: ··· 478 507 } 479 508 #else 480 509 static int 481 - expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) 510 + expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) 482 511 { 483 512 /* look for patched "NOP" on ppc64 with -mprofile-kernel */ 484 513 if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) ··· 490 519 static int 491 520 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 492 521 { 493 - struct ppc_inst op[2]; 494 - struct ppc_inst instr; 522 + ppc_inst_t op[2]; 523 + ppc_inst_t instr; 495 524 void *ip = (void *)rec->ip; 496 525 unsigned long entry, ptr, tramp; 497 526 struct module *mod = rec->arch.mod; ··· 559 588 __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 560 589 { 561 590 int err; 562 - struct ppc_inst op; 591 + ppc_inst_t op; 563 592 u32 *ip = (u32 *)rec->ip; 593 + struct module *mod = rec->arch.mod; 594 + unsigned long tramp; 564 595 565 596 /* read where this goes */ 566 597 if (copy_inst_from_kernel_nofault(&op, ip)) ··· 575 602 } 576 603 577 604 /* If we never set up a trampoline 
to ftrace_caller, then bail */ 578 - if (!rec->arch.mod->arch.tramp) { 605 + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS 606 + if (!mod->arch.tramp || !mod->arch.tramp_regs) { 607 + #else 608 + if (!mod->arch.tramp) { 609 + #endif 579 610 pr_err("No ftrace trampoline\n"); 580 611 return -EINVAL; 581 612 } 582 613 614 + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS 615 + if (rec->flags & FTRACE_FL_REGS) 616 + tramp = mod->arch.tramp_regs; 617 + else 618 + #endif 619 + tramp = mod->arch.tramp; 583 620 /* create the branch to the trampoline */ 584 - err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); 621 + err = create_branch(&op, ip, tramp, BRANCH_SET_LINK); 585 622 if (err) { 586 623 pr_err("REL24 out of range!\n"); 587 624 return -EINVAL; ··· 609 626 610 627 static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) 611 628 { 612 - struct ppc_inst op; 629 + ppc_inst_t op; 613 630 void *ip = (void *)rec->ip; 614 631 unsigned long tramp, entry, ptr; 615 632 ··· 657 674 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) 658 675 { 659 676 unsigned long ip = rec->ip; 660 - struct ppc_inst old, new; 677 + ppc_inst_t old, new; 661 678 662 679 /* 663 680 * If the calling address is more that 24 bits away, ··· 696 713 __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, 697 714 unsigned long addr) 698 715 { 699 - struct ppc_inst op; 716 + ppc_inst_t op; 700 717 unsigned long ip = rec->ip; 701 718 unsigned long entry, ptr, tramp; 702 719 struct module *mod = rec->arch.mod; ··· 790 807 unsigned long addr) 791 808 { 792 809 unsigned long ip = rec->ip; 793 - struct ppc_inst old, new; 810 + ppc_inst_t old, new; 794 811 795 812 /* 796 813 * If the calling address is more that 24 bits away, ··· 830 847 int ftrace_update_ftrace_func(ftrace_func_t func) 831 848 { 832 849 unsigned long ip = (unsigned long)(&ftrace_call); 833 - struct ppc_inst old, new; 850 + ppc_inst_t old, new; 834 851 int ret; 835 852 836 853 old = 
ppc_inst_read((u32 *)&ftrace_call); ··· 915 932 unsigned long ip = (unsigned long)(&ftrace_graph_call); 916 933 unsigned long addr = (unsigned long)(&ftrace_graph_caller); 917 934 unsigned long stub = (unsigned long)(&ftrace_graph_stub); 918 - struct ppc_inst old, new; 935 + ppc_inst_t old, new; 919 936 920 937 old = ftrace_call_replace(ip, stub, 0); 921 938 new = ftrace_call_replace(ip, addr, 0); ··· 928 945 unsigned long ip = (unsigned long)(&ftrace_graph_call); 929 946 unsigned long addr = (unsigned long)(&ftrace_graph_caller); 930 947 unsigned long stub = (unsigned long)(&ftrace_graph_stub); 931 - struct ppc_inst old, new; 948 + ppc_inst_t old, new; 932 949 933 950 old = ftrace_call_replace(ip, addr, 0); 934 951 new = ftrace_call_replace(ip, stub, 0);
+106 -14
arch/powerpc/kernel/trace/ftrace_32.S
··· 9 9 #include <asm/asm-offsets.h> 10 10 #include <asm/ftrace.h> 11 11 #include <asm/export.h> 12 + #include <asm/ptrace.h> 12 13 13 14 _GLOBAL(mcount) 14 15 _GLOBAL(_mcount) 15 16 /* 16 17 * It is required that _mcount on PPC32 must preserve the 17 - * link register. But we have r0 to play with. We use r0 18 + * link register. But we have r12 to play with. We use r12 18 19 * to push the return address back to the caller of mcount 19 20 * into the ctr register, restore the link register and 20 21 * then jump back using the ctr register. 21 22 */ 22 - mflr r0 23 - mtctr r0 24 - lwz r0, 4(r1) 23 + mflr r12 24 + mtctr r12 25 25 mtlr r0 26 26 bctr 27 + EXPORT_SYMBOL(_mcount) 27 28 28 29 _GLOBAL(ftrace_caller) 29 30 MCOUNT_SAVE_FRAME 30 31 /* r3 ends up with link register */ 31 32 subi r3, r3, MCOUNT_INSN_SIZE 33 + lis r5,function_trace_op@ha 34 + lwz r5,function_trace_op@l(r5) 35 + li r6, 0 32 36 .globl ftrace_call 33 37 ftrace_call: 34 38 bl ftrace_stub 35 39 nop 40 + MCOUNT_RESTORE_FRAME 41 + ftrace_caller_common: 36 42 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 37 43 .globl ftrace_graph_call 38 44 ftrace_graph_call: 39 45 b ftrace_graph_stub 40 46 _GLOBAL(ftrace_graph_stub) 41 47 #endif 42 - MCOUNT_RESTORE_FRAME 43 48 /* old link register ends up in ctr reg */ 44 49 bctr 45 50 46 - EXPORT_SYMBOL(_mcount) 47 51 48 52 _GLOBAL(ftrace_stub) 49 53 blr 50 54 55 + _GLOBAL(ftrace_regs_caller) 56 + /* Save the original return address in A's stack frame */ 57 + stw r0,LRSAVE(r1) 58 + 59 + /* Create our stack frame + pt_regs */ 60 + stwu r1,-INT_FRAME_SIZE(r1) 61 + 62 + /* Save all gprs to pt_regs */ 63 + stw r0, GPR0(r1) 64 + stmw r2, GPR2(r1) 65 + 66 + /* Save previous stack pointer (r1) */ 67 + addi r8, r1, INT_FRAME_SIZE 68 + stw r8, GPR1(r1) 69 + 70 + /* Load special regs for save below */ 71 + mfmsr r8 72 + mfctr r9 73 + mfxer r10 74 + mfcr r11 75 + 76 + /* Get the _mcount() call site out of LR */ 77 + mflr r7 78 + /* Save it as pt_regs->nip */ 79 + stw r7, _NIP(r1) 80 + /* 
Save the read LR in pt_regs->link */ 81 + stw r0, _LINK(r1) 82 + 83 + lis r3,function_trace_op@ha 84 + lwz r5,function_trace_op@l(r3) 85 + 86 + /* Calculate ip from nip-4 into r3 for call below */ 87 + subi r3, r7, MCOUNT_INSN_SIZE 88 + 89 + /* Put the original return address in r4 as parent_ip */ 90 + mr r4, r0 91 + 92 + /* Save special regs */ 93 + stw r8, _MSR(r1) 94 + stw r9, _CTR(r1) 95 + stw r10, _XER(r1) 96 + stw r11, _CCR(r1) 97 + 98 + /* Load &pt_regs in r6 for call below */ 99 + addi r6, r1, STACK_FRAME_OVERHEAD 100 + 101 + /* ftrace_call(r3, r4, r5, r6) */ 102 + .globl ftrace_regs_call 103 + ftrace_regs_call: 104 + bl ftrace_stub 105 + nop 106 + 107 + /* Load ctr with the possibly modified NIP */ 108 + lwz r3, _NIP(r1) 109 + mtctr r3 110 + 111 + /* Restore gprs */ 112 + lmw r2, GPR2(r1) 113 + 114 + /* Restore possibly modified LR */ 115 + lwz r0, _LINK(r1) 116 + mtlr r0 117 + 118 + /* Pop our stack frame */ 119 + addi r1, r1, INT_FRAME_SIZE 120 + 121 + b ftrace_caller_common 122 + 51 123 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 52 124 _GLOBAL(ftrace_graph_caller) 53 - addi r5, r1, 48 54 - /* load r4 with local address */ 55 - lwz r4, 44(r1) 56 - subi r4, r4, MCOUNT_INSN_SIZE 125 + stwu r1,-48(r1) 126 + stw r3, 12(r1) 127 + stw r4, 16(r1) 128 + stw r5, 20(r1) 129 + stw r6, 24(r1) 130 + stw r7, 28(r1) 131 + stw r8, 32(r1) 132 + stw r9, 36(r1) 133 + stw r10,40(r1) 57 134 58 - /* Grab the LR out of the caller stack frame */ 59 - lwz r3,52(r1) 135 + addi r5, r1, 48 136 + mfctr r4 /* ftrace_caller has moved local addr here */ 137 + stw r4, 44(r1) 138 + mflr r3 /* ftrace_caller has restored LR from stack */ 139 + subi r4, r4, MCOUNT_INSN_SIZE 60 140 61 141 bl prepare_ftrace_return 62 142 nop ··· 146 66 * Change the LR in the callers stack frame to this. 
147 67 */ 148 68 stw r3,52(r1) 69 + mtlr r3 70 + lwz r0,44(r1) 71 + mtctr r0 149 72 150 - MCOUNT_RESTORE_FRAME 151 - /* old link register ends up in ctr reg */ 73 + lwz r3, 12(r1) 74 + lwz r4, 16(r1) 75 + lwz r5, 20(r1) 76 + lwz r6, 24(r1) 77 + lwz r7, 28(r1) 78 + lwz r8, 32(r1) 79 + lwz r9, 36(r1) 80 + lwz r10,40(r1) 81 + 82 + addi r1, r1, 48 83 + 152 84 bctr 153 85 154 86 _GLOBAL(return_to_handler)
+6 -9
arch/powerpc/kernel/trace/ftrace_64_mprofile.S
··· 41 41 42 42 /* Save all gprs to pt_regs */ 43 43 SAVE_GPR(0, r1) 44 - SAVE_10GPRS(2, r1) 44 + SAVE_GPRS(2, 11, r1) 45 45 46 46 /* Ok to continue? */ 47 47 lbz r3, PACA_FTRACE_ENABLED(r13) 48 48 cmpdi r3, 0 49 49 beq ftrace_no_trace 50 50 51 - SAVE_10GPRS(12, r1) 52 - SAVE_10GPRS(22, r1) 51 + SAVE_GPRS(12, 31, r1) 53 52 54 53 /* Save previous stack pointer (r1) */ 55 54 addi r8, r1, SWITCH_FRAME_SIZE ··· 107 108 #endif 108 109 109 110 /* Restore gprs */ 110 - REST_GPR(0,r1) 111 - REST_10GPRS(2,r1) 112 - REST_10GPRS(12,r1) 113 - REST_10GPRS(22,r1) 111 + REST_GPR(0, r1) 112 + REST_GPRS(2, 31, r1) 114 113 115 114 /* Restore possibly modified LR */ 116 115 ld r0, _LINK(r1) ··· 154 157 stdu r1, -SWITCH_FRAME_SIZE(r1) 155 158 156 159 /* Save all gprs to pt_regs */ 157 - SAVE_8GPRS(3, r1) 160 + SAVE_GPRS(3, 10, r1) 158 161 159 162 lbz r3, PACA_FTRACE_ENABLED(r13) 160 163 cmpdi r3, 0 ··· 191 194 mtctr r3 192 195 193 196 /* Restore gprs */ 194 - REST_8GPRS(3,r1) 197 + REST_GPRS(3, 10, r1) 195 198 196 199 /* Restore callee's TOC */ 197 200 ld r2, 24(r1)
+5 -5
arch/powerpc/kernel/udbg_16550.c
··· 84 84 return udbg_uart_in(UART_RBR); 85 85 } 86 86 87 - static void udbg_use_uart(void) 87 + static void __init udbg_use_uart(void) 88 88 { 89 89 udbg_putc = udbg_uart_putc; 90 90 udbg_flush = udbg_uart_flush; ··· 92 92 udbg_getc_poll = udbg_uart_getc_poll; 93 93 } 94 94 95 - void udbg_uart_setup(unsigned int speed, unsigned int clock) 95 + void __init udbg_uart_setup(unsigned int speed, unsigned int clock) 96 96 { 97 97 unsigned int dll, base_bauds; 98 98 ··· 121 121 udbg_uart_out(UART_FCR, 0x7); 122 122 } 123 123 124 - unsigned int udbg_probe_uart_speed(unsigned int clock) 124 + unsigned int __init udbg_probe_uart_speed(unsigned int clock) 125 125 { 126 126 unsigned int dll, dlm, divisor, prescaler, speed; 127 127 u8 old_lcr; ··· 172 172 outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride)); 173 173 } 174 174 175 - void udbg_uart_init_pio(unsigned long port, unsigned int stride) 175 + void __init udbg_uart_init_pio(unsigned long port, unsigned int stride) 176 176 { 177 177 if (!port) 178 178 return; ··· 194 194 } 195 195 196 196 197 - void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) 197 + void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) 198 198 { 199 199 if (!addr) 200 200 return;
+1 -1
arch/powerpc/kernel/vecemu.c
··· 261 261 262 262 int emulate_altivec(struct pt_regs *regs) 263 263 { 264 - struct ppc_inst instr; 264 + ppc_inst_t instr; 265 265 unsigned int i, word; 266 266 unsigned int va, vb, vc, vd; 267 267 vector128 *vrs;
+10
arch/powerpc/kernel/vector.S
··· 47 47 */ 48 48 _GLOBAL(load_up_altivec) 49 49 mfmsr r5 /* grab the current MSR */ 50 + #ifdef CONFIG_PPC_BOOK3S_64 51 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ 52 + ori r5,r5,MSR_RI 53 + #endif 50 54 oris r5,r5,MSR_VEC@h 51 55 MTMSRD(r5) /* enable use of AltiVec now */ 52 56 isync ··· 129 125 beql+ load_up_fpu /* skip if already loaded */ 130 126 andis. r5,r12,MSR_VEC@h 131 127 beql+ load_up_altivec /* skip if already loaded */ 128 + 129 + #ifdef CONFIG_PPC_BOOK3S_64 130 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ 131 + li r5,MSR_RI 132 + mtmsrd r5,1 133 + #endif 132 134 133 135 ld r4,PACACURRENT(r13) 134 136 addi r4,r4,THREAD /* Get THREAD */
+3 -13
arch/powerpc/kernel/vmlinux.lds.S
··· 322 322 #ifdef CONFIG_PPC32 323 323 .data : AT(ADDR(.data) - LOAD_OFFSET) { 324 324 DATA_DATA 325 - #ifdef CONFIG_UBSAN 326 - *(.data..Lubsan_data*) 327 - *(.data..Lubsan_type*) 328 - #endif 329 325 *(.data.rel*) 330 326 *(SDATA_MAIN) 331 327 *(.sdata2) ··· 332 336 #else 333 337 .data : AT(ADDR(.data) - LOAD_OFFSET) { 334 338 DATA_DATA 335 - #ifdef CONFIG_UBSAN 336 - *(.data..Lubsan_data*) 337 - *(.data..Lubsan_type*) 338 - #endif 339 339 *(.data.rel*) 340 340 *(.toc1) 341 341 *(.branch_lt) 342 342 } 343 343 344 - . = ALIGN(256); 345 - .got : AT(ADDR(.got) - LOAD_OFFSET) { 346 - __toc_start = .; 344 + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { 345 + *(.got) 347 346 #ifndef CONFIG_RELOCATABLE 348 347 __prom_init_toc_start = .; 349 - arch/powerpc/kernel/prom_init.o*(.toc .got) 348 + arch/powerpc/kernel/prom_init.o*(.toc) 350 349 __prom_init_toc_end = .; 351 350 #endif 352 - *(.got) 353 351 *(.toc) 354 352 } 355 353 #endif
+180 -43
arch/powerpc/kernel/watchdog.c
··· 85 85 86 86 /* SMP checker bits */ 87 87 static unsigned long __wd_smp_lock; 88 + static unsigned long __wd_reporting; 89 + static unsigned long __wd_nmi_output; 88 90 static cpumask_t wd_smp_cpus_pending; 89 91 static cpumask_t wd_smp_cpus_stuck; 90 92 static u64 wd_smp_last_reset_tb; 93 + 94 + /* 95 + * Try to take the exclusive watchdog action / NMI IPI / printing lock. 96 + * wd_smp_lock must be held. If this fails, we should return and wait 97 + * for the watchdog to kick in again (or another CPU to trigger it). 98 + * 99 + * Importantly, if hardlockup_panic is set, wd_try_report failure should 100 + * not delay the panic, because whichever other CPU is reporting will 101 + * call panic. 102 + */ 103 + static bool wd_try_report(void) 104 + { 105 + if (__wd_reporting) 106 + return false; 107 + __wd_reporting = 1; 108 + return true; 109 + } 110 + 111 + /* End printing after successful wd_try_report. wd_smp_lock not required. */ 112 + static void wd_end_reporting(void) 113 + { 114 + smp_mb(); /* End printing "critical section" */ 115 + WARN_ON_ONCE(__wd_reporting == 0); 116 + WRITE_ONCE(__wd_reporting, 0); 117 + } 91 118 92 119 static inline void wd_smp_lock(unsigned long *flags) 93 120 { ··· 155 128 else 156 129 dump_stack(); 157 130 131 + /* 132 + * __wd_nmi_output must be set after we printk from NMI context. 133 + * 134 + * printk from NMI context defers printing to the console to irq_work. 135 + * If that NMI was taken in some code that is hard-locked, then irqs 136 + * are disabled so irq_work will never fire. That can result in the 137 + * hard lockup messages being delayed (indefinitely, until something 138 + * else kicks the console drivers). 139 + * 140 + * Setting __wd_nmi_output will cause another CPU to notice and kick 141 + * the console drivers for us. 142 + * 143 + * xchg is not needed here (it could be a smp_mb and store), but xchg 144 + * gives the memory ordering and atomicity required. 
145 + */ 146 + xchg(&__wd_nmi_output, 1); 147 + 158 148 /* Do not panic from here because that can recurse into NMI IPI layer */ 159 149 } 160 150 161 - static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) 151 + static bool set_cpu_stuck(int cpu) 162 152 { 163 - cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); 164 - cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); 153 + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); 154 + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); 155 + /* 156 + * See wd_smp_clear_cpu_pending() 157 + */ 158 + smp_mb(); 165 159 if (cpumask_empty(&wd_smp_cpus_pending)) { 166 - wd_smp_last_reset_tb = tb; 160 + wd_smp_last_reset_tb = get_tb(); 167 161 cpumask_andnot(&wd_smp_cpus_pending, 168 162 &wd_cpus_enabled, 169 163 &wd_smp_cpus_stuck); 164 + return true; 170 165 } 171 - } 172 - static void set_cpu_stuck(int cpu, u64 tb) 173 - { 174 - set_cpumask_stuck(cpumask_of(cpu), tb); 166 + return false; 175 167 } 176 168 177 - static void watchdog_smp_panic(int cpu, u64 tb) 169 + static void watchdog_smp_panic(int cpu) 178 170 { 171 + static cpumask_t wd_smp_cpus_ipi; // protected by reporting 179 172 unsigned long flags; 173 + u64 tb, last_reset; 180 174 int c; 181 175 182 176 wd_smp_lock(&flags); 183 177 /* Double check some things under lock */ 184 - if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) 178 + tb = get_tb(); 179 + last_reset = wd_smp_last_reset_tb; 180 + if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb) 185 181 goto out; 186 182 if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) 187 183 goto out; 188 - if (cpumask_weight(&wd_smp_cpus_pending) == 0) 184 + if (!wd_try_report()) 189 185 goto out; 186 + for_each_online_cpu(c) { 187 + if (!cpumask_test_cpu(c, &wd_smp_cpus_pending)) 188 + continue; 189 + if (c == cpu) 190 + continue; // should not happen 191 + 192 + __cpumask_set_cpu(c, &wd_smp_cpus_ipi); 193 + if (set_cpu_stuck(c)) 194 + break; 195 + } 196 + if 
(cpumask_empty(&wd_smp_cpus_ipi)) { 197 + wd_end_reporting(); 198 + goto out; 199 + } 200 + wd_smp_unlock(&flags); 190 201 191 202 pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", 192 - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); 203 + cpu, cpumask_pr_args(&wd_smp_cpus_ipi)); 193 204 pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", 194 - cpu, tb, wd_smp_last_reset_tb, 195 - tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); 205 + cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000); 196 206 197 207 if (!sysctl_hardlockup_all_cpu_backtrace) { 198 208 /* 199 209 * Try to trigger the stuck CPUs, unless we are going to 200 210 * get a backtrace on all of them anyway. 201 211 */ 202 - for_each_cpu(c, &wd_smp_cpus_pending) { 203 - if (c == cpu) 204 - continue; 212 + for_each_cpu(c, &wd_smp_cpus_ipi) { 205 213 smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); 214 + __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); 206 215 } 207 - } 208 - 209 - /* Take the stuck CPUs out of the watch group */ 210 - set_cpumask_stuck(&wd_smp_cpus_pending, tb); 211 - 212 - wd_smp_unlock(&flags); 213 - 214 - if (sysctl_hardlockup_all_cpu_backtrace) 216 + } else { 215 217 trigger_allbutself_cpu_backtrace(); 216 - 217 - /* 218 - * Force flush any remote buffers that might be stuck in IRQ context 219 - * and therefore could not run their irq_work. 
220 - */ 221 - printk_trigger_flush(); 218 + cpumask_clear(&wd_smp_cpus_ipi); 219 + } 222 220 223 221 if (hardlockup_panic) 224 222 nmi_panic(NULL, "Hard LOCKUP"); 223 + 224 + wd_end_reporting(); 225 225 226 226 return; 227 227 ··· 256 202 wd_smp_unlock(&flags); 257 203 } 258 204 259 - static void wd_smp_clear_cpu_pending(int cpu, u64 tb) 205 + static void wd_smp_clear_cpu_pending(int cpu) 260 206 { 261 207 if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { 262 208 if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { 263 209 struct pt_regs *regs = get_irq_regs(); 264 210 unsigned long flags; 265 211 266 - wd_smp_lock(&flags); 267 - 268 212 pr_emerg("CPU %d became unstuck TB:%lld\n", 269 - cpu, tb); 213 + cpu, get_tb()); 270 214 print_irqtrace_events(current); 271 215 if (regs) 272 216 show_regs(regs); 273 217 else 274 218 dump_stack(); 275 219 220 + wd_smp_lock(&flags); 276 221 cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); 277 222 wd_smp_unlock(&flags); 223 + } else { 224 + /* 225 + * The last CPU to clear pending should have reset the 226 + * watchdog so we generally should not find it empty 227 + * here if our CPU was clear. However it could happen 228 + * due to a rare race with another CPU taking the 229 + * last CPU out of the mask concurrently. 230 + * 231 + * We can't add a warning for it. But just in case 232 + * there is a problem with the watchdog that is causing 233 + * the mask to not be reset, try to kick it along here. 234 + */ 235 + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) 236 + goto none_pending; 278 237 } 279 238 return; 280 239 } 240 + 241 + /* 242 + * All other updates to wd_smp_cpus_pending are performed under 243 + * wd_smp_lock. All of them are atomic except the case where the 244 + * mask becomes empty and is reset. This will not happen here because 245 + * cpu was tested to be in the bitmap (above), and a CPU only clears 246 + * its own bit. 
_Except_ in the case where another CPU has detected a 247 + * hard lockup on our CPU and takes us out of the pending mask. So in 248 + * normal operation there will be no race here, no problem. 249 + * 250 + * In the lockup case, this atomic clear-bit vs a store that refills 251 + * other bits in the accessed word will not be a problem. The bit clear 252 + * is atomic so it will not cause the store to get lost, and the store 253 + * will never set this bit so it will not overwrite the bit clear. The 254 + * only way for a stuck CPU to return to the pending bitmap is to 255 + * become unstuck itself. 256 + */ 281 257 cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); 258 + 259 + /* 260 + * Order the store to clear pending with the load(s) to check all 261 + * words in the pending mask to check they are all empty. This orders 262 + * with the same barrier on another CPU. This prevents two CPUs 263 + * clearing the last 2 pending bits, but neither seeing the other's 264 + * store when checking if the mask is empty, and missing an empty 265 + * mask, which ends with a false positive. 266 + */ 267 + smp_mb(); 282 268 if (cpumask_empty(&wd_smp_cpus_pending)) { 283 269 unsigned long flags; 284 270 271 + none_pending: 272 + /* 273 + * Double check under lock because more than one CPU could see 274 + * a clear mask with the lockless check after clearing their 275 + * pending bits. 
276 + */ 285 277 wd_smp_lock(&flags); 286 278 if (cpumask_empty(&wd_smp_cpus_pending)) { 287 - wd_smp_last_reset_tb = tb; 279 + wd_smp_last_reset_tb = get_tb(); 288 280 cpumask_andnot(&wd_smp_cpus_pending, 289 281 &wd_cpus_enabled, 290 282 &wd_smp_cpus_stuck); ··· 345 245 346 246 per_cpu(wd_timer_tb, cpu) = tb; 347 247 348 - wd_smp_clear_cpu_pending(cpu, tb); 248 + wd_smp_clear_cpu_pending(cpu); 349 249 350 250 if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) 351 - watchdog_smp_panic(cpu, tb); 251 + watchdog_smp_panic(cpu); 252 + 253 + if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { 254 + /* 255 + * Something has called printk from NMI context. It might be 256 + * stuck, so this triggers a flush that will get that 257 + * printk output to the console. 258 + * 259 + * See wd_lockup_ipi. 260 + */ 261 + printk_trigger_flush(); 262 + } 352 263 } 353 264 354 265 DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) ··· 378 267 379 268 tb = get_tb(); 380 269 if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { 270 + /* 271 + * Taking wd_smp_lock here means it is a soft-NMI lock, which 272 + * means we can't take any regular or irqsafe spin locks while 273 + * holding this lock. This is why timers can't printk while 274 + * holding the lock. 
275 + */ 381 276 wd_smp_lock(&flags); 382 277 if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { 383 278 wd_smp_unlock(&flags); 384 279 return 0; 385 280 } 386 - set_cpu_stuck(cpu, tb); 281 + if (!wd_try_report()) { 282 + wd_smp_unlock(&flags); 283 + /* Couldn't report, try again in 100ms */ 284 + mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000); 285 + return 0; 286 + } 287 + 288 + set_cpu_stuck(cpu); 289 + 290 + wd_smp_unlock(&flags); 387 291 388 292 pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", 389 293 cpu, (void *)regs->nip); ··· 409 283 print_irqtrace_events(current); 410 284 show_regs(regs); 411 285 412 - wd_smp_unlock(&flags); 286 + xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi 413 287 414 288 if (sysctl_hardlockup_all_cpu_backtrace) 415 289 trigger_allbutself_cpu_backtrace(); 416 290 417 291 if (hardlockup_panic) 418 292 nmi_panic(regs, "Hard LOCKUP"); 293 + 294 + wd_end_reporting(); 419 295 } 296 + /* 297 + * We are okay to change DEC in soft_nmi_interrupt because the masked 298 + * handler has marked a DEC as pending, so the timer interrupt will be 299 + * replayed as soon as local irqs are enabled again. 
300 + */ 420 301 if (wd_panic_timeout_tb < 0x7fffffff) 421 302 mtspr(SPRN_DEC, wd_panic_timeout_tb); 422 303 ··· 451 318 { 452 319 unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; 453 320 int cpu = smp_processor_id(); 454 - u64 tb = get_tb(); 321 + u64 tb; 455 322 323 + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) 324 + return; 325 + 326 + tb = get_tb(); 456 327 if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { 457 328 per_cpu(wd_timer_tb, cpu) = tb; 458 - wd_smp_clear_cpu_pending(cpu, tb); 329 + wd_smp_clear_cpu_pending(cpu); 459 330 } 460 331 } 461 332 EXPORT_SYMBOL(arch_touch_nmi_watchdog); ··· 517 380 cpumask_clear_cpu(cpu, &wd_cpus_enabled); 518 381 wd_smp_unlock(&flags); 519 382 520 - wd_smp_clear_cpu_pending(cpu, get_tb()); 383 + wd_smp_clear_cpu_pending(cpu); 521 384 } 522 385 523 386 static int stop_watchdog_on_cpu(unsigned int cpu)
+1 -1
arch/powerpc/kexec/core.c
··· 185 185 } 186 186 } 187 187 188 - int overlaps_crashkernel(unsigned long start, unsigned long size) 188 + int __init overlaps_crashkernel(unsigned long start, unsigned long size) 189 189 { 190 190 return (start + size) > crashk_res.start && start <= crashk_res.end; 191 191 }
+2 -2
arch/powerpc/kexec/core_64.c
··· 378 378 /* NOTREACHED */ 379 379 } 380 380 381 - #ifdef CONFIG_PPC_BOOK3S_64 381 + #ifdef CONFIG_PPC_64S_HASH_MMU 382 382 /* Values we need to export to the second kernel via the device tree. */ 383 383 static unsigned long htab_base; 384 384 static unsigned long htab_size; ··· 420 420 return 0; 421 421 } 422 422 late_initcall(export_htab_values); 423 - #endif /* CONFIG_PPC_BOOK3S_64 */ 423 + #endif /* CONFIG_PPC_64S_HASH_MMU */
+1 -1
arch/powerpc/kexec/ranges.c
··· 296 296 return ret; 297 297 } 298 298 299 - #ifdef CONFIG_PPC_BOOK3S_64 299 + #ifdef CONFIG_PPC_64S_HASH_MMU 300 300 /** 301 301 * add_htab_mem_range - Adds htab range to the given memory ranges list, 302 302 * if it exists
+16
arch/powerpc/kvm/Kconfig
··· 69 69 select KVM_BOOK3S_64_HANDLER 70 70 select KVM 71 71 select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE 72 + select PPC_64S_HASH_MMU 72 73 select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) 73 74 help 74 75 Support running unmodified book3s_64 and book3s_32 guest kernels ··· 130 129 ns per exit on POWER8. 131 130 132 131 If unsure, say N. 132 + 133 + config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND 134 + bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT 135 + depends on KVM_BOOK3S_HV_POSSIBLE 136 + default !EXPERT 137 + help 138 + Old nested HV capable Linux guests have a bug where they don't 139 + reflect the PMU in-use status of their L2 guest to the L0 host 140 + while the L2 PMU registers are live. This can result in loss 141 + of L2 PMU register state, causing perf to not work correctly in 142 + L2 guests. 143 + 144 + Selecting this option for the L0 host implements a workaround for 145 + those buggy L1s which saves the L2 state, at the cost of performance 146 + in all nested-capable guest entry/exit. 133 147 134 148 config KVM_BOOKE_HV 135 149 bool
+9 -2
arch/powerpc/kvm/book3s_64_entry.S
··· 374 374 BEGIN_FTR_SECTION 375 375 mtspr SPRN_DAWRX1,r10 376 376 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) 377 - mtspr SPRN_PID,r10 378 377 379 378 /* 380 - * Switch to host MMU mode 379 + * Switch to host MMU mode (don't have the real host PID but we aren't 380 + * going back to userspace). 381 381 */ 382 + hwsync 383 + isync 384 + 385 + mtspr SPRN_PID,r10 386 + 382 387 ld r10, HSTATE_KVM_VCPU(r13) 383 388 ld r10, VCPU_KVM(r10) 384 389 lwz r10, KVM_HOST_LPID(r10) ··· 393 388 ld r10, VCPU_KVM(r10) 394 389 ld r10, KVM_HOST_LPCR(r10) 395 390 mtspr SPRN_LPCR,r10 391 + 392 + isync 396 393 397 394 /* 398 395 * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+4
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 57 57 58 58 preempt_disable(); 59 59 60 + asm volatile("hwsync" ::: "memory"); 61 + isync(); 60 62 /* switch the lpid first to avoid running host with unallocated pid */ 61 63 old_lpid = mfspr(SPRN_LPID); 62 64 if (old_lpid != lpid) ··· 77 75 ret = __copy_to_user_inatomic((void __user *)to, from, n); 78 76 pagefault_enable(); 79 77 78 + asm volatile("hwsync" ::: "memory"); 79 + isync(); 80 80 /* switch the pid first to avoid running host with unallocated pid */ 81 81 if (quadrant == 1 && pid != old_pid) 82 82 mtspr(SPRN_PID, old_pid);
+459 -417
arch/powerpc/kvm/book3s_hv.c
··· 80 80 #include <asm/plpar_wrappers.h> 81 81 82 82 #include "book3s.h" 83 + #include "book3s_hv.h" 83 84 84 85 #define CREATE_TRACE_POINTS 85 86 #include "trace_hv.h" ··· 127 126 static bool nested = true; 128 127 module_param(nested, bool, S_IRUGO | S_IWUSR); 129 128 MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)"); 130 - 131 - static inline bool nesting_enabled(struct kvm *kvm) 132 - { 133 - return kvm->arch.nested_enable && kvm_is_radix(kvm); 134 - } 135 129 136 130 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 137 131 ··· 272 276 * they should never fail.) 273 277 */ 274 278 275 - static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) 279 + static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) 276 280 { 277 281 unsigned long flags; 278 282 283 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 284 + 279 285 spin_lock_irqsave(&vc->stoltb_lock, flags); 280 - vc->preempt_tb = mftb(); 286 + vc->preempt_tb = tb; 281 287 spin_unlock_irqrestore(&vc->stoltb_lock, flags); 282 288 } 283 289 284 - static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) 290 + static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb) 285 291 { 286 292 unsigned long flags; 287 293 294 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 295 + 288 296 spin_lock_irqsave(&vc->stoltb_lock, flags); 289 297 if (vc->preempt_tb != TB_NIL) { 290 - vc->stolen_tb += mftb() - vc->preempt_tb; 298 + vc->stolen_tb += tb - vc->preempt_tb; 291 299 vc->preempt_tb = TB_NIL; 292 300 } 293 301 spin_unlock_irqrestore(&vc->stoltb_lock, flags); ··· 301 301 { 302 302 struct kvmppc_vcore *vc = vcpu->arch.vcore; 303 303 unsigned long flags; 304 + u64 now; 305 + 306 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 307 + return; 308 + 309 + now = mftb(); 304 310 305 311 /* 306 312 * We can test vc->runner without taking the vcore lock, ··· 315 309 * ever sets it to NULL. 
316 310 */ 317 311 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) 318 - kvmppc_core_end_stolen(vc); 312 + kvmppc_core_end_stolen(vc, now); 319 313 320 314 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 321 315 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && 322 316 vcpu->arch.busy_preempt != TB_NIL) { 323 - vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; 317 + vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt; 324 318 vcpu->arch.busy_preempt = TB_NIL; 325 319 } 326 320 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); ··· 330 324 { 331 325 struct kvmppc_vcore *vc = vcpu->arch.vcore; 332 326 unsigned long flags; 327 + u64 now; 328 + 329 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 330 + return; 331 + 332 + now = mftb(); 333 333 334 334 if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) 335 - kvmppc_core_start_stolen(vc); 335 + kvmppc_core_start_stolen(vc, now); 336 336 337 337 spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 338 338 if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) 339 - vcpu->arch.busy_preempt = mftb(); 339 + vcpu->arch.busy_preempt = now; 340 340 spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); 341 341 } 342 342 ··· 687 675 u64 p; 688 676 unsigned long flags; 689 677 678 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 679 + 690 680 spin_lock_irqsave(&vc->stoltb_lock, flags); 691 681 p = vc->stolen_tb; 692 682 if (vc->vcore_state != VCORE_INACTIVE && ··· 698 684 return p; 699 685 } 700 686 701 - static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, 702 - struct kvmppc_vcore *vc) 687 + static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, 688 + unsigned int pcpu, u64 now, 689 + unsigned long stolen) 703 690 { 704 691 struct dtl_entry *dt; 705 692 struct lppaca *vpa; 706 - unsigned long stolen; 707 - unsigned long core_stolen; 708 - u64 now; 709 - unsigned long flags; 710 693 711 694 dt = vcpu->arch.dtl_ptr; 712 695 vpa = vcpu->arch.vpa.pinned_addr; 713 - now = mftb(); 714 - 
core_stolen = vcore_stolen_time(vc, now); 715 - stolen = core_stolen - vcpu->arch.stolen_logged; 716 - vcpu->arch.stolen_logged = core_stolen; 717 - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 718 - stolen += vcpu->arch.busy_stolen; 719 - vcpu->arch.busy_stolen = 0; 720 - spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); 696 + 721 697 if (!dt || !vpa) 722 698 return; 723 - memset(dt, 0, sizeof(struct dtl_entry)); 699 + 724 700 dt->dispatch_reason = 7; 725 - dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); 726 - dt->timebase = cpu_to_be64(now + vc->tb_offset); 701 + dt->preempt_reason = 0; 702 + dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid); 727 703 dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); 704 + dt->ready_to_enqueue_time = 0; 705 + dt->waiting_to_ready_time = 0; 706 + dt->timebase = cpu_to_be64(now); 707 + dt->fault_addr = 0; 728 708 dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); 729 709 dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); 710 + 730 711 ++dt; 731 712 if (dt == vcpu->arch.dtl.pinned_end) 732 713 dt = vcpu->arch.dtl.pinned_addr; ··· 732 723 vcpu->arch.dtl.dirty = true; 733 724 } 734 725 726 + static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, 727 + struct kvmppc_vcore *vc) 728 + { 729 + unsigned long stolen; 730 + unsigned long core_stolen; 731 + u64 now; 732 + unsigned long flags; 733 + 734 + now = mftb(); 735 + 736 + core_stolen = vcore_stolen_time(vc, now); 737 + stolen = core_stolen - vcpu->arch.stolen_logged; 738 + vcpu->arch.stolen_logged = core_stolen; 739 + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); 740 + stolen += vcpu->arch.busy_stolen; 741 + vcpu->arch.busy_stolen = 0; 742 + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); 743 + 744 + __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen); 745 + } 746 + 735 747 /* See if there is a doorbell interrupt pending for a vcpu */ 736 748 static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) 737 749 { ··· 761 731 762 732 
if (vcpu->arch.doorbell_request) 763 733 return true; 734 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 735 + return false; 764 736 /* 765 737 * Ensure that the read of vcore->dpdes comes after the read 766 738 * of vcpu->doorbell_request. This barrier matches the ··· 932 900 * mode handler is not called but no other threads are in the 933 901 * source vcore. 934 902 */ 935 - 936 - spin_lock(&vcore->lock); 937 - if (target->arch.state == KVMPPC_VCPU_RUNNABLE && 938 - vcore->vcore_state != VCORE_INACTIVE && 939 - vcore->runner) 940 - target = vcore->runner; 941 - spin_unlock(&vcore->lock); 903 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 904 + spin_lock(&vcore->lock); 905 + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && 906 + vcore->vcore_state != VCORE_INACTIVE && 907 + vcore->runner) 908 + target = vcore->runner; 909 + spin_unlock(&vcore->lock); 910 + } 942 911 943 912 return kvm_vcpu_yield_to(target); 944 913 } ··· 1454 1421 return RESUME_GUEST; 1455 1422 } 1456 1423 1424 + /* 1425 + * If the lppaca had pmcregs_in_use clear when we exited the guest, then 1426 + * HFSCR_PM is cleared for next entry. If the guest then tries to access 1427 + * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM 1428 + * back in the guest HFSCR will cause the next entry to load the PMU SPRs and 1429 + * allow the guest access to continue. 
1430 + */ 1431 + static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu) 1432 + { 1433 + if (!(vcpu->arch.hfscr_permitted & HFSCR_PM)) 1434 + return EMULATE_FAIL; 1435 + 1436 + vcpu->arch.hfscr |= HFSCR_PM; 1437 + 1438 + return RESUME_GUEST; 1439 + } 1440 + 1441 + static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu) 1442 + { 1443 + if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB)) 1444 + return EMULATE_FAIL; 1445 + 1446 + vcpu->arch.hfscr |= HFSCR_EBB; 1447 + 1448 + return RESUME_GUEST; 1449 + } 1450 + 1451 + static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu) 1452 + { 1453 + if (!(vcpu->arch.hfscr_permitted & HFSCR_TM)) 1454 + return EMULATE_FAIL; 1455 + 1456 + vcpu->arch.hfscr |= HFSCR_TM; 1457 + 1458 + return RESUME_GUEST; 1459 + } 1460 + 1457 1461 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, 1458 1462 struct task_struct *tsk) 1459 1463 { ··· 1521 1451 run->ready_for_interrupt_injection = 1; 1522 1452 switch (vcpu->arch.trap) { 1523 1453 /* We're good on these - the host merely wanted to get our attention */ 1454 + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: 1455 + WARN_ON_ONCE(1); /* Should never happen */ 1456 + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; 1457 + fallthrough; 1524 1458 case BOOK3S_INTERRUPT_HV_DECREMENTER: 1525 1459 vcpu->stat.dec_exits++; 1526 1460 r = RESUME_GUEST; ··· 1649 1575 unsigned long vsid; 1650 1576 long err; 1651 1577 1652 - if (vcpu->arch.fault_dsisr == HDSISR_CANARY) { 1578 + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) && 1579 + unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) { 1653 1580 r = RESUME_GUEST; /* Just retry if it's the canary */ 1654 1581 break; 1655 1582 } ··· 1777 1702 * to emulate. 1778 1703 * Otherwise, we just generate a program interrupt to the guest. 
1779 1704 */ 1780 - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: 1705 + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { 1706 + u64 cause = vcpu->arch.hfscr >> 56; 1707 + 1781 1708 r = EMULATE_FAIL; 1782 - if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && 1783 - cpu_has_feature(CPU_FTR_ARCH_300)) 1784 - r = kvmppc_emulate_doorbell_instr(vcpu); 1709 + if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1710 + if (cause == FSCR_MSGP_LG) 1711 + r = kvmppc_emulate_doorbell_instr(vcpu); 1712 + if (cause == FSCR_PM_LG) 1713 + r = kvmppc_pmu_unavailable(vcpu); 1714 + if (cause == FSCR_EBB_LG) 1715 + r = kvmppc_ebb_unavailable(vcpu); 1716 + if (cause == FSCR_TM_LG) 1717 + r = kvmppc_tm_unavailable(vcpu); 1718 + } 1785 1719 if (r == EMULATE_FAIL) { 1786 1720 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 1787 1721 r = RESUME_GUEST; 1788 1722 } 1789 1723 break; 1724 + } 1790 1725 1791 1726 case BOOK3S_INTERRUPT_HV_RM_HARD: 1792 1727 r = RESUME_PASSTHROUGH; ··· 1852 1767 case BOOK3S_INTERRUPT_H_VIRT: 1853 1768 vcpu->stat.ext_intr_exits++; 1854 1769 r = RESUME_GUEST; 1770 + break; 1771 + /* These need to go to the nested HV */ 1772 + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: 1773 + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; 1774 + vcpu->stat.dec_exits++; 1775 + r = RESUME_HOST; 1855 1776 break; 1856 1777 /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/ 1857 1778 case BOOK3S_INTERRUPT_HMI: ··· 2187 2096 * either vcore->dpdes or doorbell_request. 2188 2097 * On POWER8, doorbell_request is 0. 
2189 2098 */ 2190 - *val = get_reg_val(id, vcpu->arch.vcore->dpdes | 2191 - vcpu->arch.doorbell_request); 2099 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 2100 + *val = get_reg_val(id, vcpu->arch.doorbell_request); 2101 + else 2102 + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); 2192 2103 break; 2193 2104 case KVM_REG_PPC_VTB: 2194 2105 *val = get_reg_val(id, vcpu->arch.vcore->vtb); ··· 2331 2238 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 2332 2239 break; 2333 2240 case KVM_REG_PPC_DEC_EXPIRY: 2334 - *val = get_reg_val(id, vcpu->arch.dec_expires + 2335 - vcpu->arch.vcore->tb_offset); 2241 + *val = get_reg_val(id, vcpu->arch.dec_expires); 2336 2242 break; 2337 2243 case KVM_REG_PPC_ONLINE: 2338 2244 *val = get_reg_val(id, vcpu->arch.online); ··· 2427 2335 vcpu->arch.pspb = set_reg_val(id, *val); 2428 2336 break; 2429 2337 case KVM_REG_PPC_DPDES: 2430 - vcpu->arch.vcore->dpdes = set_reg_val(id, *val); 2338 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 2339 + vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1; 2340 + else 2341 + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); 2431 2342 break; 2432 2343 case KVM_REG_PPC_VTB: 2433 2344 vcpu->arch.vcore->vtb = set_reg_val(id, *val); ··· 2586 2491 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 2587 2492 break; 2588 2493 case KVM_REG_PPC_DEC_EXPIRY: 2589 - vcpu->arch.dec_expires = set_reg_val(id, *val) - 2590 - vcpu->arch.vcore->tb_offset; 2494 + vcpu->arch.dec_expires = set_reg_val(id, *val); 2591 2495 break; 2592 2496 case KVM_REG_PPC_ONLINE: 2593 2497 i = set_reg_val(id, *val); ··· 2809 2715 #endif 2810 2716 #endif 2811 2717 vcpu->arch.mmcr[0] = MMCR0_FC; 2718 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 2719 + vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT; 2720 + vcpu->arch.mmcra = MMCRA_BHRB_DISABLE; 2721 + } 2722 + 2812 2723 vcpu->arch.ctrl = CTRL_RUNLATCH; 2813 2724 /* default to host PVR, since we can't spoof it */ 2814 2725 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); ··· 2843 2744 vcpu->arch.hfscr |= 
HFSCR_TM; 2844 2745 2845 2746 vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; 2747 + 2748 + /* 2749 + * PM, EBB, TM are demand-faulted so start with it clear. 2750 + */ 2751 + vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM); 2846 2752 2847 2753 kvmppc_mmu_book3s_hv_init(vcpu); 2848 2754 ··· 2973 2869 unsigned long dec_nsec, now; 2974 2870 2975 2871 now = get_tb(); 2976 - if (now > vcpu->arch.dec_expires) { 2872 + if (now > kvmppc_dec_expires_host_tb(vcpu)) { 2977 2873 /* decrementer has already gone negative */ 2978 2874 kvmppc_core_queue_dec(vcpu); 2979 2875 kvmppc_core_prepare_to_enter(vcpu); 2980 2876 return; 2981 2877 } 2982 - dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); 2878 + dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now); 2983 2879 hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); 2984 2880 vcpu->arch.timer_running = 1; 2985 2881 } ··· 2987 2883 extern int __kvmppc_vcore_entry(void); 2988 2884 2989 2885 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 2990 - struct kvm_vcpu *vcpu) 2886 + struct kvm_vcpu *vcpu, u64 tb) 2991 2887 { 2992 2888 u64 now; 2993 2889 2994 2890 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 2995 2891 return; 2996 2892 spin_lock_irq(&vcpu->arch.tbacct_lock); 2997 - now = mftb(); 2893 + now = tb; 2998 2894 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - 2999 2895 vcpu->arch.stolen_logged; 3000 2896 vcpu->arch.busy_preempt = now; ··· 3049 2945 tpaca->kvm_hstate.kvm_split_mode = NULL; 3050 2946 } 3051 2947 2948 + static DEFINE_PER_CPU(struct kvm *, cpu_in_guest); 2949 + 3052 2950 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) 3053 2951 { 3054 2952 struct kvm_nested_guest *nested = vcpu->arch.nested; 3055 - cpumask_t *cpu_in_guest; 2953 + cpumask_t *need_tlb_flush; 3056 2954 int i; 3057 2955 2956 + if (nested) 2957 + need_tlb_flush = &nested->need_tlb_flush; 2958 + else 2959 + need_tlb_flush = &kvm->arch.need_tlb_flush; 2960 + 3058 2961 cpu = 
cpu_first_tlb_thread_sibling(cpu); 3059 - if (nested) { 3060 - cpumask_set_cpu(cpu, &nested->need_tlb_flush); 3061 - cpu_in_guest = &nested->cpu_in_guest; 3062 - } else { 3063 - cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); 3064 - cpu_in_guest = &kvm->arch.cpu_in_guest; 3065 - } 3066 - /* 3067 - * Make sure setting of bit in need_tlb_flush precedes 3068 - * testing of cpu_in_guest bits. The matching barrier on 3069 - * the other side is the first smp_mb() in kvmppc_run_core(). 3070 - */ 3071 - smp_mb(); 3072 2962 for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); 3073 2963 i += cpu_tlb_thread_sibling_step()) 3074 - if (cpumask_test_cpu(i, cpu_in_guest)) 2964 + cpumask_set_cpu(i, need_tlb_flush); 2965 + 2966 + /* 2967 + * Make sure setting of bit in need_tlb_flush precedes testing of 2968 + * cpu_in_guest. The matching barrier on the other side is hwsync 2969 + * when switching to guest MMU mode, which happens between 2970 + * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit 2971 + * being tested. 2972 + */ 2973 + smp_mb(); 2974 + 2975 + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); 2976 + i += cpu_tlb_thread_sibling_step()) { 2977 + struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i); 2978 + 2979 + if (running == kvm) 3075 2980 smp_call_function_single(i, do_nothing, NULL, 1); 2981 + } 2982 + } 2983 + 2984 + static void do_migrate_away_vcpu(void *arg) 2985 + { 2986 + struct kvm_vcpu *vcpu = arg; 2987 + struct kvm *kvm = vcpu->kvm; 2988 + 2989 + /* 2990 + * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync; 2991 + * ptesync sequence on the old CPU before migrating to a new one, in 2992 + * case we interrupted the guest between a tlbie ; eieio ; 2993 + * tlbsync; ptesync sequence. 2994 + * 2995 + * Otherwise, ptesync is sufficient for ordering tlbiel sequences. 
2996 + */ 2997 + if (kvm->arch.lpcr & LPCR_GTSE) 2998 + asm volatile("eieio; tlbsync; ptesync"); 2999 + else 3000 + asm volatile("ptesync"); 3076 3001 } 3077 3002 3078 3003 static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) ··· 3127 2994 * can move around between pcpus. To cope with this, when 3128 2995 * a vcpu moves from one pcpu to another, we need to tell 3129 2996 * any vcpus running on the same core as this vcpu previously 3130 - * ran to flush the TLB. The TLB is shared between threads, 3131 - * so we use a single bit in .need_tlb_flush for all 4 threads. 2997 + * ran to flush the TLB. 3132 2998 */ 3133 2999 if (prev_cpu != pcpu) { 3134 - if (prev_cpu >= 0 && 3135 - cpu_first_tlb_thread_sibling(prev_cpu) != 3136 - cpu_first_tlb_thread_sibling(pcpu)) 3137 - radix_flush_cpu(kvm, prev_cpu, vcpu); 3000 + if (prev_cpu >= 0) { 3001 + if (cpu_first_tlb_thread_sibling(prev_cpu) != 3002 + cpu_first_tlb_thread_sibling(pcpu)) 3003 + radix_flush_cpu(kvm, prev_cpu, vcpu); 3004 + 3005 + smp_call_function_single(prev_cpu, 3006 + do_migrate_away_vcpu, vcpu, 1); 3007 + } 3138 3008 if (nested) 3139 3009 nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; 3140 3010 else ··· 3149 3013 { 3150 3014 int cpu; 3151 3015 struct paca_struct *tpaca; 3152 - struct kvm *kvm = vc->kvm; 3153 3016 3154 3017 cpu = vc->pcpu; 3155 3018 if (vcpu) { ··· 3159 3024 cpu += vcpu->arch.ptid; 3160 3025 vcpu->cpu = vc->pcpu; 3161 3026 vcpu->arch.thread_cpu = cpu; 3162 - cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); 3163 3027 } 3164 3028 tpaca = paca_ptrs[cpu]; 3165 3029 tpaca->kvm_hstate.kvm_vcpu = vcpu; ··· 3259 3125 { 3260 3126 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); 3261 3127 3128 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 3129 + 3262 3130 vc->vcore_state = VCORE_PREEMPT; 3263 3131 vc->pcpu = smp_processor_id(); 3264 3132 if (vc->num_threads < threads_per_vcore(vc->kvm)) { ··· 3270 3134 } 3271 3135 3272 3136 /* Start accumulating stolen time */ 
3273 - kvmppc_core_start_stolen(vc); 3137 + kvmppc_core_start_stolen(vc, mftb()); 3274 3138 } 3275 3139 3276 3140 static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) 3277 3141 { 3278 3142 struct preempted_vcore_list *lp; 3279 3143 3280 - kvmppc_core_end_stolen(vc); 3144 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 3145 + 3146 + kvmppc_core_end_stolen(vc, mftb()); 3281 3147 if (!list_empty(&vc->preempt_list)) { 3282 3148 lp = &per_cpu(preempted_vcores, vc->pcpu); 3283 3149 spin_lock(&lp->lock); ··· 3406 3268 vcpu->arch.ret = RESUME_GUEST; 3407 3269 else 3408 3270 continue; 3409 - kvmppc_remove_runnable(vc, vcpu); 3271 + kvmppc_remove_runnable(vc, vcpu, mftb()); 3410 3272 wake_up(&vcpu->arch.cpu_run); 3411 3273 } 3412 3274 } ··· 3425 3287 list_del_init(&pvc->preempt_list); 3426 3288 if (pvc->runner == NULL) { 3427 3289 pvc->vcore_state = VCORE_INACTIVE; 3428 - kvmppc_core_end_stolen(pvc); 3290 + kvmppc_core_end_stolen(pvc, mftb()); 3429 3291 } 3430 3292 spin_unlock(&pvc->lock); 3431 3293 continue; ··· 3434 3296 spin_unlock(&pvc->lock); 3435 3297 continue; 3436 3298 } 3437 - kvmppc_core_end_stolen(pvc); 3299 + kvmppc_core_end_stolen(pvc, mftb()); 3438 3300 pvc->vcore_state = VCORE_PIGGYBACK; 3439 3301 if (cip->total_threads >= target_threads) 3440 3302 break; ··· 3478 3340 */ 3479 3341 spin_unlock(&vc->lock); 3480 3342 /* cancel pending dec exception if dec is positive */ 3481 - if (now < vcpu->arch.dec_expires && 3343 + if (now < kvmppc_dec_expires_host_tb(vcpu) && 3482 3344 kvmppc_core_pending_dec(vcpu)) 3483 3345 kvmppc_core_dequeue_dec(vcpu); 3484 3346 ··· 3501 3363 else 3502 3364 ++still_running; 3503 3365 } else { 3504 - kvmppc_remove_runnable(vc, vcpu); 3366 + kvmppc_remove_runnable(vc, vcpu, mftb()); 3505 3367 wake_up(&vcpu->arch.cpu_run); 3506 3368 } 3507 3369 } ··· 3510 3372 kvmppc_vcore_preempt(vc); 3511 3373 } else if (vc->runner) { 3512 3374 vc->vcore_state = VCORE_PREEMPT; 3513 - kvmppc_core_start_stolen(vc); 3375 + 
kvmppc_core_start_stolen(vc, mftb()); 3514 3376 } else { 3515 3377 vc->vcore_state = VCORE_INACTIVE; 3516 3378 } ··· 3641 3503 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 3642 3504 for_each_runnable_thread(i, vcpu, vc) { 3643 3505 vcpu->arch.ret = -EBUSY; 3644 - kvmppc_remove_runnable(vc, vcpu); 3506 + kvmppc_remove_runnable(vc, vcpu, mftb()); 3645 3507 wake_up(&vcpu->arch.cpu_run); 3646 3508 } 3647 3509 goto out; ··· 3886 3748 kvmppc_release_hwthread(pcpu + i); 3887 3749 if (sip && sip->napped[i]) 3888 3750 kvmppc_ipi_thread(pcpu + i); 3889 - cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); 3890 3751 } 3891 3752 3892 3753 spin_unlock(&vc->lock); ··· 3907 3770 trace_kvmppc_run_core(vc, 1); 3908 3771 } 3909 3772 3910 - static void load_spr_state(struct kvm_vcpu *vcpu) 3911 - { 3912 - mtspr(SPRN_DSCR, vcpu->arch.dscr); 3913 - mtspr(SPRN_IAMR, vcpu->arch.iamr); 3914 - mtspr(SPRN_PSPB, vcpu->arch.pspb); 3915 - mtspr(SPRN_FSCR, vcpu->arch.fscr); 3916 - mtspr(SPRN_TAR, vcpu->arch.tar); 3917 - mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); 3918 - mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); 3919 - mtspr(SPRN_BESCR, vcpu->arch.bescr); 3920 - mtspr(SPRN_TIDR, vcpu->arch.tid); 3921 - mtspr(SPRN_AMR, vcpu->arch.amr); 3922 - mtspr(SPRN_UAMOR, vcpu->arch.uamor); 3923 - 3924 - /* 3925 - * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] 3926 - * clear (or hstate set appropriately to catch those registers 3927 - * being clobbered if we take a MCE or SRESET), so those are done 3928 - * later. 
3929 - */ 3930 - 3931 - if (!(vcpu->arch.ctrl & 1)) 3932 - mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); 3933 - } 3934 - 3935 - static void store_spr_state(struct kvm_vcpu *vcpu) 3936 - { 3937 - vcpu->arch.ctrl = mfspr(SPRN_CTRLF); 3938 - 3939 - vcpu->arch.iamr = mfspr(SPRN_IAMR); 3940 - vcpu->arch.pspb = mfspr(SPRN_PSPB); 3941 - vcpu->arch.fscr = mfspr(SPRN_FSCR); 3942 - vcpu->arch.tar = mfspr(SPRN_TAR); 3943 - vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); 3944 - vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); 3945 - vcpu->arch.bescr = mfspr(SPRN_BESCR); 3946 - vcpu->arch.tid = mfspr(SPRN_TIDR); 3947 - vcpu->arch.amr = mfspr(SPRN_AMR); 3948 - vcpu->arch.uamor = mfspr(SPRN_UAMOR); 3949 - vcpu->arch.dscr = mfspr(SPRN_DSCR); 3950 - } 3951 - 3952 - /* 3953 - * Privileged (non-hypervisor) host registers to save. 3954 - */ 3955 - struct p9_host_os_sprs { 3956 - unsigned long dscr; 3957 - unsigned long tidr; 3958 - unsigned long iamr; 3959 - unsigned long amr; 3960 - unsigned long fscr; 3961 - }; 3962 - 3963 - static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) 3964 - { 3965 - host_os_sprs->dscr = mfspr(SPRN_DSCR); 3966 - host_os_sprs->tidr = mfspr(SPRN_TIDR); 3967 - host_os_sprs->iamr = mfspr(SPRN_IAMR); 3968 - host_os_sprs->amr = mfspr(SPRN_AMR); 3969 - host_os_sprs->fscr = mfspr(SPRN_FSCR); 3970 - } 3971 - 3972 - /* vcpu guest regs must already be saved */ 3973 - static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, 3974 - struct p9_host_os_sprs *host_os_sprs) 3975 - { 3976 - mtspr(SPRN_PSPB, 0); 3977 - mtspr(SPRN_UAMOR, 0); 3978 - 3979 - mtspr(SPRN_DSCR, host_os_sprs->dscr); 3980 - mtspr(SPRN_TIDR, host_os_sprs->tidr); 3981 - mtspr(SPRN_IAMR, host_os_sprs->iamr); 3982 - 3983 - if (host_os_sprs->amr != vcpu->arch.amr) 3984 - mtspr(SPRN_AMR, host_os_sprs->amr); 3985 - 3986 - if (host_os_sprs->fscr != vcpu->arch.fscr) 3987 - mtspr(SPRN_FSCR, host_os_sprs->fscr); 3988 - 3989 - /* Save guest CTRL register, set runlatch to 1 */ 3990 - if (!(vcpu->arch.ctrl & 1)) 3991 - 
mtspr(SPRN_CTRLT, 1); 3992 - } 3993 - 3994 3773 static inline bool hcall_is_xics(unsigned long req) 3995 3774 { 3996 3775 return req == H_EOI || req == H_CPPR || req == H_IPI || 3997 3776 req == H_IPOLL || req == H_XIRR || req == H_XIRR_X; 3998 3777 } 3999 3778 4000 - /* 4001 - * Guest entry for POWER9 and later CPUs. 4002 - */ 4003 - static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, 4004 - unsigned long lpcr) 3779 + static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) 4005 3780 { 4006 - struct kvmppc_vcore *vc = vcpu->arch.vcore; 4007 - struct p9_host_os_sprs host_os_sprs; 4008 - s64 dec; 4009 - u64 tb; 4010 - int trap, save_pmu; 4011 - 4012 - WARN_ON_ONCE(vcpu->arch.ceded); 4013 - 4014 - dec = mfspr(SPRN_DEC); 4015 - tb = mftb(); 4016 - if (dec < 0) 4017 - return BOOK3S_INTERRUPT_HV_DECREMENTER; 4018 - local_paca->kvm_hstate.dec_expires = dec + tb; 4019 - if (local_paca->kvm_hstate.dec_expires < time_limit) 4020 - time_limit = local_paca->kvm_hstate.dec_expires; 4021 - 4022 - save_p9_host_os_sprs(&host_os_sprs); 4023 - 4024 - kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */ 4025 - 4026 - kvmppc_subcore_enter_guest(); 4027 - 4028 - vc->entry_exit_map = 1; 4029 - vc->in_guest = 1; 4030 - 4031 - if (vcpu->arch.vpa.pinned_addr) { 4032 - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; 3781 + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; 3782 + if (lp) { 4033 3783 u32 yield_count = be32_to_cpu(lp->yield_count) + 1; 4034 3784 lp->yield_count = cpu_to_be32(yield_count); 4035 3785 vcpu->arch.vpa.dirty = 1; 4036 3786 } 3787 + } 4037 3788 4038 - if (cpu_has_feature(CPU_FTR_TM) || 4039 - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) 4040 - kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); 3789 + /* call our hypervisor to load up HV regs and go */ 3790 + static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) 3791 + { 3792 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 3793 + unsigned 
long host_psscr; 3794 + unsigned long msr; 3795 + struct hv_guest_state hvregs; 3796 + struct p9_host_os_sprs host_os_sprs; 3797 + s64 dec; 3798 + int trap; 4041 3799 4042 - #ifdef CONFIG_PPC_PSERIES 4043 - if (kvmhv_on_pseries()) { 4044 - barrier(); 4045 - if (vcpu->arch.vpa.pinned_addr) { 4046 - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; 4047 - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; 4048 - } else { 4049 - get_lppaca()->pmcregs_in_use = 1; 4050 - } 4051 - barrier(); 4052 - } 4053 - #endif 4054 - kvmhv_load_guest_pmu(vcpu); 3800 + msr = mfmsr(); 4055 3801 4056 - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); 4057 - load_fp_state(&vcpu->arch.fp); 4058 - #ifdef CONFIG_ALTIVEC 4059 - load_vr_state(&vcpu->arch.vr); 4060 - #endif 4061 - mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 4062 - 4063 - load_spr_state(vcpu); 3802 + save_p9_host_os_sprs(&host_os_sprs); 4064 3803 4065 3804 /* 4066 - * When setting DEC, we must always deal with irq_work_raise via NMI vs 4067 - * setting DEC. The problem occurs right as we switch into guest mode 4068 - * if a NMI hits and sets pending work and sets DEC, then that will 4069 - * apply to the guest and not bring us back to the host. 3805 + * We need to save and restore the guest visible part of the 3806 + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor 3807 + * doesn't do this for us. Note only required if pseries since 3808 + * this is done in kvmhv_vcpu_entry_p9() below otherwise. 
3809 + */ 3810 + host_psscr = mfspr(SPRN_PSSCR_PR); 3811 + 3812 + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); 3813 + if (lazy_irq_pending()) 3814 + return 0; 3815 + 3816 + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) 3817 + msr = mfmsr(); /* TM restore can update msr */ 3818 + 3819 + if (vcpu->arch.psscr != host_psscr) 3820 + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); 3821 + 3822 + kvmhv_save_hv_regs(vcpu, &hvregs); 3823 + hvregs.lpcr = lpcr; 3824 + vcpu->arch.regs.msr = vcpu->arch.shregs.msr; 3825 + hvregs.version = HV_GUEST_STATE_VERSION; 3826 + if (vcpu->arch.nested) { 3827 + hvregs.lpid = vcpu->arch.nested->shadow_lpid; 3828 + hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; 3829 + } else { 3830 + hvregs.lpid = vcpu->kvm->arch.lpid; 3831 + hvregs.vcpu_token = vcpu->vcpu_id; 3832 + } 3833 + hvregs.hdec_expiry = time_limit; 3834 + 3835 + /* 3836 + * When setting DEC, we must always deal with irq_work_raise 3837 + * via NMI vs setting DEC. The problem occurs right as we 3838 + * switch into guest mode if a NMI hits and sets pending work 3839 + * and sets DEC, then that will apply to the guest and not 3840 + * bring us back to the host. 4070 3841 * 4071 - * irq_work_raise could check a flag (or possibly LPCR[HDICE] for 4072 - * example) and set HDEC to 1? That wouldn't solve the nested hv 4073 - * case which needs to abort the hcall or zero the time limit. 3842 + * irq_work_raise could check a flag (or possibly LPCR[HDICE] 3843 + * for example) and set HDEC to 1? That wouldn't solve the 3844 + * nested hv case which needs to abort the hcall or zero the 3845 + * time limit. 4074 3846 * 4075 3847 * XXX: Another day's problem. 
4076 3848 */ 4077 - mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); 3849 + mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb); 3850 + 3851 + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); 3852 + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); 3853 + switch_pmu_to_guest(vcpu, &host_os_sprs); 3854 + trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), 3855 + __pa(&vcpu->arch.regs)); 3856 + kvmhv_restore_hv_return_state(vcpu, &hvregs); 3857 + switch_pmu_to_host(vcpu, &host_os_sprs); 3858 + vcpu->arch.shregs.msr = vcpu->arch.regs.msr; 3859 + vcpu->arch.shregs.dar = mfspr(SPRN_DAR); 3860 + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); 3861 + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); 3862 + 3863 + store_vcpu_state(vcpu); 3864 + 3865 + dec = mfspr(SPRN_DEC); 3866 + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ 3867 + dec = (s32) dec; 3868 + *tb = mftb(); 3869 + vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset); 3870 + 3871 + timer_rearm_host_dec(*tb); 3872 + 3873 + restore_p9_host_os_sprs(vcpu, &host_os_sprs); 3874 + if (vcpu->arch.psscr != host_psscr) 3875 + mtspr(SPRN_PSSCR_PR, host_psscr); 3876 + 3877 + return trap; 3878 + } 3879 + 3880 + /* 3881 + * Guest entry for POWER9 and later CPUs. 3882 + */ 3883 + static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, 3884 + unsigned long lpcr, u64 *tb) 3885 + { 3886 + u64 next_timer; 3887 + int trap; 3888 + 3889 + next_timer = timer_get_next_tb(); 3890 + if (*tb >= next_timer) 3891 + return BOOK3S_INTERRUPT_HV_DECREMENTER; 3892 + if (next_timer < time_limit) 3893 + time_limit = next_timer; 3894 + else if (*tb >= time_limit) /* nested time limit */ 3895 + return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER; 3896 + 3897 + vcpu->arch.ceded = 0; 3898 + 3899 + vcpu_vpa_increment_dispatch(vcpu); 4078 3900 4079 3901 if (kvmhv_on_pseries()) { 4080 - /* 4081 - * We need to save and restore the guest visible part of the 4082 - * psscr (i.e. 
using SPRN_PSSCR_PR) since the hypervisor 4083 - * doesn't do this for us. Note only required if pseries since 4084 - * this is done in kvmhv_vcpu_entry_p9() below otherwise. 4085 - */ 4086 - unsigned long host_psscr; 4087 - /* call our hypervisor to load up HV regs and go */ 4088 - struct hv_guest_state hvregs; 4089 - 4090 - host_psscr = mfspr(SPRN_PSSCR_PR); 4091 - mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); 4092 - kvmhv_save_hv_regs(vcpu, &hvregs); 4093 - hvregs.lpcr = lpcr; 4094 - vcpu->arch.regs.msr = vcpu->arch.shregs.msr; 4095 - hvregs.version = HV_GUEST_STATE_VERSION; 4096 - if (vcpu->arch.nested) { 4097 - hvregs.lpid = vcpu->arch.nested->shadow_lpid; 4098 - hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; 4099 - } else { 4100 - hvregs.lpid = vcpu->kvm->arch.lpid; 4101 - hvregs.vcpu_token = vcpu->vcpu_id; 4102 - } 4103 - hvregs.hdec_expiry = time_limit; 4104 - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); 4105 - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); 4106 - trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), 4107 - __pa(&vcpu->arch.regs)); 4108 - kvmhv_restore_hv_return_state(vcpu, &hvregs); 4109 - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; 4110 - vcpu->arch.shregs.dar = mfspr(SPRN_DAR); 4111 - vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); 4112 - vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); 4113 - mtspr(SPRN_PSSCR_PR, host_psscr); 3902 + trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); 4114 3903 4115 3904 /* H_CEDE has to be handled now, not later */ 4116 3905 if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && ··· 4045 3982 kvmppc_set_gpr(vcpu, 3, 0); 4046 3983 trap = 0; 4047 3984 } 3985 + 4048 3986 } else { 3987 + struct kvm *kvm = vcpu->kvm; 3988 + 4049 3989 kvmppc_xive_push_vcpu(vcpu); 4050 - trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr); 3990 + 3991 + __this_cpu_write(cpu_in_guest, kvm); 3992 + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); 3993 + __this_cpu_write(cpu_in_guest, NULL); 3994 + 4051 3995 if (trap == 
BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && 4052 3996 !(vcpu->arch.shregs.msr & MSR_PR)) { 4053 3997 unsigned long req = kvmppc_get_gpr(vcpu, 3); ··· 4079 4009 } 4080 4010 kvmppc_xive_pull_vcpu(vcpu); 4081 4011 4082 - if (kvm_is_radix(vcpu->kvm)) 4012 + if (kvm_is_radix(kvm)) 4083 4013 vcpu->arch.slb_max = 0; 4084 4014 } 4085 4015 4086 - dec = mfspr(SPRN_DEC); 4087 - if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ 4088 - dec = (s32) dec; 4089 - tb = mftb(); 4090 - vcpu->arch.dec_expires = dec + tb; 4091 - vcpu->cpu = -1; 4092 - vcpu->arch.thread_cpu = -1; 4093 - 4094 - store_spr_state(vcpu); 4095 - 4096 - restore_p9_host_os_sprs(vcpu, &host_os_sprs); 4097 - 4098 - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); 4099 - store_fp_state(&vcpu->arch.fp); 4100 - #ifdef CONFIG_ALTIVEC 4101 - store_vr_state(&vcpu->arch.vr); 4102 - #endif 4103 - vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 4104 - 4105 - if (cpu_has_feature(CPU_FTR_TM) || 4106 - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) 4107 - kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true); 4108 - 4109 - save_pmu = 1; 4110 - if (vcpu->arch.vpa.pinned_addr) { 4111 - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; 4112 - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; 4113 - lp->yield_count = cpu_to_be32(yield_count); 4114 - vcpu->arch.vpa.dirty = 1; 4115 - save_pmu = lp->pmcregs_in_use; 4116 - } 4117 - /* Must save pmu if this guest is capable of running nested guests */ 4118 - save_pmu |= nesting_enabled(vcpu->kvm); 4119 - 4120 - kvmhv_save_guest_pmu(vcpu, save_pmu); 4121 - #ifdef CONFIG_PPC_PSERIES 4122 - if (kvmhv_on_pseries()) { 4123 - barrier(); 4124 - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); 4125 - barrier(); 4126 - } 4127 - #endif 4128 - 4129 - vc->entry_exit_map = 0x101; 4130 - vc->in_guest = 0; 4131 - 4132 - mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); 4133 - /* We may have raced with new irq work */ 4134 - if (test_irq_work_pending()) 4135 - set_dec(1); 
4136 - mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); 4137 - 4138 - kvmhv_load_host_pmu(); 4139 - 4140 - kvmppc_subcore_exit_guest(); 4016 + vcpu_vpa_increment_dispatch(vcpu); 4141 4017 4142 4018 return trap; 4143 4019 } ··· 4148 4132 return false; 4149 4133 } 4150 4134 4135 + static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu) 4136 + { 4137 + if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) 4138 + return true; 4139 + return false; 4140 + } 4141 + 4151 4142 /* 4152 4143 * Check to see if any of the runnable vcpus on the vcore have pending 4153 4144 * exceptions or are no longer ceded ··· 4165 4142 int i; 4166 4143 4167 4144 for_each_runnable_thread(i, vcpu, vc) { 4168 - if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) 4145 + if (kvmppc_vcpu_check_block(vcpu)) 4169 4146 return 1; 4170 4147 } 4171 4148 ··· 4181 4158 ktime_t cur, start_poll, start_wait; 4182 4159 int do_sleep = 1; 4183 4160 u64 block_ns; 4161 + 4162 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 4184 4163 4185 4164 /* Poll for pending exceptions and ceded state */ 4186 4165 cur = start_poll = ktime_get(); ··· 4380 4355 for_each_runnable_thread(i, v, vc) { 4381 4356 kvmppc_core_prepare_to_enter(v); 4382 4357 if (signal_pending(v->arch.run_task)) { 4383 - kvmppc_remove_runnable(vc, v); 4358 + kvmppc_remove_runnable(vc, v, mftb()); 4384 4359 v->stat.signal_exits++; 4385 4360 v->run->exit_reason = KVM_EXIT_INTR; 4386 4361 v->arch.ret = -EINTR; ··· 4421 4396 kvmppc_vcore_end_preempt(vc); 4422 4397 4423 4398 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 4424 - kvmppc_remove_runnable(vc, vcpu); 4399 + kvmppc_remove_runnable(vc, vcpu, mftb()); 4425 4400 vcpu->stat.signal_exits++; 4426 4401 run->exit_reason = KVM_EXIT_INTR; 4427 4402 vcpu->arch.ret = -EINTR; ··· 4442 4417 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, 4443 4418 unsigned long lpcr) 4444 4419 { 4420 + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); 4445 4421 struct kvm_run *run = vcpu->run; 4446 4422 
int trap, r, pcpu; 4447 4423 int srcu_idx; 4448 4424 struct kvmppc_vcore *vc; 4449 4425 struct kvm *kvm = vcpu->kvm; 4450 4426 struct kvm_nested_guest *nested = vcpu->arch.nested; 4427 + unsigned long flags; 4428 + u64 tb; 4451 4429 4452 4430 trace_kvmppc_run_vcpu_enter(vcpu); 4453 4431 ··· 4461 4433 vc = vcpu->arch.vcore; 4462 4434 vcpu->arch.ceded = 0; 4463 4435 vcpu->arch.run_task = current; 4464 - vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); 4465 4436 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; 4466 - vcpu->arch.busy_preempt = TB_NIL; 4467 4437 vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; 4468 - vc->runnable_threads[0] = vcpu; 4469 - vc->n_runnable = 1; 4470 - vc->runner = vcpu; 4471 4438 4472 4439 /* See if the MMU is ready to go */ 4473 - if (!kvm->arch.mmu_ready) { 4440 + if (unlikely(!kvm->arch.mmu_ready)) { 4474 4441 r = kvmhv_setup_mmu(vcpu); 4475 4442 if (r) { 4476 4443 run->exit_reason = KVM_EXIT_FAIL_ENTRY; ··· 4480 4457 4481 4458 kvmppc_update_vpas(vcpu); 4482 4459 4483 - init_vcore_to_run(vc); 4484 - vc->preempt_tb = TB_NIL; 4485 - 4486 4460 preempt_disable(); 4487 4461 pcpu = smp_processor_id(); 4488 - vc->pcpu = pcpu; 4489 4462 if (kvm_is_radix(kvm)) 4490 4463 kvmppc_prepare_radix_vcpu(vcpu, pcpu); 4491 4464 4492 - local_irq_disable(); 4493 - hard_irq_disable(); 4465 + /* flags save not required, but irq_pmu has no disable/enable API */ 4466 + powerpc_local_irq_pmu_save(flags); 4467 + 4494 4468 if (signal_pending(current)) 4495 4469 goto sigpend; 4496 - if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready) 4470 + if (need_resched() || !kvm->arch.mmu_ready) 4497 4471 goto out; 4498 4472 4499 4473 if (!nested) { 4500 4474 kvmppc_core_prepare_to_enter(vcpu); 4501 - if (vcpu->arch.doorbell_request) { 4502 - vc->dpdes = 1; 4503 - smp_wmb(); 4504 - vcpu->arch.doorbell_request = 0; 4505 - } 4506 4475 if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, 4507 4476 &vcpu->arch.pending_exceptions)) 4508 4477 lpcr |= LPCR_MER; ··· 4505 4490 goto out; 
4506 4491 } 4507 4492 4508 - kvmppc_clear_host_core(pcpu); 4493 + if (vcpu->arch.timer_running) { 4494 + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 4495 + vcpu->arch.timer_running = 0; 4496 + } 4509 4497 4510 - local_paca->kvm_hstate.napping = 0; 4511 - local_paca->kvm_hstate.kvm_split_mode = NULL; 4512 - kvmppc_start_thread(vcpu, vc); 4513 - kvmppc_create_dtl_entry(vcpu, vc); 4498 + tb = mftb(); 4499 + 4500 + vcpu->cpu = pcpu; 4501 + vcpu->arch.thread_cpu = pcpu; 4502 + vc->pcpu = pcpu; 4503 + local_paca->kvm_hstate.kvm_vcpu = vcpu; 4504 + local_paca->kvm_hstate.ptid = 0; 4505 + local_paca->kvm_hstate.fake_suspend = 0; 4506 + 4507 + __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); 4508 + 4514 4509 trace_kvm_guest_enter(vcpu); 4515 - 4516 - vc->vcore_state = VCORE_RUNNING; 4517 - trace_kvmppc_run_core(vc, 0); 4518 4510 4519 4511 guest_enter_irqoff(); 4520 4512 ··· 4532 4510 /* Tell lockdep that we're about to enable interrupts */ 4533 4511 trace_hardirqs_on(); 4534 4512 4535 - trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); 4513 + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb); 4536 4514 vcpu->arch.trap = trap; 4537 4515 4538 4516 trace_hardirqs_off(); ··· 4542 4520 srcu_read_unlock(&kvm->srcu, srcu_idx); 4543 4521 4544 4522 set_irq_happened(trap); 4545 - 4546 - kvmppc_set_host_core(pcpu); 4547 4523 4548 4524 context_tracking_guest_exit(); 4549 4525 if (!vtime_accounting_enabled_this_cpu()) { ··· 4558 4538 } 4559 4539 vtime_account_guest_exit(); 4560 4540 4561 - local_irq_enable(); 4541 + vcpu->cpu = -1; 4542 + vcpu->arch.thread_cpu = -1; 4562 4543 4563 - cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); 4544 + powerpc_local_irq_pmu_restore(flags); 4564 4545 4565 4546 preempt_enable(); 4566 4547 ··· 4571 4550 * by L2 and the L1 decrementer is provided in hdec_expires 4572 4551 */ 4573 4552 if (kvmppc_core_pending_dec(vcpu) && 4574 - ((get_tb() < vcpu->arch.dec_expires) || 4553 + ((tb < kvmppc_dec_expires_host_tb(vcpu)) || 4575 4554 
(trap == BOOK3S_INTERRUPT_SYSCALL && 4576 4555 kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) 4577 4556 kvmppc_core_dequeue_dec(vcpu); ··· 4586 4565 } 4587 4566 vcpu->arch.ret = r; 4588 4567 4589 - if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded && 4590 - !kvmppc_vcpu_woken(vcpu)) { 4568 + if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) { 4591 4569 kvmppc_set_timer(vcpu); 4592 - while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { 4570 + 4571 + prepare_to_rcuwait(wait); 4572 + for (;;) { 4573 + set_current_state(TASK_INTERRUPTIBLE); 4593 4574 if (signal_pending(current)) { 4594 4575 vcpu->stat.signal_exits++; 4595 4576 run->exit_reason = KVM_EXIT_INTR; 4596 4577 vcpu->arch.ret = -EINTR; 4597 4578 break; 4598 4579 } 4599 - spin_lock(&vc->lock); 4600 - kvmppc_vcore_blocked(vc); 4601 - spin_unlock(&vc->lock); 4580 + 4581 + if (kvmppc_vcpu_check_block(vcpu)) 4582 + break; 4583 + 4584 + trace_kvmppc_vcore_blocked(vc, 0); 4585 + schedule(); 4586 + trace_kvmppc_vcore_blocked(vc, 1); 4602 4587 } 4588 + finish_rcuwait(wait); 4603 4589 } 4604 4590 vcpu->arch.ceded = 0; 4605 4591 4606 - vc->vcore_state = VCORE_INACTIVE; 4607 - trace_kvmppc_run_core(vc, 1); 4608 - 4609 4592 done: 4610 - kvmppc_remove_runnable(vc, vcpu); 4611 4593 trace_kvmppc_run_vcpu_exit(vcpu); 4612 4594 4613 4595 return vcpu->arch.ret; ··· 4620 4596 run->exit_reason = KVM_EXIT_INTR; 4621 4597 vcpu->arch.ret = -EINTR; 4622 4598 out: 4623 - local_irq_enable(); 4599 + powerpc_local_irq_pmu_restore(flags); 4624 4600 preempt_enable(); 4625 4601 goto done; 4626 4602 } ··· 4630 4606 struct kvm_run *run = vcpu->run; 4631 4607 int r; 4632 4608 int srcu_idx; 4633 - unsigned long ebb_regs[3] = {}; /* shut up GCC */ 4634 - unsigned long user_tar = 0; 4635 - unsigned int user_vrsave; 4636 4609 struct kvm *kvm; 4610 + unsigned long msr; 4637 4611 4638 4612 if (!vcpu->arch.sane) { 4639 4613 run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 4640 4614 return -EINVAL; 4641 4615 } 4642 4616 4617 + /* No need to 
go into the guest when all we'll do is come back out */ 4618 + if (signal_pending(current)) { 4619 + run->exit_reason = KVM_EXIT_INTR; 4620 + return -EINTR; 4621 + } 4622 + 4623 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 4643 4624 /* 4644 4625 * Don't allow entry with a suspended transaction, because 4645 4626 * the guest entry/exit code will lose it. 4646 - * If the guest has TM enabled, save away their TM-related SPRs 4647 - * (they will get restored by the TM unavailable interrupt). 4648 4627 */ 4649 - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 4650 4628 if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && 4651 4629 (current->thread.regs->msr & MSR_TM)) { 4652 4630 if (MSR_TM_ACTIVE(current->thread.regs->msr)) { ··· 4656 4630 run->fail_entry.hardware_entry_failure_reason = 0; 4657 4631 return -EINVAL; 4658 4632 } 4659 - /* Enable TM so we can read the TM SPRs */ 4660 - mtmsr(mfmsr() | MSR_TM); 4661 - current->thread.tm_tfhar = mfspr(SPRN_TFHAR); 4662 - current->thread.tm_tfiar = mfspr(SPRN_TFIAR); 4663 - current->thread.tm_texasr = mfspr(SPRN_TEXASR); 4664 - current->thread.regs->msr &= ~MSR_TM; 4665 4633 } 4666 4634 #endif 4667 4635 ··· 4670 4650 4671 4651 kvmppc_core_prepare_to_enter(vcpu); 4672 4652 4673 - /* No need to go into the guest when all we'll do is come back out */ 4674 - if (signal_pending(current)) { 4675 - run->exit_reason = KVM_EXIT_INTR; 4676 - return -EINTR; 4677 - } 4678 - 4679 4653 kvm = vcpu->kvm; 4680 4654 atomic_inc(&kvm->arch.vcpus_running); 4681 4655 /* Order vcpus_running vs. 
mmu_ready, see kvmppc_alloc_reset_hpt */ 4682 4656 smp_mb(); 4683 4657 4684 - flush_all_to_thread(current); 4658 + msr = 0; 4659 + if (IS_ENABLED(CONFIG_PPC_FPU)) 4660 + msr |= MSR_FP; 4661 + if (cpu_has_feature(CPU_FTR_ALTIVEC)) 4662 + msr |= MSR_VEC; 4663 + if (cpu_has_feature(CPU_FTR_VSX)) 4664 + msr |= MSR_VSX; 4665 + if ((cpu_has_feature(CPU_FTR_TM) || 4666 + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && 4667 + (vcpu->arch.hfscr & HFSCR_TM)) 4668 + msr |= MSR_TM; 4669 + msr = msr_check_and_set(msr); 4685 4670 4686 - /* Save userspace EBB and other register values */ 4687 - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 4688 - ebb_regs[0] = mfspr(SPRN_EBBHR); 4689 - ebb_regs[1] = mfspr(SPRN_EBBRR); 4690 - ebb_regs[2] = mfspr(SPRN_BESCR); 4691 - user_tar = mfspr(SPRN_TAR); 4692 - } 4693 - user_vrsave = mfspr(SPRN_VRSAVE); 4671 + kvmppc_save_user_regs(); 4694 4672 4695 - vcpu->arch.waitp = &vcpu->arch.vcore->wait; 4673 + kvmppc_save_current_sprs(); 4674 + 4675 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) 4676 + vcpu->arch.waitp = &vcpu->arch.vcore->wait; 4696 4677 vcpu->arch.pgdir = kvm->mm->pgd; 4697 4678 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 4698 4679 ··· 4731 4710 r = kvmppc_xics_rm_complete(vcpu, 0); 4732 4711 } 4733 4712 } while (is_kvmppc_resume_guest(r)); 4734 - 4735 - /* Restore userspace EBB and other register values */ 4736 - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 4737 - mtspr(SPRN_EBBHR, ebb_regs[0]); 4738 - mtspr(SPRN_EBBRR, ebb_regs[1]); 4739 - mtspr(SPRN_BESCR, ebb_regs[2]); 4740 - mtspr(SPRN_TAR, user_tar); 4741 - } 4742 - mtspr(SPRN_VRSAVE, user_vrsave); 4743 4713 4744 4714 vcpu->arch.state = KVMPPC_VCPU_NOTREADY; 4745 4715 atomic_dec(&kvm->arch.vcpus_running); ··· 4873 4861 unsigned long npages = mem->memory_size >> PAGE_SHIFT; 4874 4862 4875 4863 if (change == KVM_MR_CREATE) { 4876 - slot->arch.rmap = vzalloc(array_size(npages, 4877 - sizeof(*slot->arch.rmap))); 4864 + unsigned long size = array_size(npages, sizeof(*slot->arch.rmap)); 4865 + 
4866 + if ((size >> PAGE_SHIFT) > totalram_pages()) 4867 + return -ENOMEM; 4868 + 4869 + slot->arch.rmap = vzalloc(size); 4878 4870 if (!slot->arch.rmap) 4879 4871 return -ENOMEM; 4880 4872 } ··· 5088 5072 */ 5089 5073 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) 5090 5074 { 5075 + unsigned long lpcr, lpcr_mask; 5076 + 5091 5077 if (nesting_enabled(kvm)) 5092 5078 kvmhv_release_all_nested(kvm); 5093 5079 kvmppc_rmap_reset(kvm); ··· 5099 5081 kvm->arch.radix = 0; 5100 5082 spin_unlock(&kvm->mmu_lock); 5101 5083 kvmppc_free_radix(kvm); 5102 - kvmppc_update_lpcr(kvm, LPCR_VPM1, 5103 - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); 5084 + 5085 + lpcr = LPCR_VPM1; 5086 + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; 5087 + if (cpu_has_feature(CPU_FTR_ARCH_31)) 5088 + lpcr_mask |= LPCR_HAIL; 5089 + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); 5090 + 5104 5091 return 0; 5105 5092 } 5106 5093 ··· 5115 5092 */ 5116 5093 int kvmppc_switch_mmu_to_radix(struct kvm *kvm) 5117 5094 { 5095 + unsigned long lpcr, lpcr_mask; 5118 5096 int err; 5119 5097 5120 5098 err = kvmppc_init_vm_radix(kvm); ··· 5127 5103 kvm->arch.radix = 1; 5128 5104 spin_unlock(&kvm->mmu_lock); 5129 5105 kvmppc_free_hpt(&kvm->arch.hpt); 5130 - kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, 5131 - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); 5106 + 5107 + lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR; 5108 + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; 5109 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 5110 + lpcr_mask |= LPCR_HAIL; 5111 + if (cpu_has_feature(CPU_FTR_HVMODE) && 5112 + (kvm->arch.host_lpcr & LPCR_HAIL)) 5113 + lpcr |= LPCR_HAIL; 5114 + } 5115 + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); 5116 + 5132 5117 return 0; 5133 5118 } 5134 5119 ··· 5158 5125 unsigned long l_ops; 5159 5126 int cpu, core; 5160 5127 int size; 5128 + 5129 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 5130 + return; 5161 5131 5162 5132 /* Not the first time here ? 
*/ 5163 5133 if (kvmppc_host_rm_ops_hv != NULL) ··· 5304 5268 kvm->arch.mmu_ready = 1; 5305 5269 lpcr &= ~LPCR_VPM1; 5306 5270 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; 5271 + if (cpu_has_feature(CPU_FTR_HVMODE) && 5272 + cpu_has_feature(CPU_FTR_ARCH_31) && 5273 + (kvm->arch.host_lpcr & LPCR_HAIL)) 5274 + lpcr |= LPCR_HAIL; 5307 5275 ret = kvmppc_init_vm_radix(kvm); 5308 5276 if (ret) { 5309 5277 kvmppc_free_lpid(kvm->arch.lpid); ··· 6103 6063 if (r) 6104 6064 return r; 6105 6065 6106 - r = kvm_init_subcore_bitmap(); 6107 - if (r) 6108 - return r; 6066 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 6067 + r = kvm_init_subcore_bitmap(); 6068 + if (r) 6069 + return r; 6070 + } 6109 6071 6110 6072 /* 6111 6073 * We need a way of accessing the XICS interrupt controller,
+42
arch/powerpc/kvm/book3s_hv.h
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* 4 + * Privileged (non-hypervisor) host registers to save. 5 + */ 6 + struct p9_host_os_sprs { 7 + unsigned long iamr; 8 + unsigned long amr; 9 + 10 + unsigned int pmc1; 11 + unsigned int pmc2; 12 + unsigned int pmc3; 13 + unsigned int pmc4; 14 + unsigned int pmc5; 15 + unsigned int pmc6; 16 + unsigned long mmcr0; 17 + unsigned long mmcr1; 18 + unsigned long mmcr2; 19 + unsigned long mmcr3; 20 + unsigned long mmcra; 21 + unsigned long siar; 22 + unsigned long sier1; 23 + unsigned long sier2; 24 + unsigned long sier3; 25 + unsigned long sdar; 26 + }; 27 + 28 + static inline bool nesting_enabled(struct kvm *kvm) 29 + { 30 + return kvm->arch.nested_enable && kvm_is_radix(kvm); 31 + } 32 + 33 + bool load_vcpu_state(struct kvm_vcpu *vcpu, 34 + struct p9_host_os_sprs *host_os_sprs); 35 + void store_vcpu_state(struct kvm_vcpu *vcpu); 36 + void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs); 37 + void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, 38 + struct p9_host_os_sprs *host_os_sprs); 39 + void switch_pmu_to_guest(struct kvm_vcpu *vcpu, 40 + struct p9_host_os_sprs *host_os_sprs); 41 + void switch_pmu_to_host(struct kvm_vcpu *vcpu, 42 + struct p9_host_os_sprs *host_os_sprs);
+10 -45
arch/powerpc/kvm/book3s_hv_builtin.c
··· 649 649 int ext; 650 650 unsigned long lpcr; 651 651 652 + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); 653 + 652 654 /* Insert EXTERNAL bit into LPCR at the MER bit position */ 653 655 ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1; 654 656 lpcr = mfspr(SPRN_LPCR); ··· 684 682 unsigned long rb, set; 685 683 686 684 rb = PPC_BIT(52); /* IS = 2 */ 687 - if (kvm_is_radix(kvm)) { 688 - /* R=1 PRS=1 RIC=2 */ 685 + for (set = 0; set < kvm->arch.tlb_sets; ++set) { 686 + /* R=0 PRS=0 RIC=0 */ 689 687 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 690 - : : "r" (rb), "i" (1), "i" (1), "i" (2), 688 + : : "r" (rb), "i" (0), "i" (0), "i" (0), 691 689 "r" (0) : "memory"); 692 - for (set = 1; set < kvm->arch.tlb_sets; ++set) { 693 - rb += PPC_BIT(51); /* increment set number */ 694 - /* R=1 PRS=1 RIC=0 */ 695 - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 696 - : : "r" (rb), "i" (1), "i" (1), "i" (0), 697 - "r" (0) : "memory"); 698 - } 699 - asm volatile("ptesync": : :"memory"); 700 - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. 701 - asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); 702 - } else { 703 - for (set = 0; set < kvm->arch.tlb_sets; ++set) { 704 - /* R=0 PRS=0 RIC=0 */ 705 - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 706 - : : "r" (rb), "i" (0), "i" (0), "i" (0), 707 - "r" (0) : "memory"); 708 - rb += PPC_BIT(51); /* increment set number */ 709 - } 710 - asm volatile("ptesync": : :"memory"); 711 - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. 
712 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 713 - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); 690 + rb += PPC_BIT(51); /* increment set number */ 714 691 } 692 + asm volatile("ptesync": : :"memory"); 715 693 } 716 694 717 - void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, 718 - struct kvm_nested_guest *nested) 695 + void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu) 719 696 { 720 - cpumask_t *need_tlb_flush; 721 - 722 - /* 723 - * On POWER9, individual threads can come in here, but the 724 - * TLB is shared between the 4 threads in a core, hence 725 - * invalidating on one thread invalidates for all. 726 - * Thus we make all 4 threads use the same bit. 727 - */ 728 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 729 - pcpu = cpu_first_tlb_thread_sibling(pcpu); 730 - 731 - if (nested) 732 - need_tlb_flush = &nested->need_tlb_flush; 733 - else 734 - need_tlb_flush = &kvm->arch.need_tlb_flush; 735 - 736 - if (cpumask_test_cpu(pcpu, need_tlb_flush)) { 697 + if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) { 737 698 flush_guest_tlb(kvm); 738 699 739 700 /* Clear the bit after the TLB flush */ 740 - cpumask_clear_cpu(pcpu, need_tlb_flush); 701 + cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush); 741 702 } 742 703 } 743 704 EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
+6 -1
arch/powerpc/kvm/book3s_hv_hmi.c
··· 20 20 21 21 /* 22 22 * NULL bitmap pointer indicates that KVM module hasn't 23 - * been loaded yet and hence no guests are running. 23 + * been loaded yet and hence no guests are running, or running 24 + * on POWER9 or newer CPU. 25 + * 24 26 * If no KVM is in use, no need to co-ordinate among threads 25 27 * as all of them will always be in host and no one is going 26 28 * to modify TB other than the opal hmi handler. 29 + * 30 + * POWER9 and newer don't need this synchronisation. 31 + * 27 32 * Hence, just return from here. 28 33 */ 29 34 if (!local_paca->sibling_subcore_state)
+4 -9
arch/powerpc/kvm/book3s_hv_interrupts.S
··· 104 104 mtlr r0 105 105 blr 106 106 107 - _GLOBAL(kvmhv_save_host_pmu) 107 + /* 108 + * void kvmhv_save_host_pmu(void) 109 + */ 110 + kvmhv_save_host_pmu: 108 111 BEGIN_FTR_SECTION 109 112 /* Work around P8 PMAE bug */ 110 113 li r3, -1 ··· 141 138 std r8, HSTATE_MMCR2(r13) 142 139 std r9, HSTATE_SIER(r13) 143 140 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 144 - BEGIN_FTR_SECTION 145 - mfspr r5, SPRN_MMCR3 146 - mfspr r6, SPRN_SIER2 147 - mfspr r7, SPRN_SIER3 148 - std r5, HSTATE_MMCR3(r13) 149 - std r6, HSTATE_SIER2(r13) 150 - std r7, HSTATE_SIER3(r13) 151 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) 152 141 mfspr r3, SPRN_PMC1 153 142 mfspr r5, SPRN_PMC2 154 143 mfspr r6, SPRN_PMC3
+4 -6
arch/powerpc/kvm/book3s_hv_nested.c
··· 358 358 /* convert TB values/offsets to host (L0) values */ 359 359 hdec_exp = l2_hv.hdec_expiry - vc->tb_offset; 360 360 vc->tb_offset += l2_hv.tb_offset; 361 + vcpu->arch.dec_expires += l2_hv.tb_offset; 361 362 362 363 /* set L1 state to L2 state */ 363 364 vcpu->arch.nested = l2; ··· 375 374 vcpu->arch.ret = RESUME_GUEST; 376 375 vcpu->arch.trap = 0; 377 376 do { 378 - if (mftb() >= hdec_exp) { 379 - vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; 380 - r = RESUME_HOST; 381 - break; 382 - } 383 377 r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); 384 378 } while (is_kvmppc_resume_guest(r)); 385 379 ··· 395 399 if (l2_regs.msr & MSR_TS_MASK) 396 400 vcpu->arch.shregs.msr |= MSR_TS_S; 397 401 vc->tb_offset = saved_l1_hv.tb_offset; 402 + /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */ 403 + vcpu->arch.dec_expires -= l2_hv.tb_offset; 398 404 restore_hv_regs(vcpu, &saved_l1_hv); 399 405 vcpu->arch.purr += delta_purr; 400 406 vcpu->arch.spurr += delta_spurr; ··· 580 582 if (eaddr & (0xFFFUL << 52)) 581 583 return H_PARAMETER; 582 584 583 - buf = kzalloc(n, GFP_KERNEL); 585 + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); 584 586 if (!buf) 585 587 return H_NO_MEM; 586 588
+781 -142
arch/powerpc/kvm/book3s_hv_p9_entry.c
··· 4 4 #include <asm/asm-prototypes.h> 5 5 #include <asm/dbell.h> 6 6 #include <asm/kvm_ppc.h> 7 + #include <asm/pmc.h> 7 8 #include <asm/ppc-opcode.h> 9 + 10 + #include "book3s_hv.h" 11 + 12 + static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) 13 + { 14 + if (!(mmcr0 & MMCR0_FC)) 15 + goto do_freeze; 16 + if (mmcra & MMCRA_SAMPLE_ENABLE) 17 + goto do_freeze; 18 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 19 + if (!(mmcr0 & MMCR0_PMCCEXT)) 20 + goto do_freeze; 21 + if (!(mmcra & MMCRA_BHRB_DISABLE)) 22 + goto do_freeze; 23 + } 24 + return; 25 + 26 + do_freeze: 27 + mmcr0 = MMCR0_FC; 28 + mmcra = 0; 29 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 30 + mmcr0 |= MMCR0_PMCCEXT; 31 + mmcra = MMCRA_BHRB_DISABLE; 32 + } 33 + 34 + mtspr(SPRN_MMCR0, mmcr0); 35 + mtspr(SPRN_MMCRA, mmcra); 36 + isync(); 37 + } 38 + 39 + void switch_pmu_to_guest(struct kvm_vcpu *vcpu, 40 + struct p9_host_os_sprs *host_os_sprs) 41 + { 42 + struct lppaca *lp; 43 + int load_pmu = 1; 44 + 45 + lp = vcpu->arch.vpa.pinned_addr; 46 + if (lp) 47 + load_pmu = lp->pmcregs_in_use; 48 + 49 + /* Save host */ 50 + if (ppc_get_pmu_inuse()) { 51 + /* 52 + * It might be better to put PMU handling (at least for the 53 + * host) in the perf subsystem because it knows more about what 54 + * is being used. 
55 + */ 56 + 57 + /* POWER9, POWER10 do not implement HPMC or SPMC */ 58 + 59 + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); 60 + host_os_sprs->mmcra = mfspr(SPRN_MMCRA); 61 + 62 + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); 63 + 64 + host_os_sprs->pmc1 = mfspr(SPRN_PMC1); 65 + host_os_sprs->pmc2 = mfspr(SPRN_PMC2); 66 + host_os_sprs->pmc3 = mfspr(SPRN_PMC3); 67 + host_os_sprs->pmc4 = mfspr(SPRN_PMC4); 68 + host_os_sprs->pmc5 = mfspr(SPRN_PMC5); 69 + host_os_sprs->pmc6 = mfspr(SPRN_PMC6); 70 + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); 71 + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); 72 + host_os_sprs->sdar = mfspr(SPRN_SDAR); 73 + host_os_sprs->siar = mfspr(SPRN_SIAR); 74 + host_os_sprs->sier1 = mfspr(SPRN_SIER); 75 + 76 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 77 + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); 78 + host_os_sprs->sier2 = mfspr(SPRN_SIER2); 79 + host_os_sprs->sier3 = mfspr(SPRN_SIER3); 80 + } 81 + } 82 + 83 + #ifdef CONFIG_PPC_PSERIES 84 + /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ 85 + if (kvmhv_on_pseries()) { 86 + barrier(); 87 + get_lppaca()->pmcregs_in_use = load_pmu; 88 + barrier(); 89 + } 90 + #endif 91 + 92 + /* 93 + * Load guest. If the VPA said the PMCs are not in use but the guest 94 + * tried to access them anyway, HFSCR[PM] will be set by the HFAC 95 + * fault so we can make forward progress. 
96 + */ 97 + if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { 98 + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); 99 + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); 100 + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); 101 + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); 102 + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); 103 + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); 104 + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); 105 + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); 106 + mtspr(SPRN_SDAR, vcpu->arch.sdar); 107 + mtspr(SPRN_SIAR, vcpu->arch.siar); 108 + mtspr(SPRN_SIER, vcpu->arch.sier[0]); 109 + 110 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 111 + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); 112 + mtspr(SPRN_SIER2, vcpu->arch.sier[1]); 113 + mtspr(SPRN_SIER3, vcpu->arch.sier[2]); 114 + } 115 + 116 + /* Set MMCRA then MMCR0 last */ 117 + mtspr(SPRN_MMCRA, vcpu->arch.mmcra); 118 + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); 119 + /* No isync necessary because we're starting counters */ 120 + 121 + if (!vcpu->arch.nested && 122 + (vcpu->arch.hfscr_permitted & HFSCR_PM)) 123 + vcpu->arch.hfscr |= HFSCR_PM; 124 + } 125 + } 126 + EXPORT_SYMBOL_GPL(switch_pmu_to_guest); 127 + 128 + void switch_pmu_to_host(struct kvm_vcpu *vcpu, 129 + struct p9_host_os_sprs *host_os_sprs) 130 + { 131 + struct lppaca *lp; 132 + int save_pmu = 1; 133 + 134 + lp = vcpu->arch.vpa.pinned_addr; 135 + if (lp) 136 + save_pmu = lp->pmcregs_in_use; 137 + if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { 138 + /* 139 + * Save pmu if this guest is capable of running nested guests. 140 + * This is option is for old L1s that do not set their 141 + * lppaca->pmcregs_in_use properly when entering their L2. 
142 + */ 143 + save_pmu |= nesting_enabled(vcpu->kvm); 144 + } 145 + 146 + if (save_pmu) { 147 + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); 148 + vcpu->arch.mmcra = mfspr(SPRN_MMCRA); 149 + 150 + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); 151 + 152 + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); 153 + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); 154 + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); 155 + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); 156 + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); 157 + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); 158 + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); 159 + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); 160 + vcpu->arch.sdar = mfspr(SPRN_SDAR); 161 + vcpu->arch.siar = mfspr(SPRN_SIAR); 162 + vcpu->arch.sier[0] = mfspr(SPRN_SIER); 163 + 164 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 165 + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); 166 + vcpu->arch.sier[1] = mfspr(SPRN_SIER2); 167 + vcpu->arch.sier[2] = mfspr(SPRN_SIER3); 168 + } 169 + 170 + } else if (vcpu->arch.hfscr & HFSCR_PM) { 171 + /* 172 + * The guest accessed PMC SPRs without specifying they should 173 + * be preserved, or it cleared pmcregs_in_use after the last 174 + * access. Just ensure they are frozen. 175 + */ 176 + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); 177 + 178 + /* 179 + * Demand-fault PMU register access in the guest. 180 + * 181 + * This is used to grab the guest's VPA pmcregs_in_use value 182 + * and reflect it into the host's VPA in the case of a nested 183 + * hypervisor. 184 + * 185 + * It also avoids having to zero-out SPRs after each guest 186 + * exit to avoid side-channels when. 187 + * 188 + * This is cleared here when we exit the guest, so later HFSCR 189 + * interrupt handling can add it back to run the guest with 190 + * PM enabled next time. 
191 + */ 192 + if (!vcpu->arch.nested) 193 + vcpu->arch.hfscr &= ~HFSCR_PM; 194 + } /* otherwise the PMU should still be frozen */ 195 + 196 + #ifdef CONFIG_PPC_PSERIES 197 + if (kvmhv_on_pseries()) { 198 + barrier(); 199 + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); 200 + barrier(); 201 + } 202 + #endif 203 + 204 + if (ppc_get_pmu_inuse()) { 205 + mtspr(SPRN_PMC1, host_os_sprs->pmc1); 206 + mtspr(SPRN_PMC2, host_os_sprs->pmc2); 207 + mtspr(SPRN_PMC3, host_os_sprs->pmc3); 208 + mtspr(SPRN_PMC4, host_os_sprs->pmc4); 209 + mtspr(SPRN_PMC5, host_os_sprs->pmc5); 210 + mtspr(SPRN_PMC6, host_os_sprs->pmc6); 211 + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); 212 + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); 213 + mtspr(SPRN_SDAR, host_os_sprs->sdar); 214 + mtspr(SPRN_SIAR, host_os_sprs->siar); 215 + mtspr(SPRN_SIER, host_os_sprs->sier1); 216 + 217 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 218 + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); 219 + mtspr(SPRN_SIER2, host_os_sprs->sier2); 220 + mtspr(SPRN_SIER3, host_os_sprs->sier3); 221 + } 222 + 223 + /* Set MMCRA then MMCR0 last */ 224 + mtspr(SPRN_MMCRA, host_os_sprs->mmcra); 225 + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); 226 + isync(); 227 + } 228 + } 229 + EXPORT_SYMBOL_GPL(switch_pmu_to_host); 230 + 231 + static void load_spr_state(struct kvm_vcpu *vcpu, 232 + struct p9_host_os_sprs *host_os_sprs) 233 + { 234 + /* TAR is very fast */ 235 + mtspr(SPRN_TAR, vcpu->arch.tar); 236 + 237 + #ifdef CONFIG_ALTIVEC 238 + if (cpu_has_feature(CPU_FTR_ALTIVEC) && 239 + current->thread.vrsave != vcpu->arch.vrsave) 240 + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); 241 + #endif 242 + 243 + if (vcpu->arch.hfscr & HFSCR_EBB) { 244 + if (current->thread.ebbhr != vcpu->arch.ebbhr) 245 + mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); 246 + if (current->thread.ebbrr != vcpu->arch.ebbrr) 247 + mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); 248 + if (current->thread.bescr != vcpu->arch.bescr) 249 + mtspr(SPRN_BESCR, vcpu->arch.bescr); 250 + } 251 + 252 + if 
(cpu_has_feature(CPU_FTR_P9_TIDR) && 253 + current->thread.tidr != vcpu->arch.tid) 254 + mtspr(SPRN_TIDR, vcpu->arch.tid); 255 + if (host_os_sprs->iamr != vcpu->arch.iamr) 256 + mtspr(SPRN_IAMR, vcpu->arch.iamr); 257 + if (host_os_sprs->amr != vcpu->arch.amr) 258 + mtspr(SPRN_AMR, vcpu->arch.amr); 259 + if (vcpu->arch.uamor != 0) 260 + mtspr(SPRN_UAMOR, vcpu->arch.uamor); 261 + if (current->thread.fscr != vcpu->arch.fscr) 262 + mtspr(SPRN_FSCR, vcpu->arch.fscr); 263 + if (current->thread.dscr != vcpu->arch.dscr) 264 + mtspr(SPRN_DSCR, vcpu->arch.dscr); 265 + if (vcpu->arch.pspb != 0) 266 + mtspr(SPRN_PSPB, vcpu->arch.pspb); 267 + 268 + /* 269 + * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] 270 + * clear (or hstate set appropriately to catch those registers 271 + * being clobbered if we take a MCE or SRESET), so those are done 272 + * later. 273 + */ 274 + 275 + if (!(vcpu->arch.ctrl & 1)) 276 + mtspr(SPRN_CTRLT, 0); 277 + } 278 + 279 + static void store_spr_state(struct kvm_vcpu *vcpu) 280 + { 281 + vcpu->arch.tar = mfspr(SPRN_TAR); 282 + 283 + #ifdef CONFIG_ALTIVEC 284 + if (cpu_has_feature(CPU_FTR_ALTIVEC)) 285 + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); 286 + #endif 287 + 288 + if (vcpu->arch.hfscr & HFSCR_EBB) { 289 + vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); 290 + vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); 291 + vcpu->arch.bescr = mfspr(SPRN_BESCR); 292 + } 293 + 294 + if (cpu_has_feature(CPU_FTR_P9_TIDR)) 295 + vcpu->arch.tid = mfspr(SPRN_TIDR); 296 + vcpu->arch.iamr = mfspr(SPRN_IAMR); 297 + vcpu->arch.amr = mfspr(SPRN_AMR); 298 + vcpu->arch.uamor = mfspr(SPRN_UAMOR); 299 + vcpu->arch.fscr = mfspr(SPRN_FSCR); 300 + vcpu->arch.dscr = mfspr(SPRN_DSCR); 301 + vcpu->arch.pspb = mfspr(SPRN_PSPB); 302 + 303 + vcpu->arch.ctrl = mfspr(SPRN_CTRLF); 304 + } 305 + 306 + /* Returns true if current MSR and/or guest MSR may have changed */ 307 + bool load_vcpu_state(struct kvm_vcpu *vcpu, 308 + struct p9_host_os_sprs *host_os_sprs) 309 + { 310 + bool ret = false; 
311 + 312 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 313 + if (cpu_has_feature(CPU_FTR_TM) || 314 + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { 315 + unsigned long guest_msr = vcpu->arch.shregs.msr; 316 + if (MSR_TM_ACTIVE(guest_msr)) { 317 + kvmppc_restore_tm_hv(vcpu, guest_msr, true); 318 + ret = true; 319 + } else if (vcpu->arch.hfscr & HFSCR_TM) { 320 + mtspr(SPRN_TEXASR, vcpu->arch.texasr); 321 + mtspr(SPRN_TFHAR, vcpu->arch.tfhar); 322 + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); 323 + } 324 + } 325 + #endif 326 + 327 + load_spr_state(vcpu, host_os_sprs); 328 + 329 + load_fp_state(&vcpu->arch.fp); 330 + #ifdef CONFIG_ALTIVEC 331 + load_vr_state(&vcpu->arch.vr); 332 + #endif 333 + 334 + return ret; 335 + } 336 + EXPORT_SYMBOL_GPL(load_vcpu_state); 337 + 338 + void store_vcpu_state(struct kvm_vcpu *vcpu) 339 + { 340 + store_spr_state(vcpu); 341 + 342 + store_fp_state(&vcpu->arch.fp); 343 + #ifdef CONFIG_ALTIVEC 344 + store_vr_state(&vcpu->arch.vr); 345 + #endif 346 + 347 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 348 + if (cpu_has_feature(CPU_FTR_TM) || 349 + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { 350 + unsigned long guest_msr = vcpu->arch.shregs.msr; 351 + if (MSR_TM_ACTIVE(guest_msr)) { 352 + kvmppc_save_tm_hv(vcpu, guest_msr, true); 353 + } else if (vcpu->arch.hfscr & HFSCR_TM) { 354 + vcpu->arch.texasr = mfspr(SPRN_TEXASR); 355 + vcpu->arch.tfhar = mfspr(SPRN_TFHAR); 356 + vcpu->arch.tfiar = mfspr(SPRN_TFIAR); 357 + 358 + if (!vcpu->arch.nested) { 359 + vcpu->arch.load_tm++; /* see load_ebb comment */ 360 + if (!vcpu->arch.load_tm) 361 + vcpu->arch.hfscr &= ~HFSCR_TM; 362 + } 363 + } 364 + } 365 + #endif 366 + } 367 + EXPORT_SYMBOL_GPL(store_vcpu_state); 368 + 369 + void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) 370 + { 371 + host_os_sprs->iamr = mfspr(SPRN_IAMR); 372 + host_os_sprs->amr = mfspr(SPRN_AMR); 373 + } 374 + EXPORT_SYMBOL_GPL(save_p9_host_os_sprs); 375 + 376 + /* vcpu guest regs must already be saved */ 377 + void 
restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, 378 + struct p9_host_os_sprs *host_os_sprs) 379 + { 380 + /* 381 + * current->thread.xxx registers must all be restored to host 382 + * values before a potential context switch, othrewise the context 383 + * switch itself will overwrite current->thread.xxx with the values 384 + * from the guest SPRs. 385 + */ 386 + 387 + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); 388 + 389 + if (cpu_has_feature(CPU_FTR_P9_TIDR) && 390 + current->thread.tidr != vcpu->arch.tid) 391 + mtspr(SPRN_TIDR, current->thread.tidr); 392 + if (host_os_sprs->iamr != vcpu->arch.iamr) 393 + mtspr(SPRN_IAMR, host_os_sprs->iamr); 394 + if (vcpu->arch.uamor != 0) 395 + mtspr(SPRN_UAMOR, 0); 396 + if (host_os_sprs->amr != vcpu->arch.amr) 397 + mtspr(SPRN_AMR, host_os_sprs->amr); 398 + if (current->thread.fscr != vcpu->arch.fscr) 399 + mtspr(SPRN_FSCR, current->thread.fscr); 400 + if (current->thread.dscr != vcpu->arch.dscr) 401 + mtspr(SPRN_DSCR, current->thread.dscr); 402 + if (vcpu->arch.pspb != 0) 403 + mtspr(SPRN_PSPB, 0); 404 + 405 + /* Save guest CTRL register, set runlatch to 1 */ 406 + if (!(vcpu->arch.ctrl & 1)) 407 + mtspr(SPRN_CTRLT, 1); 408 + 409 + #ifdef CONFIG_ALTIVEC 410 + if (cpu_has_feature(CPU_FTR_ALTIVEC) && 411 + vcpu->arch.vrsave != current->thread.vrsave) 412 + mtspr(SPRN_VRSAVE, current->thread.vrsave); 413 + #endif 414 + if (vcpu->arch.hfscr & HFSCR_EBB) { 415 + if (vcpu->arch.bescr != current->thread.bescr) 416 + mtspr(SPRN_BESCR, current->thread.bescr); 417 + if (vcpu->arch.ebbhr != current->thread.ebbhr) 418 + mtspr(SPRN_EBBHR, current->thread.ebbhr); 419 + if (vcpu->arch.ebbrr != current->thread.ebbrr) 420 + mtspr(SPRN_EBBRR, current->thread.ebbrr); 421 + 422 + if (!vcpu->arch.nested) { 423 + /* 424 + * This is like load_fp in context switching, turn off 425 + * the facility after it wraps the u8 to try avoiding 426 + * saving and restoring the registers each partition 427 + * switch. 
428 + */ 429 + vcpu->arch.load_ebb++; 430 + if (!vcpu->arch.load_ebb) 431 + vcpu->arch.hfscr &= ~HFSCR_EBB; 432 + } 433 + } 434 + 435 + if (vcpu->arch.tar != current->thread.tar) 436 + mtspr(SPRN_TAR, current->thread.tar); 437 + } 438 + EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); 8 439 9 440 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 10 441 static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) ··· 487 56 #define accumulate_time(vcpu, next) do {} while (0) 488 57 #endif 489 58 490 - static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev) 59 + static inline u64 mfslbv(unsigned int idx) 491 60 { 492 - asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx)); 493 - asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx)); 61 + u64 slbev; 62 + 63 + asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx)); 64 + 65 + return slbev; 66 + } 67 + 68 + static inline u64 mfslbe(unsigned int idx) 69 + { 70 + u64 slbee; 71 + 72 + asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx)); 73 + 74 + return slbee; 494 75 } 495 76 496 77 static inline void mtslb(u64 slbee, u64 slbev) ··· 543 100 lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; 544 101 545 102 /* 546 - * All the isync()s are overkill but trivially follow the ISA 547 - * requirements. Some can likely be replaced with justification 548 - * comment for why they are not needed. 103 + * Prior memory accesses to host PID Q3 must be completed before we 104 + * start switching, and stores must be drained to avoid not-my-LPAR 105 + * logic (see switch_mmu_to_host). 549 106 */ 107 + asm volatile("hwsync" ::: "memory"); 550 108 isync(); 551 109 mtspr(SPRN_LPID, lpid); 552 - isync(); 553 110 mtspr(SPRN_LPCR, lpcr); 554 - isync(); 555 111 mtspr(SPRN_PID, vcpu->arch.pid); 556 - isync(); 112 + /* 113 + * isync not required here because we are HRFID'ing to guest before 114 + * any guest context access, which is context synchronising. 
115 + */ 557 116 } 558 117 559 118 static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) ··· 565 120 566 121 lpid = kvm->arch.lpid; 567 122 123 + /* 124 + * See switch_mmu_to_guest_radix. ptesync should not be required here 125 + * even if the host is in HPT mode because speculative accesses would 126 + * not cause RC updates (we are in real mode). 127 + */ 128 + asm volatile("hwsync" ::: "memory"); 129 + isync(); 568 130 mtspr(SPRN_LPID, lpid); 569 131 mtspr(SPRN_LPCR, lpcr); 570 132 mtspr(SPRN_PID, vcpu->arch.pid); 571 133 572 134 for (i = 0; i < vcpu->arch.slb_max; i++) 573 135 mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv); 574 - 575 - isync(); 136 + /* 137 + * isync not required here, see switch_mmu_to_guest_radix. 138 + */ 576 139 } 577 140 578 141 static void switch_mmu_to_host(struct kvm *kvm, u32 pid) 579 142 { 143 + /* 144 + * The guest has exited, so guest MMU context is no longer being 145 + * non-speculatively accessed, but a hwsync is needed before the 146 + * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained, 147 + * so the not-my-LPAR tlbie logic does not overlook them. 148 + */ 149 + asm volatile("hwsync" ::: "memory"); 580 150 isync(); 581 151 mtspr(SPRN_PID, pid); 582 - isync(); 583 152 mtspr(SPRN_LPID, kvm->arch.host_lpid); 584 - isync(); 585 153 mtspr(SPRN_LPCR, kvm->arch.host_lpcr); 586 - isync(); 154 + /* 155 + * isync is not required after the switch, because mtmsrd with L=0 156 + * is performed after this switch, which is context synchronising. 
157 + */ 587 158 588 159 if (!radix_enabled()) 589 160 slb_restore_bolted_realmode(); ··· 632 171 */ 633 172 for (i = 0; i < vcpu->arch.slb_nr; i++) { 634 173 u64 slbee, slbev; 635 - mfslb(i, &slbee, &slbev); 174 + 175 + slbee = mfslbe(i); 636 176 if (slbee & SLB_ESID_V) { 177 + slbev = mfslbv(i); 637 178 vcpu->arch.slb[nr].orige = slbee | i; 638 179 vcpu->arch.slb[nr].origv = slbev; 639 180 nr++; ··· 646 183 } 647 184 } 648 185 649 - int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) 186 + static void flush_guest_tlb(struct kvm *kvm) 650 187 { 188 + unsigned long rb, set; 189 + 190 + rb = PPC_BIT(52); /* IS = 2 */ 191 + if (kvm_is_radix(kvm)) { 192 + /* R=1 PRS=1 RIC=2 */ 193 + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 194 + : : "r" (rb), "i" (1), "i" (1), "i" (2), 195 + "r" (0) : "memory"); 196 + for (set = 1; set < kvm->arch.tlb_sets; ++set) { 197 + rb += PPC_BIT(51); /* increment set number */ 198 + /* R=1 PRS=1 RIC=0 */ 199 + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 200 + : : "r" (rb), "i" (1), "i" (1), "i" (0), 201 + "r" (0) : "memory"); 202 + } 203 + asm volatile("ptesync": : :"memory"); 204 + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. 205 + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); 206 + } else { 207 + for (set = 0; set < kvm->arch.tlb_sets; ++set) { 208 + /* R=0 PRS=0 RIC=0 */ 209 + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 210 + : : "r" (rb), "i" (0), "i" (0), "i" (0), 211 + "r" (0) : "memory"); 212 + rb += PPC_BIT(51); /* increment set number */ 213 + } 214 + asm volatile("ptesync": : :"memory"); 215 + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. 
216 + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); 217 + } 218 + } 219 + 220 + static void check_need_tlb_flush(struct kvm *kvm, int pcpu, 221 + struct kvm_nested_guest *nested) 222 + { 223 + cpumask_t *need_tlb_flush; 224 + bool all_set = true; 225 + int i; 226 + 227 + if (nested) 228 + need_tlb_flush = &nested->need_tlb_flush; 229 + else 230 + need_tlb_flush = &kvm->arch.need_tlb_flush; 231 + 232 + if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush))) 233 + return; 234 + 235 + /* 236 + * Individual threads can come in here, but the TLB is shared between 237 + * the 4 threads in a core, hence invalidating on one thread 238 + * invalidates for all, so only invalidate the first time (if all bits 239 + * were set). The others must still execute a ptesync. 240 + * 241 + * If a race occurs and two threads do the TLB flush, that is not a 242 + * problem, just sub-optimal. 243 + */ 244 + for (i = cpu_first_tlb_thread_sibling(pcpu); 245 + i <= cpu_last_tlb_thread_sibling(pcpu); 246 + i += cpu_tlb_thread_sibling_step()) { 247 + if (!cpumask_test_cpu(i, need_tlb_flush)) { 248 + all_set = false; 249 + break; 250 + } 251 + } 252 + if (all_set) 253 + flush_guest_tlb(kvm); 254 + else 255 + asm volatile("ptesync" ::: "memory"); 256 + 257 + /* Clear the bit after the TLB flush */ 258 + cpumask_clear_cpu(pcpu, need_tlb_flush); 259 + } 260 + 261 + unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr) 262 + { 263 + unsigned long msr_needed = 0; 264 + 265 + msr &= ~MSR_EE; 266 + 267 + /* MSR bits may have been cleared by context switch so must recheck */ 268 + if (IS_ENABLED(CONFIG_PPC_FPU)) 269 + msr_needed |= MSR_FP; 270 + if (cpu_has_feature(CPU_FTR_ALTIVEC)) 271 + msr_needed |= MSR_VEC; 272 + if (cpu_has_feature(CPU_FTR_VSX)) 273 + msr_needed |= MSR_VSX; 274 + if ((cpu_has_feature(CPU_FTR_TM) || 275 + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && 276 + (vcpu->arch.hfscr & HFSCR_TM)) 277 + msr_needed |= MSR_TM; 278 + 279 + /* 280 + 
* This could be combined with MSR[RI] clearing, but that expands 281 + * the unrecoverable window. It would be better to cover unrecoverable 282 + * with KVM bad interrupt handling rather than use MSR[RI] at all. 283 + * 284 + * Much more difficult and less worthwhile to combine with IR/DR 285 + * disable. 286 + */ 287 + if ((msr & msr_needed) != msr_needed) { 288 + msr |= msr_needed; 289 + __mtmsrd(msr, 0); 290 + } else { 291 + __hard_irq_disable(); 292 + } 293 + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 294 + 295 + return msr; 296 + } 297 + EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities); 298 + 299 + int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) 300 + { 301 + struct p9_host_os_sprs host_os_sprs; 651 302 struct kvm *kvm = vcpu->kvm; 652 303 struct kvm_nested_guest *nested = vcpu->arch.nested; 653 304 struct kvmppc_vcore *vc = vcpu->arch.vcore; 654 - s64 hdec; 655 - u64 tb, purr, spurr; 305 + s64 hdec, dec; 306 + u64 purr, spurr; 656 307 u64 *exsave; 657 - bool ri_set; 658 308 int trap; 659 309 unsigned long msr; 660 310 unsigned long host_hfscr; ··· 775 199 unsigned long host_dawr0; 776 200 unsigned long host_dawrx0; 777 201 unsigned long host_psscr; 202 + unsigned long host_hpsscr; 778 203 unsigned long host_pidr; 779 204 unsigned long host_dawr1; 780 205 unsigned long host_dawrx1; 206 + unsigned long dpdes; 781 207 782 - hdec = time_limit - mftb(); 208 + hdec = time_limit - *tb; 783 209 if (hdec < 0) 784 210 return BOOK3S_INTERRUPT_HV_DECREMENTER; 785 211 ··· 792 214 793 215 vcpu->arch.ceded = 0; 794 216 795 - if (vc->tb_offset) { 796 - u64 new_tb = mftb() + vc->tb_offset; 797 - mtspr(SPRN_TBU40, new_tb); 798 - tb = mftb(); 799 - if ((tb & 0xffffff) < (new_tb & 0xffffff)) 800 - mtspr(SPRN_TBU40, new_tb + 0x1000000); 801 - vc->tb_offset_applied = vc->tb_offset; 802 - } 803 - 804 - msr = mfmsr(); 217 + /* Save MSR for restore, with EE clear. 
*/ 218 + msr = mfmsr() & ~MSR_EE; 805 219 806 220 host_hfscr = mfspr(SPRN_HFSCR); 807 221 host_ciabr = mfspr(SPRN_CIABR); 808 - host_dawr0 = mfspr(SPRN_DAWR0); 809 - host_dawrx0 = mfspr(SPRN_DAWRX0); 810 - host_psscr = mfspr(SPRN_PSSCR); 222 + host_psscr = mfspr(SPRN_PSSCR_PR); 223 + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) 224 + host_hpsscr = mfspr(SPRN_PSSCR); 811 225 host_pidr = mfspr(SPRN_PID); 812 - if (cpu_has_feature(CPU_FTR_DAWR1)) { 813 - host_dawr1 = mfspr(SPRN_DAWR1); 814 - host_dawrx1 = mfspr(SPRN_DAWRX1); 815 - } 816 226 817 - if (vc->pcr) 818 - mtspr(SPRN_PCR, vc->pcr | PCR_MASK); 819 - mtspr(SPRN_DPDES, vc->dpdes); 820 - mtspr(SPRN_VTB, vc->vtb); 227 + if (dawr_enabled()) { 228 + host_dawr0 = mfspr(SPRN_DAWR0); 229 + host_dawrx0 = mfspr(SPRN_DAWRX0); 230 + if (cpu_has_feature(CPU_FTR_DAWR1)) { 231 + host_dawr1 = mfspr(SPRN_DAWR1); 232 + host_dawrx1 = mfspr(SPRN_DAWRX1); 233 + } 234 + } 821 235 822 236 local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); 823 237 local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); 238 + 239 + save_p9_host_os_sprs(&host_os_sprs); 240 + 241 + msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr); 242 + if (lazy_irq_pending()) { 243 + trap = 0; 244 + goto out; 245 + } 246 + 247 + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) 248 + msr = mfmsr(); /* MSR may have been updated */ 249 + 250 + if (vc->tb_offset) { 251 + u64 new_tb = *tb + vc->tb_offset; 252 + mtspr(SPRN_TBU40, new_tb); 253 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 254 + new_tb += 0x1000000; 255 + mtspr(SPRN_TBU40, new_tb); 256 + } 257 + *tb = new_tb; 258 + vc->tb_offset_applied = vc->tb_offset; 259 + } 260 + 261 + mtspr(SPRN_VTB, vc->vtb); 824 262 mtspr(SPRN_PURR, vcpu->arch.purr); 825 263 mtspr(SPRN_SPURR, vcpu->arch.spurr); 826 264 265 + if (vc->pcr) 266 + mtspr(SPRN_PCR, vc->pcr | PCR_MASK); 267 + if (vcpu->arch.doorbell_request) { 268 + vcpu->arch.doorbell_request = 0; 269 + mtspr(SPRN_DPDES, 1); 270 + } 271 + 827 272 if 
(dawr_enabled()) { 828 - mtspr(SPRN_DAWR0, vcpu->arch.dawr0); 829 - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); 273 + if (vcpu->arch.dawr0 != host_dawr0) 274 + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); 275 + if (vcpu->arch.dawrx0 != host_dawrx0) 276 + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); 830 277 if (cpu_has_feature(CPU_FTR_DAWR1)) { 831 - mtspr(SPRN_DAWR1, vcpu->arch.dawr1); 832 - mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); 278 + if (vcpu->arch.dawr1 != host_dawr1) 279 + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); 280 + if (vcpu->arch.dawrx1 != host_dawrx1) 281 + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); 833 282 } 834 283 } 835 - mtspr(SPRN_CIABR, vcpu->arch.ciabr); 836 - mtspr(SPRN_IC, vcpu->arch.ic); 284 + if (vcpu->arch.ciabr != host_ciabr) 285 + mtspr(SPRN_CIABR, vcpu->arch.ciabr); 837 286 838 - mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | 839 - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); 287 + 288 + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { 289 + mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | 290 + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); 291 + } else { 292 + if (vcpu->arch.psscr != host_psscr) 293 + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); 294 + } 840 295 841 296 mtspr(SPRN_HFSCR, vcpu->arch.hfscr); 842 297 ··· 887 276 * HDSI which should correctly update the HDSISR the second time HDSI 888 277 * entry. 889 278 * 890 - * Just do this on all p9 processors for now. 279 + * The "radix prefetch bug" test can be used to test for this bug, as 280 + * it also exists for DD2.1 and below. 
891 281 */ 892 - mtspr(SPRN_HDSISR, HDSISR_CANARY); 282 + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) 283 + mtspr(SPRN_HDSISR, HDSISR_CANARY); 893 284 894 285 mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); 895 286 mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); 896 287 mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); 897 288 mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); 898 289 899 - mtspr(SPRN_AMOR, ~0UL); 290 + /* 291 + * It might be preferable to load_vcpu_state here, in order to get the 292 + * GPR/FP register loads executing in parallel with the previous mtSPR 293 + * instructions, but for now that can't be done because the TM handling 294 + * in load_vcpu_state can change some SPRs and vcpu state (nip, msr). 295 + * But TM could be split out if this would be a significant benefit. 296 + */ 900 297 901 - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9; 298 + /* 299 + * MSR[RI] does not need to be cleared (and is not, for radix guests 300 + * with no prefetch bug), because in_guest is set. If we take a SRESET 301 + * or MCE with in_guest set but still in HV mode, then 302 + * kvmppc_p9_bad_interrupt handles the interrupt, which effectively 303 + * clears MSR[RI] and doesn't return. 
304 + */ 305 + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9); 306 + barrier(); /* Open in_guest critical section */ 902 307 903 308 /* 904 309 * Hash host, hash guest, or radix guest with prefetch bug, all have ··· 926 299 927 300 save_clear_host_mmu(kvm); 928 301 929 - if (kvm_is_radix(kvm)) { 302 + if (kvm_is_radix(kvm)) 930 303 switch_mmu_to_guest_radix(kvm, vcpu, lpcr); 931 - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) 932 - __mtmsrd(0, 1); /* clear RI */ 933 - 934 - } else { 304 + else 935 305 switch_mmu_to_guest_hpt(kvm, vcpu, lpcr); 936 - } 937 306 938 307 /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */ 939 - kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested); 308 + check_need_tlb_flush(kvm, vc->pcpu, nested); 940 309 941 310 /* 942 311 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0, 943 312 * so set guest LPCR (with HDICE) before writing HDEC. 944 313 */ 945 314 mtspr(SPRN_HDEC, hdec); 315 + 316 + mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb); 946 317 947 318 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 948 319 tm_return_to_guest: ··· 952 327 953 328 accumulate_time(vcpu, &vcpu->arch.guest_time); 954 329 330 + switch_pmu_to_guest(vcpu, &host_os_sprs); 955 331 kvmppc_p9_enter_guest(vcpu); 332 + switch_pmu_to_host(vcpu, &host_os_sprs); 956 333 957 334 accumulate_time(vcpu, &vcpu->arch.rm_intr); 958 335 ··· 967 340 /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */ 968 341 trap = local_paca->kvm_hstate.scratch0 & ~0x2; 969 342 970 - /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. 
*/ 971 - ri_set = false; 972 - if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) { 973 - if (trap != BOOK3S_INTERRUPT_SYSCALL && 974 - (vcpu->arch.shregs.msr & MSR_RI)) 975 - ri_set = true; 343 + if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) 976 344 exsave = local_paca->exgen; 977 - } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) { 345 + else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) 978 346 exsave = local_paca->exnmi; 979 - } else { /* trap == 0x200 */ 347 + else /* trap == 0x200 */ 980 348 exsave = local_paca->exmc; 981 - } 982 349 983 350 vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1; 984 351 vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2; 985 352 986 353 /* 987 - * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1) 988 - * and hstate scratch (which we need to move into exsave to make 989 - * re-entrant vs SRESET/MCE) 354 + * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate 355 + * scratch (which we need to move into exsave to make re-entrant vs 356 + * SRESET/MCE), register state is protected from reentrancy. However 357 + * timebase, MMU, among other state is still set to guest, so don't 358 + * enable MSR[RI] here. It gets enabled at the end, after in_guest 359 + * is cleared. 360 + * 361 + * It is possible an NMI could come in here, which is why it is 362 + * important to save the above state early so it can be debugged. 
990 363 */ 991 - if (ri_set) { 992 - if (unlikely(!(mfmsr() & MSR_RI))) { 993 - __mtmsrd(MSR_RI, 1); 994 - WARN_ON_ONCE(1); 995 - } 996 - } else { 997 - WARN_ON_ONCE(mfmsr() & MSR_RI); 998 - __mtmsrd(MSR_RI, 1); 999 - } 1000 364 1001 365 vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)]; 1002 366 vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)]; ··· 1006 388 kvmppc_realmode_machine_check(vcpu); 1007 389 1008 390 } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { 1009 - kvmppc_realmode_hmi_handler(); 391 + kvmppc_p9_realmode_hmi_handler(vcpu); 1010 392 1011 393 } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { 1012 394 vcpu->arch.emul_inst = mfspr(SPRN_HEIR); ··· 1045 427 */ 1046 428 mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); 1047 429 mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); 1048 - 1049 - /* 1050 - * tm_return_to_guest re-loads SRR0/1, DAR, 1051 - * DSISR after RI is cleared, in case they had 1052 - * been clobbered by a MCE. 1053 - */ 1054 - __mtmsrd(0, 1); /* clear RI */ 1055 430 goto tm_return_to_guest; 1056 431 } 1057 432 } ··· 1056 445 /* Advance host PURR/SPURR by the amount used by guest */ 1057 446 purr = mfspr(SPRN_PURR); 1058 447 spurr = mfspr(SPRN_SPURR); 1059 - mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr + 1060 - purr - vcpu->arch.purr); 1061 - mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr + 1062 - spurr - vcpu->arch.spurr); 448 + local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr; 449 + local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr; 1063 450 vcpu->arch.purr = purr; 1064 451 vcpu->arch.spurr = spurr; 1065 452 1066 453 vcpu->arch.ic = mfspr(SPRN_IC); 1067 454 vcpu->arch.pid = mfspr(SPRN_PID); 1068 - vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS; 455 + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); 1069 456 1070 457 vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0); 1071 458 vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1); 1072 459 vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); 1073 460 vcpu->arch.shregs.sprg3 = 
mfspr(SPRN_SPRG3); 1074 461 1075 - /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ 1076 - mtspr(SPRN_PSSCR, host_psscr | 1077 - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); 1078 - mtspr(SPRN_HFSCR, host_hfscr); 1079 - mtspr(SPRN_CIABR, host_ciabr); 1080 - mtspr(SPRN_DAWR0, host_dawr0); 1081 - mtspr(SPRN_DAWRX0, host_dawrx0); 1082 - if (cpu_has_feature(CPU_FTR_DAWR1)) { 1083 - mtspr(SPRN_DAWR1, host_dawr1); 1084 - mtspr(SPRN_DAWRX1, host_dawrx1); 462 + dpdes = mfspr(SPRN_DPDES); 463 + if (dpdes) 464 + vcpu->arch.doorbell_request = 1; 465 + 466 + vc->vtb = mfspr(SPRN_VTB); 467 + 468 + dec = mfspr(SPRN_DEC); 469 + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ 470 + dec = (s32) dec; 471 + *tb = mftb(); 472 + vcpu->arch.dec_expires = dec + *tb; 473 + 474 + if (vc->tb_offset_applied) { 475 + u64 new_tb = *tb - vc->tb_offset_applied; 476 + mtspr(SPRN_TBU40, new_tb); 477 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 478 + new_tb += 0x1000000; 479 + mtspr(SPRN_TBU40, new_tb); 480 + } 481 + *tb = new_tb; 482 + vc->tb_offset_applied = 0; 1085 483 } 1086 484 1087 - if (kvm_is_radix(kvm)) { 1088 - /* 1089 - * Since this is radix, do a eieio; tlbsync; ptesync sequence 1090 - * in case we interrupted the guest between a tlbie and a 1091 - * ptesync. 1092 - */ 1093 - asm volatile("eieio; tlbsync; ptesync"); 485 + save_clear_guest_mmu(kvm, vcpu); 486 + switch_mmu_to_host(kvm, host_pidr); 487 + 488 + /* 489 + * Enable MSR here in order to have facilities enabled to save 490 + * guest registers. This enables MMU (if we were in realmode), so 491 + * only switch MMU on after the MMU is switched to host, to avoid 492 + * the P9_RADIX_PREFETCH_BUG or hash guest context. 
493 + */ 494 + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && 495 + vcpu->arch.shregs.msr & MSR_TS_MASK) 496 + msr |= MSR_TS_S; 497 + __mtmsrd(msr, 0); 498 + 499 + store_vcpu_state(vcpu); 500 + 501 + mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr); 502 + mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr); 503 + 504 + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { 505 + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ 506 + mtspr(SPRN_PSSCR, host_hpsscr | 507 + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); 1094 508 } 509 + 510 + mtspr(SPRN_HFSCR, host_hfscr); 511 + if (vcpu->arch.ciabr != host_ciabr) 512 + mtspr(SPRN_CIABR, host_ciabr); 513 + 514 + if (dawr_enabled()) { 515 + if (vcpu->arch.dawr0 != host_dawr0) 516 + mtspr(SPRN_DAWR0, host_dawr0); 517 + if (vcpu->arch.dawrx0 != host_dawrx0) 518 + mtspr(SPRN_DAWRX0, host_dawrx0); 519 + if (cpu_has_feature(CPU_FTR_DAWR1)) { 520 + if (vcpu->arch.dawr1 != host_dawr1) 521 + mtspr(SPRN_DAWR1, host_dawr1); 522 + if (vcpu->arch.dawrx1 != host_dawrx1) 523 + mtspr(SPRN_DAWRX1, host_dawrx1); 524 + } 525 + } 526 + 527 + if (dpdes) 528 + mtspr(SPRN_DPDES, 0); 529 + if (vc->pcr) 530 + mtspr(SPRN_PCR, PCR_MASK); 531 + 532 + /* HDEC must be at least as large as DEC, so decrementer_max fits */ 533 + mtspr(SPRN_HDEC, decrementer_max); 534 + 535 + timer_rearm_host_dec(*tb); 536 + 537 + restore_p9_host_os_sprs(vcpu, &host_os_sprs); 538 + 539 + barrier(); /* Close in_guest critical section */ 540 + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE); 541 + /* Interrupts are recoverable at this point */ 1095 542 1096 543 /* 1097 544 * cp_abort is required if the processor supports local copy-paste ··· 1158 489 if (cpu_has_feature(CPU_FTR_ARCH_31)) 1159 490 asm volatile(PPC_CP_ABORT); 1160 491 1161 - vc->dpdes = mfspr(SPRN_DPDES); 1162 - vc->vtb = mfspr(SPRN_VTB); 1163 - mtspr(SPRN_DPDES, 0); 1164 - if (vc->pcr) 1165 - mtspr(SPRN_PCR, PCR_MASK); 1166 - 1167 - if 
(vc->tb_offset_applied) { 1168 - u64 new_tb = mftb() - vc->tb_offset_applied; 1169 - mtspr(SPRN_TBU40, new_tb); 1170 - tb = mftb(); 1171 - if ((tb & 0xffffff) < (new_tb & 0xffffff)) 1172 - mtspr(SPRN_TBU40, new_tb + 0x1000000); 1173 - vc->tb_offset_applied = 0; 1174 - } 1175 - 1176 - mtspr(SPRN_HDEC, 0x7fffffff); 1177 - 1178 - save_clear_guest_mmu(kvm, vcpu); 1179 - switch_mmu_to_host(kvm, host_pidr); 1180 - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE; 1181 - 1182 - /* 1183 - * If we are in real mode, only switch MMU on after the MMU is 1184 - * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. 1185 - */ 1186 - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && 1187 - vcpu->arch.shregs.msr & MSR_TS_MASK) 1188 - msr |= MSR_TS_S; 1189 - 1190 - __mtmsrd(msr, 0); 1191 - 492 + out: 1192 493 end_timing(vcpu); 1193 494 1194 495 return trap;
+54
arch/powerpc/kvm/book3s_hv_ras.c
··· 136 136 vcpu->arch.mce_evt = mce_evt; 137 137 } 138 138 139 + 140 + long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) 141 + { 142 + struct kvmppc_vcore *vc = vcpu->arch.vcore; 143 + long ret = 0; 144 + 145 + /* 146 + * Unapply and clear the offset first. That way, if the TB was not 147 + * resynced then it will remain in host-offset, and if it was resynced 148 + * then it is brought into host-offset. Then the tb offset is 149 + * re-applied before continuing with the KVM exit. 150 + * 151 + * This way, we don't need to actually know whether or not OPAL resynced 152 + * the timebase or do any of the complicated dance that the P7/8 153 + * path requires. 154 + */ 155 + if (vc->tb_offset_applied) { 156 + u64 new_tb = mftb() - vc->tb_offset_applied; 157 + mtspr(SPRN_TBU40, new_tb); 158 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 159 + new_tb += 0x1000000; 160 + mtspr(SPRN_TBU40, new_tb); 161 + } 162 + vc->tb_offset_applied = 0; 163 + } 164 + 165 + local_paca->hmi_irqs++; 166 + 167 + if (hmi_handle_debugtrig(NULL) >= 0) { 168 + ret = 1; 169 + goto out; 170 + } 171 + 172 + if (ppc_md.hmi_exception_early) 173 + ppc_md.hmi_exception_early(NULL); 174 + 175 + out: 176 + if (vc->tb_offset) { 177 + u64 new_tb = mftb() + vc->tb_offset; 178 + mtspr(SPRN_TBU40, new_tb); 179 + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { 180 + new_tb += 0x1000000; 181 + mtspr(SPRN_TBU40, new_tb); 182 + } 183 + vc->tb_offset_applied = vc->tb_offset; 184 + } 185 + 186 + return ret; 187 + } 188 + 189 + /* 190 + * The following subcore HMI handling is all only for pre-POWER9 CPUs. 191 + */ 192 + 139 193 /* Check if dynamic split is in force and return subcore size accordingly. */ 140 194 static inline int kvmppc_cur_subcore_size(void) 141 195 {
-6
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 55 55 smp_wmb(); 56 56 cpumask_setall(&kvm->arch.need_tlb_flush); 57 57 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 58 - /* 59 - * On POWER9, threads are independent but the TLB is shared, 60 - * so use the bit for the first thread to represent the core. 61 - */ 62 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 63 - cpu = cpu_first_tlb_thread_sibling(cpu); 64 58 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 65 59 } 66 60
+17 -61
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 778 778 /* Restore AMR and UAMOR, set AMOR to all 1s */ 779 779 ld r5,VCPU_AMR(r4) 780 780 ld r6,VCPU_UAMOR(r4) 781 - li r7,-1 782 781 mtspr SPRN_AMR,r5 783 782 mtspr SPRN_UAMOR,r6 784 - mtspr SPRN_AMOR,r7 785 783 786 - /* Restore state of CTRL run bit; assume 1 on entry */ 784 + /* Restore state of CTRL run bit; the host currently has it set to 1 */ 787 785 lwz r5,VCPU_CTRL(r4) 788 786 andi. r5,r5,1 789 787 bne 4f 790 - mfspr r6,SPRN_CTRLF 791 - clrrdi r6,r6,1 788 + li r6,0 792 789 mtspr SPRN_CTRLT,r6 793 790 4: 794 791 /* Secondary threads wait for primary to have done partition switch */ ··· 814 817 * Set the decrementer to the guest decrementer. 815 818 */ 816 819 ld r8,VCPU_DEC_EXPIRES(r4) 817 - /* r8 is a host timebase value here, convert to guest TB */ 818 - ld r5,HSTATE_KVM_VCORE(r13) 819 - ld r6,VCORE_TB_OFFSET_APPL(r5) 820 - add r8,r8,r6 821 820 mftb r7 822 821 subf r3,r7,r8 823 822 mtspr SPRN_DEC,r3 ··· 1188 1195 mftb r6 1189 1196 extsw r5,r5 1190 1197 16: add r5,r5,r6 1191 - /* r5 is a guest timebase value here, convert to host TB */ 1192 - ld r4,VCORE_TB_OFFSET_APPL(r3) 1193 - subf r5,r4,r5 1194 1198 std r5,VCPU_DEC_EXPIRES(r9) 1195 1199 1196 1200 /* Increment exit count, poke other threads to exit */ ··· 1201 1211 stw r0, VCPU_CPU(r9) 1202 1212 stw r0, VCPU_THREAD_CPU(r9) 1203 1213 1204 - /* Save guest CTRL register, set runlatch to 1 */ 1214 + /* Save guest CTRL register, set runlatch to 1 if it was clear */ 1205 1215 mfspr r6,SPRN_CTRLF 1206 1216 stw r6,VCPU_CTRL(r9) 1207 1217 andi. 
r0,r6,1 1208 1218 bne 4f 1209 - ori r6,r6,1 1219 + li r6,1 1210 1220 mtspr SPRN_CTRLT,r6 1211 1221 4: 1212 1222 /* ··· 2153 2163 /* save expiry time of guest decrementer */ 2154 2164 add r3, r3, r5 2155 2165 ld r4, HSTATE_KVM_VCPU(r13) 2156 - ld r5, HSTATE_KVM_VCORE(r13) 2157 - ld r6, VCORE_TB_OFFSET_APPL(r5) 2158 - subf r3, r6, r3 /* convert to host TB value */ 2159 2166 std r3, VCPU_DEC_EXPIRES(r4) 2160 2167 2161 2168 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING ··· 2173 2186 * Also clear the runlatch bit before napping. 2174 2187 */ 2175 2188 kvm_do_nap: 2176 - mfspr r0, SPRN_CTRLF 2177 - clrrdi r0, r0, 1 2189 + li r0,0 2178 2190 mtspr SPRN_CTRLT, r0 2179 2191 2180 2192 li r0,1 ··· 2192 2206 2193 2207 bl isa206_idle_insn_mayloss 2194 2208 2195 - mfspr r0, SPRN_CTRLF 2196 - ori r0, r0, 1 2209 + li r0,1 2197 2210 mtspr SPRN_CTRLT, r0 2198 2211 2199 2212 mtspr SPRN_SRR1, r3 ··· 2249 2264 2250 2265 /* Restore guest decrementer */ 2251 2266 ld r3, VCPU_DEC_EXPIRES(r4) 2252 - ld r5, HSTATE_KVM_VCORE(r13) 2253 - ld r6, VCORE_TB_OFFSET_APPL(r5) 2254 - add r3, r3, r6 /* convert host TB to guest TB value */ 2255 2267 mftb r7 2256 2268 subf r3, r7, r3 2257 2269 mtspr SPRN_DEC, r3 ··· 2693 2711 std r0, GPR0(r1) 2694 2712 std r9, GPR1(r1) 2695 2713 std r2, GPR2(r1) 2696 - SAVE_4GPRS(3, r1) 2697 - SAVE_2GPRS(7, r1) 2714 + SAVE_GPRS(3, 8, r1) 2698 2715 srdi r0, r12, 32 2699 2716 clrldi r12, r12, 32 2700 2717 std r0, _CCR(r1) ··· 2716 2735 ld r9, HSTATE_SCRATCH2(r13) 2717 2736 ld r12, HSTATE_SCRATCH0(r13) 2718 2737 GET_SCRATCH0(r0) 2719 - SAVE_4GPRS(9, r1) 2738 + SAVE_GPRS(9, 12, r1) 2720 2739 std r0, GPR13(r1) 2721 2740 SAVE_NVGPRS(r1) 2722 2741 ld r5, HSTATE_CFAR(r13) ··· 2759 2778 blr 2760 2779 2761 2780 /* 2781 + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) 2782 + * 2762 2783 * Load up guest PMU state. R3 points to the vcpu struct. 
2763 2784 */ 2764 - _GLOBAL(kvmhv_load_guest_pmu) 2765 - EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu) 2785 + kvmhv_load_guest_pmu: 2766 2786 mr r4, r3 2767 2787 mflr r0 2768 2788 li r3, 1 ··· 2798 2816 mtspr SPRN_SIAR, r7 2799 2817 mtspr SPRN_SDAR, r8 2800 2818 BEGIN_FTR_SECTION 2801 - ld r5, VCPU_MMCR + 24(r4) 2802 - ld r6, VCPU_SIER + 8(r4) 2803 - ld r7, VCPU_SIER + 16(r4) 2804 - mtspr SPRN_MMCR3, r5 2805 - mtspr SPRN_SIER2, r6 2806 - mtspr SPRN_SIER3, r7 2807 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) 2808 - BEGIN_FTR_SECTION 2809 2819 ld r5, VCPU_MMCR + 16(r4) 2810 2820 ld r6, VCPU_SIER(r4) 2811 2821 mtspr SPRN_MMCR2, r5 2812 2822 mtspr SPRN_SIER, r6 2813 - BEGIN_FTR_SECTION_NESTED(96) 2814 2823 lwz r7, VCPU_PMC + 24(r4) 2815 2824 lwz r8, VCPU_PMC + 28(r4) 2816 2825 ld r9, VCPU_MMCRS(r4) 2817 2826 mtspr SPRN_SPMC1, r7 2818 2827 mtspr SPRN_SPMC2, r8 2819 2828 mtspr SPRN_MMCRS, r9 2820 - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) 2821 2829 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2822 2830 mtspr SPRN_MMCR0, r3 2823 2831 isync ··· 2815 2843 blr 2816 2844 2817 2845 /* 2846 + * void kvmhv_load_host_pmu(void) 2847 + * 2818 2848 * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. 2819 2849 */ 2820 - _GLOBAL(kvmhv_load_host_pmu) 2821 - EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu) 2850 + kvmhv_load_host_pmu: 2822 2851 mflr r0 2823 2852 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? 
*/ 2824 2853 cmpwi r4, 0 ··· 2857 2884 mtspr SPRN_MMCR2, r8 2858 2885 mtspr SPRN_SIER, r9 2859 2886 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2860 - BEGIN_FTR_SECTION 2861 - ld r5, HSTATE_MMCR3(r13) 2862 - ld r6, HSTATE_SIER2(r13) 2863 - ld r7, HSTATE_SIER3(r13) 2864 - mtspr SPRN_MMCR3, r5 2865 - mtspr SPRN_SIER2, r6 2866 - mtspr SPRN_SIER3, r7 2867 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) 2868 2887 mtspr SPRN_MMCR0, r3 2869 2888 isync 2870 2889 mtlr r0 2871 2890 23: blr 2872 2891 2873 2892 /* 2893 + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) 2894 + * 2874 2895 * Save guest PMU state into the vcpu struct. 2875 2896 * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) 2876 2897 */ 2877 - _GLOBAL(kvmhv_save_guest_pmu) 2878 - EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu) 2898 + kvmhv_save_guest_pmu: 2879 2899 mr r9, r3 2880 2900 mr r8, r4 2881 2901 BEGIN_FTR_SECTION ··· 2917 2951 BEGIN_FTR_SECTION 2918 2952 std r10, VCPU_MMCR + 16(r9) 2919 2953 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2920 - BEGIN_FTR_SECTION 2921 - mfspr r5, SPRN_MMCR3 2922 - mfspr r6, SPRN_SIER2 2923 - mfspr r7, SPRN_SIER3 2924 - std r5, VCPU_MMCR + 24(r9) 2925 - std r6, VCPU_SIER + 8(r9) 2926 - std r7, VCPU_SIER + 16(r9) 2927 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) 2928 2954 std r7, VCPU_SIAR(r9) 2929 2955 std r8, VCPU_SDAR(r9) 2930 2956 mfspr r3, SPRN_PMC1 ··· 2934 2976 BEGIN_FTR_SECTION 2935 2977 mfspr r5, SPRN_SIER 2936 2978 std r5, VCPU_SIER(r9) 2937 - BEGIN_FTR_SECTION_NESTED(96) 2938 2979 mfspr r6, SPRN_SPMC1 2939 2980 mfspr r7, SPRN_SPMC2 2940 2981 mfspr r8, SPRN_MMCRS ··· 2942 2985 std r8, VCPU_MMCRS(r9) 2943 2986 lis r4, 0x8000 2944 2987 mtspr SPRN_MMCRS, r4 2945 - END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) 2946 2988 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2947 2989 22: blr 2948 2990
+6 -1
arch/powerpc/lib/Makefile
··· 19 19 CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING 20 20 endif 21 21 22 - obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o 22 + CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 23 + CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) 24 + 25 + obj-y += alloc.o code-patching.o feature-fixups.o pmem.o 26 + 27 + obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o 23 28 24 29 ifndef CONFIG_KASAN 25 30 obj-y += string.o memcmp_$(BITS).o
+50 -449
arch/powerpc/lib/code-patching.c
··· 3 3 * Copyright 2008 Michael Ellerman, IBM Corporation. 4 4 */ 5 5 6 - #include <linux/kernel.h> 7 6 #include <linux/kprobes.h> 8 7 #include <linux/vmalloc.h> 9 8 #include <linux/init.h> 10 - #include <linux/mm.h> 11 9 #include <linux/cpuhotplug.h> 12 - #include <linux/slab.h> 13 10 #include <linux/uaccess.h> 14 11 15 12 #include <asm/tlbflush.h> 16 13 #include <asm/page.h> 17 14 #include <asm/code-patching.h> 18 - #include <asm/setup.h> 19 15 #include <asm/inst.h> 20 16 21 - static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) 17 + static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) 22 18 { 23 19 if (!ppc_inst_prefixed(instr)) { 24 20 u32 val = ppc_inst_val(instr); ··· 35 39 return -EFAULT; 36 40 } 37 41 38 - int raw_patch_instruction(u32 *addr, struct ppc_inst instr) 42 + int raw_patch_instruction(u32 *addr, ppc_inst_t instr) 39 43 { 40 44 return __patch_instruction(addr, instr, addr); 41 45 } ··· 82 86 static int map_patch_area(void *addr, unsigned long text_poke_addr) 83 87 { 84 88 unsigned long pfn; 85 - int err; 86 89 87 90 if (is_vmalloc_or_module_addr(addr)) 88 91 pfn = vmalloc_to_pfn(addr); 89 92 else 90 93 pfn = __pa_symbol(addr) >> PAGE_SHIFT; 91 94 92 - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); 93 - 94 - pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); 95 - if (err) 96 - return -1; 97 - 98 - return 0; 95 + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); 99 96 } 100 97 101 - static inline int unmap_patch_area(unsigned long addr) 98 + static void unmap_patch_area(unsigned long addr) 102 99 { 103 100 pte_t *ptep; 104 101 pmd_t *pmdp; ··· 100 111 pgd_t *pgdp; 101 112 102 113 pgdp = pgd_offset_k(addr); 103 - if (unlikely(!pgdp)) 104 - return -EINVAL; 114 + if (WARN_ON(pgd_none(*pgdp))) 115 + return; 105 116 106 117 p4dp = p4d_offset(pgdp, addr); 107 - if (unlikely(!p4dp)) 108 - return -EINVAL; 118 + if 
(WARN_ON(p4d_none(*p4dp))) 119 + return; 109 120 110 121 pudp = pud_offset(p4dp, addr); 111 - if (unlikely(!pudp)) 112 - return -EINVAL; 122 + if (WARN_ON(pud_none(*pudp))) 123 + return; 113 124 114 125 pmdp = pmd_offset(pudp, addr); 115 - if (unlikely(!pmdp)) 116 - return -EINVAL; 126 + if (WARN_ON(pmd_none(*pmdp))) 127 + return; 117 128 118 129 ptep = pte_offset_kernel(pmdp, addr); 119 - if (unlikely(!ptep)) 120 - return -EINVAL; 121 - 122 - pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); 130 + if (WARN_ON(pte_none(*ptep))) 131 + return; 123 132 124 133 /* 125 134 * In hash, pte_clear flushes the tlb, in radix, we have to 126 135 */ 127 136 pte_clear(&init_mm, addr, ptep); 128 137 flush_tlb_kernel_range(addr, addr + PAGE_SIZE); 129 - 130 - return 0; 131 138 } 132 139 133 - static int do_patch_instruction(u32 *addr, struct ppc_inst instr) 140 + static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) 134 141 { 135 142 int err; 136 - u32 *patch_addr = NULL; 137 - unsigned long flags; 143 + u32 *patch_addr; 138 144 unsigned long text_poke_addr; 139 - unsigned long kaddr = (unsigned long)addr; 145 + 146 + text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; 147 + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); 148 + 149 + err = map_patch_area(addr, text_poke_addr); 150 + if (err) 151 + return err; 152 + 153 + err = __patch_instruction(addr, instr, patch_addr); 154 + 155 + unmap_patch_area(text_poke_addr); 156 + 157 + return err; 158 + } 159 + 160 + static int do_patch_instruction(u32 *addr, ppc_inst_t instr) 161 + { 162 + int err; 163 + unsigned long flags; 140 164 141 165 /* 142 166 * During early early boot patch_instruction is called ··· 160 158 return raw_patch_instruction(addr, instr); 161 159 162 160 local_irq_save(flags); 163 - 164 - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; 165 - if (map_patch_area(addr, text_poke_addr)) { 166 - err = -1; 167 - goto out; 168 - } 169 - 170 
- patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); 171 - 172 - __patch_instruction(addr, instr, patch_addr); 173 - 174 - err = unmap_patch_area(text_poke_addr); 175 - if (err) 176 - pr_warn("failed to unmap %lx\n", text_poke_addr); 177 - 178 - out: 161 + err = __do_patch_instruction(addr, instr); 179 162 local_irq_restore(flags); 180 163 181 164 return err; 182 165 } 183 166 #else /* !CONFIG_STRICT_KERNEL_RWX */ 184 167 185 - static int do_patch_instruction(u32 *addr, struct ppc_inst instr) 168 + static int do_patch_instruction(u32 *addr, ppc_inst_t instr) 186 169 { 187 170 return raw_patch_instruction(addr, instr); 188 171 } 189 172 190 173 #endif /* CONFIG_STRICT_KERNEL_RWX */ 191 174 192 - int patch_instruction(u32 *addr, struct ppc_inst instr) 175 + int patch_instruction(u32 *addr, ppc_inst_t instr) 193 176 { 194 177 /* Make sure we aren't patching a freed init section */ 195 - if (init_mem_is_free && init_section_contains(addr, 4)) { 196 - pr_debug("Skipping init section patching addr: 0x%px\n", addr); 178 + if (system_state >= SYSTEM_FREEING_INITMEM && init_section_contains(addr, 4)) 197 179 return 0; 198 - } 180 + 199 181 return do_patch_instruction(addr, instr); 200 182 } 201 183 NOKPROBE_SYMBOL(patch_instruction); 202 184 203 185 int patch_branch(u32 *addr, unsigned long target, int flags) 204 186 { 205 - struct ppc_inst instr; 187 + ppc_inst_t instr; 206 188 207 - create_branch(&instr, addr, target, flags); 189 + if (create_branch(&instr, addr, target, flags)) 190 + return -ERANGE; 191 + 208 192 return patch_instruction(addr, instr); 209 193 } 210 194 ··· 225 237 * Helper to check if a given instruction is a conditional branch 226 238 * Derived from the conditional checks in analyse_instr() 227 239 */ 228 - bool is_conditional_branch(struct ppc_inst instr) 240 + bool is_conditional_branch(ppc_inst_t instr) 229 241 { 230 242 unsigned int opcode = ppc_inst_primary_opcode(instr); 231 243 ··· 243 255 } 244 256 
NOKPROBE_SYMBOL(is_conditional_branch); 245 257 246 - int create_branch(struct ppc_inst *instr, const u32 *addr, 258 + int create_branch(ppc_inst_t *instr, const u32 *addr, 247 259 unsigned long target, int flags) 248 260 { 249 261 long offset; ··· 263 275 return 0; 264 276 } 265 277 266 - int create_cond_branch(struct ppc_inst *instr, const u32 *addr, 278 + int create_cond_branch(ppc_inst_t *instr, const u32 *addr, 267 279 unsigned long target, int flags) 268 280 { 269 281 long offset; ··· 282 294 return 0; 283 295 } 284 296 285 - static unsigned int branch_opcode(struct ppc_inst instr) 286 - { 287 - return ppc_inst_primary_opcode(instr) & 0x3F; 288 - } 289 - 290 - static int instr_is_branch_iform(struct ppc_inst instr) 291 - { 292 - return branch_opcode(instr) == 18; 293 - } 294 - 295 - static int instr_is_branch_bform(struct ppc_inst instr) 296 - { 297 - return branch_opcode(instr) == 16; 298 - } 299 - 300 - int instr_is_relative_branch(struct ppc_inst instr) 297 + int instr_is_relative_branch(ppc_inst_t instr) 301 298 { 302 299 if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) 303 300 return 0; ··· 290 317 return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); 291 318 } 292 319 293 - int instr_is_relative_link_branch(struct ppc_inst instr) 320 + int instr_is_relative_link_branch(ppc_inst_t instr) 294 321 { 295 322 return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); 296 323 } ··· 337 364 return 0; 338 365 } 339 366 340 - int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) 367 + int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) 341 368 { 342 369 unsigned long target; 343 370 target = branch_target(src); ··· 351 378 352 379 return 1; 353 380 } 354 - 355 - #ifdef CONFIG_PPC_BOOK3E_64 356 - void __patch_exception(int exc, unsigned long addr) 357 - { 358 - extern unsigned int interrupt_base_book3e; 359 - unsigned int *ibase = &interrupt_base_book3e; 360 - 361 - /* Our exceptions 
vectors start with a NOP and -then- a branch 362 - * to deal with single stepping from userspace which stops on 363 - * the second instruction. Thus we need to patch the second 364 - * instruction of the exception, not the first one 365 - */ 366 - 367 - patch_branch(ibase + (exc / 4) + 1, addr, 0); 368 - } 369 - #endif 370 - 371 - #ifdef CONFIG_CODE_PATCHING_SELFTEST 372 - 373 - static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) 374 - { 375 - if (instr_is_branch_iform(ppc_inst_read(instr)) || 376 - instr_is_branch_bform(ppc_inst_read(instr))) 377 - return branch_target(instr) == addr; 378 - 379 - return 0; 380 - } 381 - 382 - static void __init test_trampoline(void) 383 - { 384 - asm ("nop;\n"); 385 - } 386 - 387 - #define check(x) \ 388 - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); 389 - 390 - static void __init test_branch_iform(void) 391 - { 392 - int err; 393 - struct ppc_inst instr; 394 - u32 tmp[2]; 395 - u32 *iptr = tmp; 396 - unsigned long addr = (unsigned long)tmp; 397 - 398 - /* The simplest case, branch to self, no flags */ 399 - check(instr_is_branch_iform(ppc_inst(0x48000000))); 400 - /* All bits of target set, and flags */ 401 - check(instr_is_branch_iform(ppc_inst(0x4bffffff))); 402 - /* High bit of opcode set, which is wrong */ 403 - check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); 404 - /* Middle bits of opcode set, which is wrong */ 405 - check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); 406 - 407 - /* Simplest case, branch to self with link */ 408 - check(instr_is_branch_iform(ppc_inst(0x48000001))); 409 - /* All bits of targets set */ 410 - check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); 411 - /* Some bits of targets set */ 412 - check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); 413 - /* Must be a valid branch to start with */ 414 - check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); 415 - 416 - /* Absolute branch to 0x100 */ 417 - patch_instruction(iptr, ppc_inst(0x48000103)); 418 - 
check(instr_is_branch_to_addr(iptr, 0x100)); 419 - /* Absolute branch to 0x420fc */ 420 - patch_instruction(iptr, ppc_inst(0x480420ff)); 421 - check(instr_is_branch_to_addr(iptr, 0x420fc)); 422 - /* Maximum positive relative branch, + 20MB - 4B */ 423 - patch_instruction(iptr, ppc_inst(0x49fffffc)); 424 - check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); 425 - /* Smallest negative relative branch, - 4B */ 426 - patch_instruction(iptr, ppc_inst(0x4bfffffc)); 427 - check(instr_is_branch_to_addr(iptr, addr - 4)); 428 - /* Largest negative relative branch, - 32 MB */ 429 - patch_instruction(iptr, ppc_inst(0x4a000000)); 430 - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); 431 - 432 - /* Branch to self, with link */ 433 - err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); 434 - patch_instruction(iptr, instr); 435 - check(instr_is_branch_to_addr(iptr, addr)); 436 - 437 - /* Branch to self - 0x100, with link */ 438 - err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); 439 - patch_instruction(iptr, instr); 440 - check(instr_is_branch_to_addr(iptr, addr - 0x100)); 441 - 442 - /* Branch to self + 0x100, no link */ 443 - err = create_branch(&instr, iptr, addr + 0x100, 0); 444 - patch_instruction(iptr, instr); 445 - check(instr_is_branch_to_addr(iptr, addr + 0x100)); 446 - 447 - /* Maximum relative negative offset, - 32 MB */ 448 - err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); 449 - patch_instruction(iptr, instr); 450 - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); 451 - 452 - /* Out of range relative negative offset, - 32 MB + 4*/ 453 - err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); 454 - check(err); 455 - 456 - /* Out of range relative positive offset, + 32 MB */ 457 - err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); 458 - check(err); 459 - 460 - /* Unaligned target */ 461 - err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); 462 - check(err); 463 - 464 
- /* Check flags are masked correctly */ 465 - err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); 466 - patch_instruction(iptr, instr); 467 - check(instr_is_branch_to_addr(iptr, addr)); 468 - check(ppc_inst_equal(instr, ppc_inst(0x48000000))); 469 - } 470 - 471 - static void __init test_create_function_call(void) 472 - { 473 - u32 *iptr; 474 - unsigned long dest; 475 - struct ppc_inst instr; 476 - 477 - /* Check we can create a function call */ 478 - iptr = (u32 *)ppc_function_entry(test_trampoline); 479 - dest = ppc_function_entry(test_create_function_call); 480 - create_branch(&instr, iptr, dest, BRANCH_SET_LINK); 481 - patch_instruction(iptr, instr); 482 - check(instr_is_branch_to_addr(iptr, dest)); 483 - } 484 - 485 - static void __init test_branch_bform(void) 486 - { 487 - int err; 488 - unsigned long addr; 489 - struct ppc_inst instr; 490 - u32 tmp[2]; 491 - u32 *iptr = tmp; 492 - unsigned int flags; 493 - 494 - addr = (unsigned long)iptr; 495 - 496 - /* The simplest case, branch to self, no flags */ 497 - check(instr_is_branch_bform(ppc_inst(0x40000000))); 498 - /* All bits of target set, and flags */ 499 - check(instr_is_branch_bform(ppc_inst(0x43ffffff))); 500 - /* High bit of opcode set, which is wrong */ 501 - check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); 502 - /* Middle bits of opcode set, which is wrong */ 503 - check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); 504 - 505 - /* Absolute conditional branch to 0x100 */ 506 - patch_instruction(iptr, ppc_inst(0x43ff0103)); 507 - check(instr_is_branch_to_addr(iptr, 0x100)); 508 - /* Absolute conditional branch to 0x20fc */ 509 - patch_instruction(iptr, ppc_inst(0x43ff20ff)); 510 - check(instr_is_branch_to_addr(iptr, 0x20fc)); 511 - /* Maximum positive relative conditional branch, + 32 KB - 4B */ 512 - patch_instruction(iptr, ppc_inst(0x43ff7ffc)); 513 - check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); 514 - /* Smallest negative relative conditional branch, - 4B */ 515 - patch_instruction(iptr, 
ppc_inst(0x43fffffc)); 516 - check(instr_is_branch_to_addr(iptr, addr - 4)); 517 - /* Largest negative relative conditional branch, - 32 KB */ 518 - patch_instruction(iptr, ppc_inst(0x43ff8000)); 519 - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); 520 - 521 - /* All condition code bits set & link */ 522 - flags = 0x3ff000 | BRANCH_SET_LINK; 523 - 524 - /* Branch to self */ 525 - err = create_cond_branch(&instr, iptr, addr, flags); 526 - patch_instruction(iptr, instr); 527 - check(instr_is_branch_to_addr(iptr, addr)); 528 - 529 - /* Branch to self - 0x100 */ 530 - err = create_cond_branch(&instr, iptr, addr - 0x100, flags); 531 - patch_instruction(iptr, instr); 532 - check(instr_is_branch_to_addr(iptr, addr - 0x100)); 533 - 534 - /* Branch to self + 0x100 */ 535 - err = create_cond_branch(&instr, iptr, addr + 0x100, flags); 536 - patch_instruction(iptr, instr); 537 - check(instr_is_branch_to_addr(iptr, addr + 0x100)); 538 - 539 - /* Maximum relative negative offset, - 32 KB */ 540 - err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); 541 - patch_instruction(iptr, instr); 542 - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); 543 - 544 - /* Out of range relative negative offset, - 32 KB + 4*/ 545 - err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); 546 - check(err); 547 - 548 - /* Out of range relative positive offset, + 32 KB */ 549 - err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); 550 - check(err); 551 - 552 - /* Unaligned target */ 553 - err = create_cond_branch(&instr, iptr, addr + 3, flags); 554 - check(err); 555 - 556 - /* Check flags are masked correctly */ 557 - err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); 558 - patch_instruction(iptr, instr); 559 - check(instr_is_branch_to_addr(iptr, addr)); 560 - check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); 561 - } 562 - 563 - static void __init test_translate_branch(void) 564 - { 565 - unsigned long addr; 566 - void *p, *q; 567 - struct ppc_inst instr; 568 - 
void *buf; 569 - 570 - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); 571 - check(buf); 572 - if (!buf) 573 - return; 574 - 575 - /* Simple case, branch to self moved a little */ 576 - p = buf; 577 - addr = (unsigned long)p; 578 - patch_branch(p, addr, 0); 579 - check(instr_is_branch_to_addr(p, addr)); 580 - q = p + 4; 581 - translate_branch(&instr, q, p); 582 - patch_instruction(q, instr); 583 - check(instr_is_branch_to_addr(q, addr)); 584 - 585 - /* Maximum negative case, move b . to addr + 32 MB */ 586 - p = buf; 587 - addr = (unsigned long)p; 588 - patch_branch(p, addr, 0); 589 - q = buf + 0x2000000; 590 - translate_branch(&instr, q, p); 591 - patch_instruction(q, instr); 592 - check(instr_is_branch_to_addr(p, addr)); 593 - check(instr_is_branch_to_addr(q, addr)); 594 - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); 595 - 596 - /* Maximum positive case, move x to x - 32 MB + 4 */ 597 - p = buf + 0x2000000; 598 - addr = (unsigned long)p; 599 - patch_branch(p, addr, 0); 600 - q = buf + 4; 601 - translate_branch(&instr, q, p); 602 - patch_instruction(q, instr); 603 - check(instr_is_branch_to_addr(p, addr)); 604 - check(instr_is_branch_to_addr(q, addr)); 605 - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); 606 - 607 - /* Jump to x + 16 MB moved to x + 20 MB */ 608 - p = buf; 609 - addr = 0x1000000 + (unsigned long)buf; 610 - patch_branch(p, addr, BRANCH_SET_LINK); 611 - q = buf + 0x1400000; 612 - translate_branch(&instr, q, p); 613 - patch_instruction(q, instr); 614 - check(instr_is_branch_to_addr(p, addr)); 615 - check(instr_is_branch_to_addr(q, addr)); 616 - 617 - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ 618 - p = buf + 0x1000000; 619 - addr = 0x2000000 + (unsigned long)buf; 620 - patch_branch(p, addr, 0); 621 - q = buf + 4; 622 - translate_branch(&instr, q, p); 623 - patch_instruction(q, instr); 624 - check(instr_is_branch_to_addr(p, addr)); 625 - check(instr_is_branch_to_addr(q, addr)); 626 - 627 - 628 - /* Conditional branch 
tests */ 629 - 630 - /* Simple case, branch to self moved a little */ 631 - p = buf; 632 - addr = (unsigned long)p; 633 - create_cond_branch(&instr, p, addr, 0); 634 - patch_instruction(p, instr); 635 - check(instr_is_branch_to_addr(p, addr)); 636 - q = buf + 4; 637 - translate_branch(&instr, q, p); 638 - patch_instruction(q, instr); 639 - check(instr_is_branch_to_addr(q, addr)); 640 - 641 - /* Maximum negative case, move b . to addr + 32 KB */ 642 - p = buf; 643 - addr = (unsigned long)p; 644 - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); 645 - patch_instruction(p, instr); 646 - q = buf + 0x8000; 647 - translate_branch(&instr, q, p); 648 - patch_instruction(q, instr); 649 - check(instr_is_branch_to_addr(p, addr)); 650 - check(instr_is_branch_to_addr(q, addr)); 651 - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); 652 - 653 - /* Maximum positive case, move x to x - 32 KB + 4 */ 654 - p = buf + 0x8000; 655 - addr = (unsigned long)p; 656 - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); 657 - patch_instruction(p, instr); 658 - q = buf + 4; 659 - translate_branch(&instr, q, p); 660 - patch_instruction(q, instr); 661 - check(instr_is_branch_to_addr(p, addr)); 662 - check(instr_is_branch_to_addr(q, addr)); 663 - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); 664 - 665 - /* Jump to x + 12 KB moved to x + 20 KB */ 666 - p = buf; 667 - addr = 0x3000 + (unsigned long)buf; 668 - create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); 669 - patch_instruction(p, instr); 670 - q = buf + 0x5000; 671 - translate_branch(&instr, q, p); 672 - patch_instruction(q, instr); 673 - check(instr_is_branch_to_addr(p, addr)); 674 - check(instr_is_branch_to_addr(q, addr)); 675 - 676 - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ 677 - p = buf + 0x2000; 678 - addr = 0x4000 + (unsigned long)buf; 679 - create_cond_branch(&instr, p, addr, 0); 680 - patch_instruction(p, instr); 681 - q = buf + 4; 682 - translate_branch(&instr, q, p); 683 - patch_instruction(q, 
instr); 684 - check(instr_is_branch_to_addr(p, addr)); 685 - check(instr_is_branch_to_addr(q, addr)); 686 - 687 - /* Free the buffer we were using */ 688 - vfree(buf); 689 - } 690 - 691 - #ifdef CONFIG_PPC64 692 - static void __init test_prefixed_patching(void) 693 - { 694 - extern unsigned int code_patching_test1[]; 695 - extern unsigned int code_patching_test1_expected[]; 696 - extern unsigned int end_code_patching_test1[]; 697 - 698 - __patch_instruction(code_patching_test1, 699 - ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), 700 - code_patching_test1); 701 - 702 - check(!memcmp(code_patching_test1, 703 - code_patching_test1_expected, 704 - sizeof(unsigned int) * 705 - (end_code_patching_test1 - code_patching_test1))); 706 - } 707 - #else 708 - static inline void test_prefixed_patching(void) {} 709 - #endif 710 - 711 - static int __init test_code_patching(void) 712 - { 713 - printk(KERN_DEBUG "Running code patching self-tests ...\n"); 714 - 715 - test_branch_iform(); 716 - test_branch_bform(); 717 - test_create_function_call(); 718 - test_translate_branch(); 719 - test_prefixed_patching(); 720 - 721 - return 0; 722 - } 723 - late_initcall(test_code_patching); 724 - 725 - #endif /* CONFIG_CODE_PATCHING_SELFTEST */
+15 -15
arch/powerpc/lib/feature-fixups.c
··· 47 47 static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) 48 48 { 49 49 int err; 50 - struct ppc_inst instr; 50 + ppc_inst_t instr; 51 51 52 52 instr = ppc_inst_read(src); 53 53 ··· 580 580 printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); 581 581 } 582 582 583 - static void patch_btb_flush_section(long *curr) 583 + static void __init patch_btb_flush_section(long *curr) 584 584 { 585 585 unsigned int *start, *end; 586 586 ··· 592 592 } 593 593 } 594 594 595 - void do_btb_flush_fixups(void) 595 + void __init do_btb_flush_fixups(void) 596 596 { 597 597 long *start, *end; 598 598 ··· 621 621 } 622 622 } 623 623 624 - static void do_final_fixups(void) 624 + static void __init do_final_fixups(void) 625 625 { 626 626 #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) 627 - struct ppc_inst inst; 627 + ppc_inst_t inst; 628 628 u32 *src, *dest, *end; 629 629 630 630 if (PHYSICAL_START == 0) ··· 715 715 /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ 716 716 static struct fixup_entry fixup; 717 717 718 - static long calc_offset(struct fixup_entry *entry, unsigned int *p) 718 + static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) 719 719 { 720 720 return (unsigned long)p - (unsigned long)entry; 721 721 } 722 722 723 - static void test_basic_patching(void) 723 + static void __init test_basic_patching(void) 724 724 { 725 725 extern unsigned int ftr_fixup_test1[]; 726 726 extern unsigned int end_ftr_fixup_test1[]; ··· 751 751 check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); 752 752 } 753 753 754 - static void test_alternative_patching(void) 754 + static void __init test_alternative_patching(void) 755 755 { 756 756 extern unsigned int ftr_fixup_test2[]; 757 757 extern unsigned int end_ftr_fixup_test2[]; ··· 784 784 check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); 785 785 } 786 786 787 - static void test_alternative_case_too_big(void) 
787 + static void __init test_alternative_case_too_big(void) 788 788 { 789 789 extern unsigned int ftr_fixup_test3[]; 790 790 extern unsigned int end_ftr_fixup_test3[]; ··· 810 810 check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); 811 811 } 812 812 813 - static void test_alternative_case_too_small(void) 813 + static void __init test_alternative_case_too_small(void) 814 814 { 815 815 extern unsigned int ftr_fixup_test4[]; 816 816 extern unsigned int end_ftr_fixup_test4[]; ··· 856 856 check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); 857 857 } 858 858 859 - static void test_alternative_case_with_external_branch(void) 859 + static void __init test_alternative_case_with_external_branch(void) 860 860 { 861 861 extern unsigned int ftr_fixup_test6[]; 862 862 extern unsigned int end_ftr_fixup_test6[]; ··· 866 866 check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); 867 867 } 868 868 869 - static void test_alternative_case_with_branch_to_end(void) 869 + static void __init test_alternative_case_with_branch_to_end(void) 870 870 { 871 871 extern unsigned int ftr_fixup_test7[]; 872 872 extern unsigned int end_ftr_fixup_test7[]; ··· 876 876 check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); 877 877 } 878 878 879 - static void test_cpu_macros(void) 879 + static void __init test_cpu_macros(void) 880 880 { 881 881 extern u8 ftr_fixup_test_FTR_macros[]; 882 882 extern u8 ftr_fixup_test_FTR_macros_expected[]; ··· 888 888 ftr_fixup_test_FTR_macros_expected, size) == 0); 889 889 } 890 890 891 - static void test_fw_macros(void) 891 + static void __init test_fw_macros(void) 892 892 { 893 893 #ifdef CONFIG_PPC64 894 894 extern u8 ftr_fixup_test_FW_FTR_macros[]; ··· 902 902 #endif 903 903 } 904 904 905 - static void test_lwsync_macros(void) 905 + static void __init test_lwsync_macros(void) 906 906 { 907 907 extern u8 lwsync_fixup_test[]; 908 908 extern u8 end_lwsync_fixup_test[];
+2 -2
arch/powerpc/lib/sstep.c
··· 1354 1354 * otherwise. 1355 1355 */ 1356 1356 int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, 1357 - struct ppc_inst instr) 1357 + ppc_inst_t instr) 1358 1358 { 1359 1359 #ifdef CONFIG_PPC64 1360 1360 unsigned int suffixopcode, prefixtype, prefix_r; ··· 3578 3578 * or -1 if the instruction is one that should not be stepped, 3579 3579 * such as an rfid, or a mtmsrd that would clear MSR_RI. 3580 3580 */ 3581 - int emulate_step(struct pt_regs *regs, struct ppc_inst instr) 3581 + int emulate_step(struct pt_regs *regs, ppc_inst_t instr) 3582 3582 { 3583 3583 struct instruction_op op; 3584 3584 int r, err, type;
+362
arch/powerpc/lib/test-code-patching.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * Copyright 2008 Michael Ellerman, IBM Corporation. 4 + */ 5 + 6 + #include <linux/vmalloc.h> 7 + #include <linux/init.h> 8 + 9 + #include <asm/code-patching.h> 10 + 11 + static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) 12 + { 13 + if (instr_is_branch_iform(ppc_inst_read(instr)) || 14 + instr_is_branch_bform(ppc_inst_read(instr))) 15 + return branch_target(instr) == addr; 16 + 17 + return 0; 18 + } 19 + 20 + static void __init test_trampoline(void) 21 + { 22 + asm ("nop;nop;\n"); 23 + } 24 + 25 + #define check(x) do { \ 26 + if (!(x)) \ 27 + pr_err("code-patching: test failed at line %d\n", __LINE__); \ 28 + } while (0) 29 + 30 + static void __init test_branch_iform(void) 31 + { 32 + int err; 33 + ppc_inst_t instr; 34 + u32 tmp[2]; 35 + u32 *iptr = tmp; 36 + unsigned long addr = (unsigned long)tmp; 37 + 38 + /* The simplest case, branch to self, no flags */ 39 + check(instr_is_branch_iform(ppc_inst(0x48000000))); 40 + /* All bits of target set, and flags */ 41 + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); 42 + /* High bit of opcode set, which is wrong */ 43 + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); 44 + /* Middle bits of opcode set, which is wrong */ 45 + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); 46 + 47 + /* Simplest case, branch to self with link */ 48 + check(instr_is_branch_iform(ppc_inst(0x48000001))); 49 + /* All bits of targets set */ 50 + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); 51 + /* Some bits of targets set */ 52 + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); 53 + /* Must be a valid branch to start with */ 54 + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); 55 + 56 + /* Absolute branch to 0x100 */ 57 + ppc_inst_write(iptr, ppc_inst(0x48000103)); 58 + check(instr_is_branch_to_addr(iptr, 0x100)); 59 + /* Absolute branch to 0x420fc */ 60 + ppc_inst_write(iptr, ppc_inst(0x480420ff)); 61 + 
check(instr_is_branch_to_addr(iptr, 0x420fc)); 62 + /* Maximum positive relative branch, + 20MB - 4B */ 63 + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); 64 + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); 65 + /* Smallest negative relative branch, - 4B */ 66 + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); 67 + check(instr_is_branch_to_addr(iptr, addr - 4)); 68 + /* Largest negative relative branch, - 32 MB */ 69 + ppc_inst_write(iptr, ppc_inst(0x4a000000)); 70 + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); 71 + 72 + /* Branch to self, with link */ 73 + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); 74 + ppc_inst_write(iptr, instr); 75 + check(instr_is_branch_to_addr(iptr, addr)); 76 + 77 + /* Branch to self - 0x100, with link */ 78 + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); 79 + ppc_inst_write(iptr, instr); 80 + check(instr_is_branch_to_addr(iptr, addr - 0x100)); 81 + 82 + /* Branch to self + 0x100, no link */ 83 + err = create_branch(&instr, iptr, addr + 0x100, 0); 84 + ppc_inst_write(iptr, instr); 85 + check(instr_is_branch_to_addr(iptr, addr + 0x100)); 86 + 87 + /* Maximum relative negative offset, - 32 MB */ 88 + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); 89 + ppc_inst_write(iptr, instr); 90 + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); 91 + 92 + /* Out of range relative negative offset, - 32 MB + 4*/ 93 + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); 94 + check(err); 95 + 96 + /* Out of range relative positive offset, + 32 MB */ 97 + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); 98 + check(err); 99 + 100 + /* Unaligned target */ 101 + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); 102 + check(err); 103 + 104 + /* Check flags are masked correctly */ 105 + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); 106 + ppc_inst_write(iptr, instr); 107 + check(instr_is_branch_to_addr(iptr, addr)); 108 + 
check(ppc_inst_equal(instr, ppc_inst(0x48000000))); 109 + } 110 + 111 + static void __init test_create_function_call(void) 112 + { 113 + u32 *iptr; 114 + unsigned long dest; 115 + ppc_inst_t instr; 116 + 117 + /* Check we can create a function call */ 118 + iptr = (u32 *)ppc_function_entry(test_trampoline); 119 + dest = ppc_function_entry(test_create_function_call); 120 + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); 121 + patch_instruction(iptr, instr); 122 + check(instr_is_branch_to_addr(iptr, dest)); 123 + } 124 + 125 + static void __init test_branch_bform(void) 126 + { 127 + int err; 128 + unsigned long addr; 129 + ppc_inst_t instr; 130 + u32 tmp[2]; 131 + u32 *iptr = tmp; 132 + unsigned int flags; 133 + 134 + addr = (unsigned long)iptr; 135 + 136 + /* The simplest case, branch to self, no flags */ 137 + check(instr_is_branch_bform(ppc_inst(0x40000000))); 138 + /* All bits of target set, and flags */ 139 + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); 140 + /* High bit of opcode set, which is wrong */ 141 + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); 142 + /* Middle bits of opcode set, which is wrong */ 143 + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); 144 + 145 + /* Absolute conditional branch to 0x100 */ 146 + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); 147 + check(instr_is_branch_to_addr(iptr, 0x100)); 148 + /* Absolute conditional branch to 0x20fc */ 149 + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); 150 + check(instr_is_branch_to_addr(iptr, 0x20fc)); 151 + /* Maximum positive relative conditional branch, + 32 KB - 4B */ 152 + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); 153 + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); 154 + /* Smallest negative relative conditional branch, - 4B */ 155 + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); 156 + check(instr_is_branch_to_addr(iptr, addr - 4)); 157 + /* Largest negative relative conditional branch, - 32 KB */ 158 + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); 159 + 
check(instr_is_branch_to_addr(iptr, addr - 0x8000)); 160 + 161 + /* All condition code bits set & link */ 162 + flags = 0x3ff000 | BRANCH_SET_LINK; 163 + 164 + /* Branch to self */ 165 + err = create_cond_branch(&instr, iptr, addr, flags); 166 + ppc_inst_write(iptr, instr); 167 + check(instr_is_branch_to_addr(iptr, addr)); 168 + 169 + /* Branch to self - 0x100 */ 170 + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); 171 + ppc_inst_write(iptr, instr); 172 + check(instr_is_branch_to_addr(iptr, addr - 0x100)); 173 + 174 + /* Branch to self + 0x100 */ 175 + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); 176 + ppc_inst_write(iptr, instr); 177 + check(instr_is_branch_to_addr(iptr, addr + 0x100)); 178 + 179 + /* Maximum relative negative offset, - 32 KB */ 180 + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); 181 + ppc_inst_write(iptr, instr); 182 + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); 183 + 184 + /* Out of range relative negative offset, - 32 KB + 4*/ 185 + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); 186 + check(err); 187 + 188 + /* Out of range relative positive offset, + 32 KB */ 189 + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); 190 + check(err); 191 + 192 + /* Unaligned target */ 193 + err = create_cond_branch(&instr, iptr, addr + 3, flags); 194 + check(err); 195 + 196 + /* Check flags are masked correctly */ 197 + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); 198 + ppc_inst_write(iptr, instr); 199 + check(instr_is_branch_to_addr(iptr, addr)); 200 + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); 201 + } 202 + 203 + static void __init test_translate_branch(void) 204 + { 205 + unsigned long addr; 206 + void *p, *q; 207 + ppc_inst_t instr; 208 + void *buf; 209 + 210 + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); 211 + check(buf); 212 + if (!buf) 213 + return; 214 + 215 + /* Simple case, branch to self moved a little */ 216 + p = buf; 217 + addr = (unsigned long)p; 
218 + create_branch(&instr, p, addr, 0); 219 + ppc_inst_write(p, instr); 220 + check(instr_is_branch_to_addr(p, addr)); 221 + q = p + 4; 222 + translate_branch(&instr, q, p); 223 + ppc_inst_write(q, instr); 224 + check(instr_is_branch_to_addr(q, addr)); 225 + 226 + /* Maximum negative case, move b . to addr + 32 MB */ 227 + p = buf; 228 + addr = (unsigned long)p; 229 + create_branch(&instr, p, addr, 0); 230 + ppc_inst_write(p, instr); 231 + q = buf + 0x2000000; 232 + translate_branch(&instr, q, p); 233 + ppc_inst_write(q, instr); 234 + check(instr_is_branch_to_addr(p, addr)); 235 + check(instr_is_branch_to_addr(q, addr)); 236 + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); 237 + 238 + /* Maximum positive case, move x to x - 32 MB + 4 */ 239 + p = buf + 0x2000000; 240 + addr = (unsigned long)p; 241 + create_branch(&instr, p, addr, 0); 242 + ppc_inst_write(p, instr); 243 + q = buf + 4; 244 + translate_branch(&instr, q, p); 245 + ppc_inst_write(q, instr); 246 + check(instr_is_branch_to_addr(p, addr)); 247 + check(instr_is_branch_to_addr(q, addr)); 248 + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); 249 + 250 + /* Jump to x + 16 MB moved to x + 20 MB */ 251 + p = buf; 252 + addr = 0x1000000 + (unsigned long)buf; 253 + create_branch(&instr, p, addr, BRANCH_SET_LINK); 254 + ppc_inst_write(p, instr); 255 + q = buf + 0x1400000; 256 + translate_branch(&instr, q, p); 257 + ppc_inst_write(q, instr); 258 + check(instr_is_branch_to_addr(p, addr)); 259 + check(instr_is_branch_to_addr(q, addr)); 260 + 261 + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ 262 + p = buf + 0x1000000; 263 + addr = 0x2000000 + (unsigned long)buf; 264 + create_branch(&instr, p, addr, 0); 265 + ppc_inst_write(p, instr); 266 + q = buf + 4; 267 + translate_branch(&instr, q, p); 268 + ppc_inst_write(q, instr); 269 + check(instr_is_branch_to_addr(p, addr)); 270 + check(instr_is_branch_to_addr(q, addr)); 271 + 272 + 273 + /* Conditional branch tests */ 274 + 275 + /* Simple 
case, branch to self moved a little */ 276 + p = buf; 277 + addr = (unsigned long)p; 278 + create_cond_branch(&instr, p, addr, 0); 279 + ppc_inst_write(p, instr); 280 + check(instr_is_branch_to_addr(p, addr)); 281 + q = buf + 4; 282 + translate_branch(&instr, q, p); 283 + ppc_inst_write(q, instr); 284 + check(instr_is_branch_to_addr(q, addr)); 285 + 286 + /* Maximum negative case, move b . to addr + 32 KB */ 287 + p = buf; 288 + addr = (unsigned long)p; 289 + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); 290 + ppc_inst_write(p, instr); 291 + q = buf + 0x8000; 292 + translate_branch(&instr, q, p); 293 + ppc_inst_write(q, instr); 294 + check(instr_is_branch_to_addr(p, addr)); 295 + check(instr_is_branch_to_addr(q, addr)); 296 + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); 297 + 298 + /* Maximum positive case, move x to x - 32 KB + 4 */ 299 + p = buf + 0x8000; 300 + addr = (unsigned long)p; 301 + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); 302 + ppc_inst_write(p, instr); 303 + q = buf + 4; 304 + translate_branch(&instr, q, p); 305 + ppc_inst_write(q, instr); 306 + check(instr_is_branch_to_addr(p, addr)); 307 + check(instr_is_branch_to_addr(q, addr)); 308 + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); 309 + 310 + /* Jump to x + 12 KB moved to x + 20 KB */ 311 + p = buf; 312 + addr = 0x3000 + (unsigned long)buf; 313 + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); 314 + ppc_inst_write(p, instr); 315 + q = buf + 0x5000; 316 + translate_branch(&instr, q, p); 317 + ppc_inst_write(q, instr); 318 + check(instr_is_branch_to_addr(p, addr)); 319 + check(instr_is_branch_to_addr(q, addr)); 320 + 321 + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ 322 + p = buf + 0x2000; 323 + addr = 0x4000 + (unsigned long)buf; 324 + create_cond_branch(&instr, p, addr, 0); 325 + ppc_inst_write(p, instr); 326 + q = buf + 4; 327 + translate_branch(&instr, q, p); 328 + ppc_inst_write(q, instr); 329 + check(instr_is_branch_to_addr(p, addr)); 330 + 
check(instr_is_branch_to_addr(q, addr)); 331 + 332 + /* Free the buffer we were using */ 333 + vfree(buf); 334 + } 335 + 336 + static void __init test_prefixed_patching(void) 337 + { 338 + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); 339 + u32 expected[2] = {OP_PREFIX << 26, 0}; 340 + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); 341 + 342 + if (!IS_ENABLED(CONFIG_PPC64)) 343 + return; 344 + 345 + patch_instruction(iptr, inst); 346 + 347 + check(!memcmp(iptr, expected, sizeof(expected))); 348 + } 349 + 350 + static int __init test_code_patching(void) 351 + { 352 + pr_info("Running code patching self-tests ...\n"); 353 + 354 + test_branch_iform(); 355 + test_branch_bform(); 356 + test_create_function_call(); 357 + test_translate_branch(); 358 + test_prefixed_patching(); 359 + 360 + return 0; 361 + } 362 + late_initcall(test_code_patching);
-20
arch/powerpc/lib/test_code-patching.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * Copyright (C) 2020 IBM Corporation 4 - */ 5 - #include <asm/ppc-opcode.h> 6 - 7 - .text 8 - 9 - #define globl(x) \ 10 - .globl x; \ 11 - x: 12 - 13 - globl(code_patching_test1) 14 - nop 15 - nop 16 - globl(end_code_patching_test1) 17 - 18 - globl(code_patching_test1_expected) 19 - .long OP_PREFIX << 26 20 - .long 0x0000000
+5 -5
arch/powerpc/lib/test_emulate_step.c
··· 792 792 #ifdef CONFIG_VSX 793 793 static void __init test_plxvp_pstxvp(void) 794 794 { 795 - struct ppc_inst instr; 795 + ppc_inst_t instr; 796 796 struct pt_regs regs; 797 797 union { 798 798 vector128 a; ··· 906 906 struct { 907 907 char *descr; 908 908 unsigned long flags; 909 - struct ppc_inst instr; 909 + ppc_inst_t instr; 910 910 struct pt_regs regs; 911 911 } subtests[MAX_SUBTESTS + 1]; 912 912 }; ··· 1600 1600 }; 1601 1601 1602 1602 static int __init emulate_compute_instr(struct pt_regs *regs, 1603 - struct ppc_inst instr, 1603 + ppc_inst_t instr, 1604 1604 bool negative) 1605 1605 { 1606 1606 int analysed; ··· 1627 1627 } 1628 1628 1629 1629 static int __init execute_compute_instr(struct pt_regs *regs, 1630 - struct ppc_inst instr) 1630 + ppc_inst_t instr) 1631 1631 { 1632 1632 extern int exec_instr(struct pt_regs *regs); 1633 1633 ··· 1658 1658 struct compute_test *test; 1659 1659 struct pt_regs *regs, exp, got; 1660 1660 unsigned int i, j, k; 1661 - struct ppc_inst instr; 1661 + ppc_inst_t instr; 1662 1662 bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; 1663 1663 1664 1664 for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
+3 -5
arch/powerpc/lib/test_emulate_step_exec_instr.S
··· 37 37 * The stack pointer (GPR1) and the thread pointer (GPR13) are not 38 38 * saved as these should not be modified anyway. 39 39 */ 40 - SAVE_2GPRS(2, r1) 40 + SAVE_GPRS(2, 3, r1) 41 41 SAVE_NVGPRS(r1) 42 42 43 43 /* ··· 75 75 76 76 /* Load GPRs from pt_regs */ 77 77 REST_GPR(0, r31) 78 - REST_10GPRS(2, r31) 79 - REST_GPR(12, r31) 78 + REST_GPRS(2, 12, r31) 80 79 REST_NVGPRS(r31) 81 80 82 81 /* Placeholder for the test instruction */ ··· 98 99 subi r3, r3, GPR0 99 100 SAVE_GPR(0, r3) 100 101 SAVE_GPR(2, r3) 101 - SAVE_8GPRS(4, r3) 102 - SAVE_GPR(12, r3) 102 + SAVE_GPRS(4, 12, r3) 103 103 SAVE_NVGPRS(r3) 104 104 105 105 /* Save resulting LR to pt_regs */
-1
arch/powerpc/mm/book3s32/Makefile
··· 9 9 obj-y += mmu.o mmu_context.o 10 10 obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o 11 11 obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o 12 - obj-$(CONFIG_PPC_KUEP) += kuep.o 13 12 obj-$(CONFIG_PPC_KUAP) += kuap.o
+4 -1
arch/powerpc/mm/book3s32/kuap.c
··· 20 20 21 21 void setup_kuap(bool disabled) 22 22 { 23 - if (!disabled) 23 + if (!disabled) { 24 24 kuap_lock_all_ool(); 25 + init_mm.context.sr0 |= SR_KS; 26 + current->thread.sr0 |= SR_KS; 27 + } 25 28 26 29 if (smp_processor_id() != boot_cpuid) 27 30 return;
-20
arch/powerpc/mm/book3s32/kuep.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-or-later 2 - 3 - #include <asm/kup.h> 4 - #include <asm/smp.h> 5 - 6 - struct static_key_false disable_kuep_key; 7 - 8 - void setup_kuep(bool disabled) 9 - { 10 - if (!disabled) 11 - kuep_lock(); 12 - 13 - if (smp_processor_id() != boot_cpuid) 14 - return; 15 - 16 - if (disabled) 17 - static_branch_enable(&disable_kuep_key); 18 - else 19 - pr_info("Activating Kernel Userspace Execution Prevention\n"); 20 - }
+3 -4
arch/powerpc/mm/book3s32/mmu.c
··· 76 76 return 0; 77 77 } 78 78 79 - static int find_free_bat(void) 79 + static int __init find_free_bat(void) 80 80 { 81 81 int b; 82 82 int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; ··· 196 196 int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; 197 197 int i; 198 198 unsigned long base = (unsigned long)_stext - PAGE_OFFSET; 199 - unsigned long top = (unsigned long)_etext - PAGE_OFFSET; 199 + unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K); 200 200 unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; 201 201 unsigned long size; 202 202 203 - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { 203 + for (i = 0; i < nb - 1 && base < top;) { 204 204 size = block_size(base, top); 205 205 setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); 206 206 base += size; 207 207 } 208 208 if (base < top) { 209 209 size = block_size(base, top); 210 - size = max(size, 128UL << 10); 211 210 if ((top - base) > size) { 212 211 size <<= 1; 213 212 if (strict_kernel_rwx_enabled() && base + size > border)
+7 -8
arch/powerpc/mm/book3s32/mmu_context.c
··· 69 69 int init_new_context(struct task_struct *t, struct mm_struct *mm) 70 70 { 71 71 mm->context.id = __init_new_context(); 72 + mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0); 73 + 74 + if (!kuep_is_disabled()) 75 + mm->context.sr0 |= SR_NX; 76 + if (!kuap_is_disabled()) 77 + mm->context.sr0 |= SR_KS; 72 78 73 79 return 0; 74 80 } ··· 114 108 void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) 115 109 { 116 110 long id = next->context.id; 117 - unsigned long val; 118 111 119 112 if (id < 0) 120 113 panic("mm_struct %p has no context ID", next); 121 114 122 115 isync(); 123 116 124 - val = CTX_TO_VSID(id, 0); 125 - if (!kuep_is_disabled()) 126 - val |= SR_NX; 127 - if (!kuap_is_disabled()) 128 - val |= SR_KS; 129 - 130 - update_user_segments(val); 117 + update_user_segments(next->context.sr0); 131 118 132 119 if (IS_ENABLED(CONFIG_BDI_SWITCH)) 133 120 abatron_pteptrs[1] = next->pgd;
+11 -8
arch/powerpc/mm/book3s64/Makefile
··· 2 2 3 3 ccflags-y := $(NO_MINIMAL_TOC) 4 4 5 + obj-y += mmu_context.o pgtable.o trace.o 6 + ifdef CONFIG_PPC_64S_HASH_MMU 5 7 CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) 6 - 7 - obj-y += hash_pgtable.o hash_utils.o slb.o \ 8 - mmu_context.o pgtable.o hash_tlb.o 9 - obj-$(CONFIG_PPC_NATIVE) += hash_native.o 10 - obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o 8 + obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o 9 + obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o 11 10 obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o 12 11 obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o 13 - obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o 12 + obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o 13 + obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o 14 + endif 15 + 16 + obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 17 + 18 + obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o 14 19 ifdef CONFIG_HUGETLB_PAGE 15 20 obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o 16 21 endif 17 - obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o 18 - obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o 19 22 obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o 20 23 obj-$(CONFIG_PPC_PKEY) += pkeys.o 21 24
+3 -1
arch/powerpc/mm/book3s64/hash_hugetlbpage.c arch/powerpc/mm/book3s64/hugetlbpage.c
··· 16 16 unsigned int hpage_shift; 17 17 EXPORT_SYMBOL(hpage_shift); 18 18 19 + #ifdef CONFIG_PPC_64S_HASH_MMU 19 20 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, 20 21 pte_t *ptep, unsigned long trap, unsigned long flags, 21 22 int ssize, unsigned int shift, unsigned int mmu_psize) ··· 123 122 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 124 123 return 0; 125 124 } 125 + #endif 126 126 127 127 pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, 128 128 unsigned long addr, pte_t *ptep) ··· 150 148 set_huge_pte_at(vma->vm_mm, addr, ptep, pte); 151 149 } 152 150 153 - void hugetlbpage_init_default(void) 151 + void __init hugetlbpage_init_default(void) 154 152 { 155 153 /* Set default large page size. Currently, we pick 16M or 1M 156 154 * depending on what is available
+2 -106
arch/powerpc/mm/book3s64/hash_native.c
··· 43 43 44 44 static DEFINE_RAW_SPINLOCK(native_tlbie_lock); 45 45 46 - static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) 47 - { 48 - unsigned long rb; 49 - 50 - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 51 - 52 - asm volatile("tlbiel %0" : : "r" (rb)); 53 - } 54 - 55 - /* 56 - * tlbiel instruction for hash, set invalidation 57 - * i.e., r=1 and is=01 or is=10 or is=11 58 - */ 59 - static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, 60 - unsigned int pid, 61 - unsigned int ric, unsigned int prs) 62 - { 63 - unsigned long rb; 64 - unsigned long rs; 65 - unsigned int r = 0; /* hash format */ 66 - 67 - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 68 - rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 69 - 70 - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) 71 - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) 72 - : "memory"); 73 - } 74 - 75 - 76 - static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) 77 - { 78 - unsigned int set; 79 - 80 - asm volatile("ptesync": : :"memory"); 81 - 82 - for (set = 0; set < num_sets; set++) 83 - tlbiel_hash_set_isa206(set, is); 84 - 85 - ppc_after_tlbiel_barrier(); 86 - } 87 - 88 - static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 89 - { 90 - unsigned int set; 91 - 92 - asm volatile("ptesync": : :"memory"); 93 - 94 - /* 95 - * Flush the partition table cache if this is HV mode. 96 - */ 97 - if (early_cpu_has_feature(CPU_FTR_HVMODE)) 98 - tlbiel_hash_set_isa300(0, is, 0, 2, 0); 99 - 100 - /* 101 - * Now invalidate the process table cache. UPRT=0 HPT modes (what 102 - * current hardware implements) do not use the process table, but 103 - * add the flushes anyway. 104 - * 105 - * From ISA v3.0B p. 1078: 106 - * The following forms are invalid. 107 - * * PRS=1, R=0, and RIC!=2 (The only process-scoped 108 - * HPT caching is of the Process Table.) 
109 - */ 110 - tlbiel_hash_set_isa300(0, is, 0, 2, 1); 111 - 112 - /* 113 - * Then flush the sets of the TLB proper. Hash mode uses 114 - * partition scoped TLB translations, which may be flushed 115 - * in !HV mode. 116 - */ 117 - for (set = 0; set < num_sets; set++) 118 - tlbiel_hash_set_isa300(set, is, 0, 0, 0); 119 - 120 - ppc_after_tlbiel_barrier(); 121 - 122 - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 123 - } 124 - 125 - void hash__tlbiel_all(unsigned int action) 126 - { 127 - unsigned int is; 128 - 129 - switch (action) { 130 - case TLB_INVAL_SCOPE_GLOBAL: 131 - is = 3; 132 - break; 133 - case TLB_INVAL_SCOPE_LPID: 134 - is = 2; 135 - break; 136 - default: 137 - BUG(); 138 - } 139 - 140 - if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 141 - tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); 142 - else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) 143 - tlbiel_all_isa206(POWER8_TLB_SETS, is); 144 - else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) 145 - tlbiel_all_isa206(POWER7_TLB_SETS, is); 146 - else 147 - WARN(1, "%s called on pre-POWER7 CPU\n", __func__); 148 - } 149 - 150 46 static inline unsigned long ___tlbie(unsigned long vpn, int psize, 151 47 int apsize, int ssize) 152 48 { ··· 163 267 va |= ssize << 8; 164 268 sllp = get_sllp_encoding(apsize); 165 269 va |= sllp << 5; 166 - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1) 270 + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1) 167 271 : : "r" (va), "i" (CPU_FTR_ARCH_206) 168 272 : "memory"); 169 273 break; ··· 182 286 */ 183 287 va |= (vpn & 0xfe); 184 288 va |= 1; /* L */ 185 - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1) 289 + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1) 186 290 : : "r" (va), "i" (CPU_FTR_ARCH_206) 187 291 : "memory"); 188 292 break;
-1
arch/powerpc/mm/book3s64/hash_pgtable.c
··· 16 16 17 17 #include <mm/mmu_decl.h> 18 18 19 - #define CREATE_TRACE_POINTS 20 19 #include <trace/events/thp.h> 21 20 22 21 #if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+108 -9
arch/powerpc/mm/book3s64/hash_utils.c
··· 99 99 */ 100 100 101 101 static unsigned long _SDR1; 102 - struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 103 - EXPORT_SYMBOL_GPL(mmu_psize_defs); 104 102 105 103 u8 hpte_page_sizes[1 << LP_BITS]; 106 104 EXPORT_SYMBOL_GPL(hpte_page_sizes); ··· 112 114 int mmu_virtual_psize = MMU_PAGE_4K; 113 115 int mmu_vmalloc_psize = MMU_PAGE_4K; 114 116 EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); 115 - #ifdef CONFIG_SPARSEMEM_VMEMMAP 116 - int mmu_vmemmap_psize = MMU_PAGE_4K; 117 - #endif 118 117 int mmu_io_psize = MMU_PAGE_4K; 119 118 int mmu_kernel_ssize = MMU_SEGSIZE_256M; 120 119 EXPORT_SYMBOL_GPL(mmu_kernel_ssize); ··· 169 174 .tlbiel = 0, 170 175 }, 171 176 }; 177 + 178 + static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) 179 + { 180 + unsigned long rb; 181 + 182 + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 183 + 184 + asm volatile("tlbiel %0" : : "r" (rb)); 185 + } 186 + 187 + /* 188 + * tlbiel instruction for hash, set invalidation 189 + * i.e., r=1 and is=01 or is=10 or is=11 190 + */ 191 + static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, 192 + unsigned int pid, 193 + unsigned int ric, unsigned int prs) 194 + { 195 + unsigned long rb; 196 + unsigned long rs; 197 + unsigned int r = 0; /* hash format */ 198 + 199 + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 200 + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 201 + 202 + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) 203 + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) 204 + : "memory"); 205 + } 206 + 207 + 208 + static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) 209 + { 210 + unsigned int set; 211 + 212 + asm volatile("ptesync": : :"memory"); 213 + 214 + for (set = 0; set < num_sets; set++) 215 + tlbiel_hash_set_isa206(set, is); 216 + 217 + ppc_after_tlbiel_barrier(); 218 + } 219 + 220 + static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 221 + { 222 + unsigned int set; 223 + 224 + 
asm volatile("ptesync": : :"memory"); 225 + 226 + /* 227 + * Flush the partition table cache if this is HV mode. 228 + */ 229 + if (early_cpu_has_feature(CPU_FTR_HVMODE)) 230 + tlbiel_hash_set_isa300(0, is, 0, 2, 0); 231 + 232 + /* 233 + * Now invalidate the process table cache. UPRT=0 HPT modes (what 234 + * current hardware implements) do not use the process table, but 235 + * add the flushes anyway. 236 + * 237 + * From ISA v3.0B p. 1078: 238 + * The following forms are invalid. 239 + * * PRS=1, R=0, and RIC!=2 (The only process-scoped 240 + * HPT caching is of the Process Table.) 241 + */ 242 + tlbiel_hash_set_isa300(0, is, 0, 2, 1); 243 + 244 + /* 245 + * Then flush the sets of the TLB proper. Hash mode uses 246 + * partition scoped TLB translations, which may be flushed 247 + * in !HV mode. 248 + */ 249 + for (set = 0; set < num_sets; set++) 250 + tlbiel_hash_set_isa300(set, is, 0, 0, 0); 251 + 252 + ppc_after_tlbiel_barrier(); 253 + 254 + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 255 + } 256 + 257 + void hash__tlbiel_all(unsigned int action) 258 + { 259 + unsigned int is; 260 + 261 + switch (action) { 262 + case TLB_INVAL_SCOPE_GLOBAL: 263 + is = 3; 264 + break; 265 + case TLB_INVAL_SCOPE_LPID: 266 + is = 2; 267 + break; 268 + default: 269 + BUG(); 270 + } 271 + 272 + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 273 + tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); 274 + else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) 275 + tlbiel_all_isa206(POWER8_TLB_SETS, is); 276 + else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) 277 + tlbiel_all_isa206(POWER7_TLB_SETS, is); 278 + else 279 + WARN(1, "%s called on pre-POWER7 CPU\n", __func__); 280 + } 172 281 173 282 /* 174 283 * 'R' and 'C' update notes: ··· 662 563 } 663 564 #endif /* CONFIG_HUGETLB_PAGE */ 664 565 665 - static void mmu_psize_set_default_penc(void) 566 + static void __init mmu_psize_set_default_penc(void) 666 567 { 667 568 int bpsize, apsize; 668 569 for (bpsize = 0; bpsize < 
MMU_PAGE_COUNT; bpsize++) ··· 672 573 673 574 #ifdef CONFIG_PPC_64K_PAGES 674 575 675 - static bool might_have_hea(void) 576 + static bool __init might_have_hea(void) 676 577 { 677 578 /* 678 579 * The HEA ethernet adapter requires awareness of the ··· 743 644 * low-order N bits as the encoding for the 2^(12+N) byte page size 744 645 * (if it exists). 745 646 */ 746 - static void init_hpte_page_sizes(void) 647 + static void __init init_hpte_page_sizes(void) 747 648 { 748 649 long int ap, bp; 749 650 long int shift, penc; ··· 1190 1091 ps3_early_mm_init(); 1191 1092 else if (firmware_has_feature(FW_FEATURE_LPAR)) 1192 1093 hpte_init_pseries(); 1193 - else if (IS_ENABLED(CONFIG_PPC_NATIVE)) 1094 + else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE)) 1194 1095 hpte_init_native(); 1195 1096 1196 1097 if (!mmu_hash_ops.hpte_insert)
+27 -7
arch/powerpc/mm/book3s64/mmu_context.c
··· 31 31 return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL); 32 32 } 33 33 34 - void hash__reserve_context_id(int id) 34 + #ifdef CONFIG_PPC_64S_HASH_MMU 35 + void __init hash__reserve_context_id(int id) 35 36 { 36 37 int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL); 37 38 ··· 51 50 return alloc_context_id(MIN_USER_CONTEXT, max); 52 51 } 53 52 EXPORT_SYMBOL_GPL(hash__alloc_context_id); 53 + #endif 54 54 55 + #ifdef CONFIG_PPC_64S_HASH_MMU 55 56 static int realloc_context_ids(mm_context_t *ctx) 56 57 { 57 58 int i, id; ··· 153 150 154 151 slb_setup_new_exec(); 155 152 } 153 + #else 154 + static inline int hash__init_new_context(struct mm_struct *mm) 155 + { 156 + BUILD_BUG(); 157 + return 0; 158 + } 159 + #endif 156 160 157 161 static int radix__init_new_context(struct mm_struct *mm) 158 162 { ··· 185 175 */ 186 176 asm volatile("ptesync;isync" : : : "memory"); 187 177 178 + #ifdef CONFIG_PPC_64S_HASH_MMU 188 179 mm->context.hash_context = NULL; 180 + #endif 189 181 190 182 return index; 191 183 } ··· 225 213 226 214 static void destroy_contexts(mm_context_t *ctx) 227 215 { 228 - int index, context_id; 216 + if (radix_enabled()) { 217 + ida_free(&mmu_context_ida, ctx->id); 218 + } else { 219 + #ifdef CONFIG_PPC_64S_HASH_MMU 220 + int index, context_id; 229 221 230 - for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { 231 - context_id = ctx->extended_id[index]; 232 - if (context_id) 233 - ida_free(&mmu_context_ida, context_id); 222 + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { 223 + context_id = ctx->extended_id[index]; 224 + if (context_id) 225 + ida_free(&mmu_context_ida, context_id); 226 + } 227 + kfree(ctx->hash_context); 228 + #else 229 + BUILD_BUG(); // radix_enabled() should be constant true 230 + #endif 234 231 } 235 - kfree(ctx->hash_context); 236 232 } 237 233 238 234 static void pmd_frag_destroy(void *pmd_frag)
+27 -5
arch/powerpc/mm/book3s64/pgtable.c
··· 22 22 23 23 #include "internal.h" 24 24 25 + struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; 26 + EXPORT_SYMBOL_GPL(mmu_psize_defs); 27 + 28 + #ifdef CONFIG_SPARSEMEM_VMEMMAP 29 + int mmu_vmemmap_psize = MMU_PAGE_4K; 30 + #endif 31 + 25 32 unsigned long __pmd_frag_nr; 26 33 EXPORT_SYMBOL(__pmd_frag_nr); 27 34 unsigned long __pmd_frag_size_shift; ··· 214 207 unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; 215 208 unsigned long ptcr; 216 209 217 - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); 218 210 /* Initialize the Partition Table with no entries */ 219 211 partition_tb = memblock_alloc(patb_size, patb_size); 220 212 if (!partition_tb) 221 213 panic("%s: Failed to allocate %lu bytes align=0x%lx\n", 222 214 __func__, patb_size, patb_size); 223 215 224 - /* 225 - * update partition table control register, 226 - * 64 K size. 227 - */ 228 216 ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); 229 217 set_ptcr_when_no_uv(ptcr); 230 218 powernv_set_nmmu_ptcr(ptcr); ··· 528 526 return 0; 529 527 } 530 528 arch_initcall(pgtable_debugfs_setup); 529 + 530 + #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN) 531 + /* 532 + * Override the generic version in mm/memremap.c. 533 + * 534 + * With hash translation, the direct-map range is mapped with just one 535 + * page size selected by htab_init_page_sizes(). Consult 536 + * mmu_psize_defs[] to determine the minimum page size alignment. 537 + */ 538 + unsigned long memremap_compat_align(void) 539 + { 540 + if (!radix_enabled()) { 541 + unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; 542 + return max(SUBSECTION_SIZE, 1UL << shift); 543 + } 544 + 545 + return SUBSECTION_SIZE; 546 + } 547 + EXPORT_SYMBOL_GPL(memremap_compat_align); 548 + #endif
+1 -1
arch/powerpc/mm/book3s64/pkeys.c
··· 66 66 return 1; 67 67 } 68 68 69 - static int scan_pkey_feature(void) 69 + static int __init scan_pkey_feature(void) 70 70 { 71 71 int ret; 72 72 int pkeys_total = 0;
+8 -32
arch/powerpc/mm/book3s64/radix_pgtable.c
··· 33 33 34 34 #include <trace/events/thp.h> 35 35 36 - unsigned int mmu_pid_bits; 37 36 unsigned int mmu_base_pid; 38 37 unsigned long radix_mem_block_size __ro_after_init; 39 38 ··· 334 335 u64 i; 335 336 336 337 /* We don't support slb for radix */ 337 - mmu_slb_size = 0; 338 + slb_set_size(0); 338 339 339 340 /* 340 341 * Create the linear mapping ··· 356 357 -1, PAGE_KERNEL)); 357 358 } 358 359 359 - /* Find out how many PID bits are supported */ 360 360 if (!cpu_has_feature(CPU_FTR_HVMODE) && 361 361 cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { 362 362 /* 363 363 * Older versions of KVM on these machines perfer if the 364 364 * guest only uses the low 19 PID bits. 365 365 */ 366 - if (!mmu_pid_bits) 367 - mmu_pid_bits = 19; 368 - } else { 369 - if (!mmu_pid_bits) 370 - mmu_pid_bits = 20; 366 + mmu_pid_bits = 19; 371 367 } 372 368 mmu_base_pid = 1; 373 369 ··· 443 449 if (type == NULL || strcmp(type, "cpu") != 0) 444 450 return 0; 445 451 446 - /* Find MMU PID size */ 447 - prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); 448 - if (prop && size == 4) 449 - mmu_pid_bits = be32_to_cpup(prop); 450 - 451 452 /* Grab page size encodings */ 452 453 prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); 453 454 if (!prop) ··· 499 510 return 1; 500 511 } 501 512 502 - static unsigned long radix_memory_block_size(void) 513 + static unsigned long __init radix_memory_block_size(void) 503 514 { 504 515 unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; 505 516 ··· 517 528 518 529 #else /* CONFIG_MEMORY_HOTPLUG */ 519 530 520 - static unsigned long radix_memory_block_size(void) 531 + static unsigned long __init radix_memory_block_size(void) 521 532 { 522 533 return 1UL * 1024 * 1024 * 1024; 523 534 } ··· 561 572 return; 562 573 } 563 574 564 - static void radix_init_amor(void) 565 - { 566 - /* 567 - * In HV mode, we init AMOR (Authority Mask Override Register) so that 568 - * the hypervisor and guest can setup IAMR (Instruction 
Authority Mask 569 - * Register), enable key 0 and set it to 1. 570 - * 571 - * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11) 572 - */ 573 - mtspr(SPRN_AMOR, (3ul << 62)); 574 - } 575 - 576 575 void __init radix__early_init_mmu(void) 577 576 { 578 577 unsigned long lpcr; 579 578 579 + #ifdef CONFIG_PPC_64S_HASH_MMU 580 580 #ifdef CONFIG_PPC_64K_PAGES 581 581 /* PAGE_SIZE mappings */ 582 582 mmu_virtual_psize = MMU_PAGE_64K; ··· 582 604 mmu_vmemmap_psize = MMU_PAGE_2M; 583 605 } else 584 606 mmu_vmemmap_psize = mmu_virtual_psize; 607 + #endif 585 608 #endif 586 609 /* 587 610 * initialize page table size ··· 623 644 lpcr = mfspr(SPRN_LPCR); 624 645 mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); 625 646 radix_init_partition_table(); 626 - radix_init_amor(); 627 647 } else { 628 648 radix_init_pseries(); 629 649 } ··· 646 668 647 669 set_ptcr_when_no_uv(__pa(partition_tb) | 648 670 (PATB_SIZE_SHIFT - 12)); 649 - 650 - radix_init_amor(); 651 671 } 652 672 653 673 radix__switch_mmu_context(NULL, &init_mm); ··· 1076 1100 1077 1101 int pud_clear_huge(pud_t *pud) 1078 1102 { 1079 - if (pud_huge(*pud)) { 1103 + if (pud_is_leaf(*pud)) { 1080 1104 pud_clear(pud); 1081 1105 return 1; 1082 1106 } ··· 1123 1147 1124 1148 int pmd_clear_huge(pmd_t *pmd) 1125 1149 { 1126 - if (pmd_huge(*pmd)) { 1150 + if (pmd_is_leaf(*pmd)) { 1127 1151 pmd_clear(pmd); 1128 1152 return 1; 1129 1153 }
-16
arch/powerpc/mm/book3s64/slb.c
··· 868 868 return err; 869 869 } 870 870 } 871 - 872 - DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) 873 - { 874 - int err = regs->result; 875 - 876 - if (err == -EFAULT) { 877 - if (user_mode(regs)) 878 - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); 879 - else 880 - bad_page_fault(regs, SIGSEGV); 881 - } else if (err == -EINVAL) { 882 - unrecoverable_exception(regs); 883 - } else { 884 - BUG(); 885 - } 886 - }
+8
arch/powerpc/mm/book3s64/trace.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * This file is for defining trace points and trace related helpers. 4 + */ 5 + #ifdef CONFIG_TRANSPARENT_HUGEPAGE 6 + #define CREATE_TRACE_POINTS 7 + #include <trace/events/thp.h> 8 + #endif
+2
arch/powerpc/mm/copro_fault.c
··· 82 82 } 83 83 EXPORT_SYMBOL_GPL(copro_handle_mm_fault); 84 84 85 + #ifdef CONFIG_PPC_64S_HASH_MMU 85 86 int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) 86 87 { 87 88 u64 vsid, vsidkey; ··· 147 146 cxl_slbia(mm); 148 147 } 149 148 EXPORT_SYMBOL_GPL(copro_flush_all_slbs); 149 + #endif
+24
arch/powerpc/mm/fault.c
··· 35 35 #include <linux/kfence.h> 36 36 #include <linux/pkeys.h> 37 37 38 + #include <asm/asm-prototypes.h> 38 39 #include <asm/firmware.h> 39 40 #include <asm/interrupt.h> 40 41 #include <asm/page.h> ··· 620 619 DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) 621 620 { 622 621 bad_page_fault(regs, SIGSEGV); 622 + } 623 + 624 + /* 625 + * In radix, segment interrupts indicate the EA is not addressable by the 626 + * page table geometry, so they are always sent here. 627 + * 628 + * In hash, this is called if do_slb_fault returns error. Typically it is 629 + * because the EA was outside the region allowed by software. 630 + */ 631 + DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt) 632 + { 633 + int err = regs->result; 634 + 635 + if (err == -EFAULT) { 636 + if (user_mode(regs)) 637 + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); 638 + else 639 + bad_page_fault(regs, SIGSEGV); 640 + } else if (err == -EINVAL) { 641 + unrecoverable_exception(regs); 642 + } else { 643 + BUG(); 644 + } 623 645 } 624 646 #endif
+11 -5
arch/powerpc/mm/hugetlbpage.c
··· 542 542 return page; 543 543 } 544 544 545 - #ifdef CONFIG_PPC_MM_SLICES 545 + #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 546 + static inline int file_to_psize(struct file *file) 547 + { 548 + struct hstate *hstate = hstate_file(file); 549 + return shift_to_mmu_psize(huge_page_shift(hstate)); 550 + } 551 + 546 552 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 547 553 unsigned long len, unsigned long pgoff, 548 554 unsigned long flags) 549 555 { 550 - struct hstate *hstate = hstate_file(file); 551 - int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 552 - 553 556 #ifdef CONFIG_PPC_RADIX_MMU 554 557 if (radix_enabled()) 555 558 return radix__hugetlb_get_unmapped_area(file, addr, len, 556 559 pgoff, flags); 557 560 #endif 558 - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); 561 + #ifdef CONFIG_PPC_MM_SLICES 562 + return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1); 563 + #endif 564 + BUG(); 559 565 } 560 566 #endif 561 567
+21
arch/powerpc/mm/init-common.c
··· 20 20 #include <linux/pgtable.h> 21 21 #include <asm/pgalloc.h> 22 22 #include <asm/kup.h> 23 + #include <asm/smp.h> 23 24 24 25 phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; 25 26 EXPORT_SYMBOL_GPL(memstart_addr); ··· 34 33 35 34 static int __init parse_nosmep(char *p) 36 35 { 36 + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) 37 + return 0; 38 + 37 39 disable_kuep = true; 38 40 pr_warn("Disabling Kernel Userspace Execution Prevention\n"); 39 41 return 0; ··· 50 46 return 0; 51 47 } 52 48 early_param("nosmap", parse_nosmap); 49 + 50 + void __weak setup_kuep(bool disabled) 51 + { 52 + if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled) 53 + return; 54 + 55 + if (smp_processor_id() != boot_cpuid) 56 + return; 57 + 58 + pr_info("Activating Kernel Userspace Execution Prevention\n"); 59 + } 60 + 61 + void setup_kup(void) 62 + { 63 + setup_kuap(disable_kuap); 64 + setup_kuep(disable_kuep); 65 + } 53 66 54 67 #define CTOR(shift) static void ctor_##shift(void *addr) \ 55 68 { \
+56 -3
arch/powerpc/mm/init_64.c
··· 370 370 #endif /* CONFIG_SPARSEMEM_VMEMMAP */ 371 371 372 372 #ifdef CONFIG_PPC_BOOK3S_64 373 + unsigned int mmu_lpid_bits; 374 + unsigned int mmu_pid_bits; 375 + 373 376 static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); 374 377 375 378 static int __init parse_disable_radix(char *p) ··· 440 437 } 441 438 } 442 439 440 + static int __init dt_scan_mmu_pid_width(unsigned long node, 441 + const char *uname, int depth, 442 + void *data) 443 + { 444 + int size = 0; 445 + const __be32 *prop; 446 + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); 447 + 448 + /* We are scanning "cpu" nodes only */ 449 + if (type == NULL || strcmp(type, "cpu") != 0) 450 + return 0; 451 + 452 + /* Find MMU LPID, PID register size */ 453 + prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size); 454 + if (prop && size == 4) 455 + mmu_lpid_bits = be32_to_cpup(prop); 456 + 457 + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); 458 + if (prop && size == 4) 459 + mmu_pid_bits = be32_to_cpup(prop); 460 + 461 + if (!mmu_pid_bits && !mmu_lpid_bits) 462 + return 0; 463 + 464 + return 1; 465 + } 466 + 443 467 void __init mmu_early_init_devtree(void) 444 468 { 469 + bool hvmode = !!(mfmsr() & MSR_HV); 470 + 445 471 /* Disable radix mode based on kernel command line. 
*/ 446 - if (disable_radix) 447 - cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; 472 + if (disable_radix) { 473 + if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) 474 + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; 475 + else 476 + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); 477 + } 478 + 479 + of_scan_flat_dt(dt_scan_mmu_pid_width, NULL); 480 + if (hvmode && !mmu_lpid_bits) { 481 + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) 482 + mmu_lpid_bits = 12; /* POWER8-10 */ 483 + else 484 + mmu_lpid_bits = 10; /* POWER7 */ 485 + } 486 + if (!mmu_pid_bits) { 487 + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 488 + mmu_pid_bits = 20; /* POWER9-10 */ 489 + } 448 490 449 491 /* 450 492 * Check /chosen/ibm,architecture-vec-5 if running as a guest. ··· 497 449 * even though the ibm,architecture-vec-5 property created by 498 450 * skiboot doesn't have the necessary bits set. 499 451 */ 500 - if (!(mfmsr() & MSR_HV)) 452 + if (!hvmode) 501 453 early_check_vec5(); 502 454 503 455 if (early_radix_enabled()) { 504 456 radix__early_init_devtree(); 457 + 505 458 /* 506 459 * We have finalized the translation we are going to use by now. 507 460 * Radix mode is not limited by RMA / VRMA addressing. ··· 512 463 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); 513 464 } else 514 465 hash__early_init_devtree(); 466 + 467 + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && 468 + !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) 469 + panic("kernel does not support any MMU type offered by platform"); 515 470 } 516 471 #endif /* CONFIG_PPC_BOOK3S_64 */
-20
arch/powerpc/mm/ioremap.c
··· 98 98 99 99 return NULL; 100 100 } 101 - 102 - #ifdef CONFIG_ZONE_DEVICE 103 - /* 104 - * Override the generic version in mm/memremap.c. 105 - * 106 - * With hash translation, the direct-map range is mapped with just one 107 - * page size selected by htab_init_page_sizes(). Consult 108 - * mmu_psize_defs[] to determine the minimum page size alignment. 109 - */ 110 - unsigned long memremap_compat_align(void) 111 - { 112 - unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; 113 - 114 - if (radix_enabled()) 115 - return SUBSECTION_SIZE; 116 - return max(SUBSECTION_SIZE, 1UL << shift); 117 - 118 - } 119 - EXPORT_SYMBOL_GPL(memremap_compat_align); 120 - #endif
+2 -1
arch/powerpc/mm/kasan/book3s_32.c
··· 19 19 block = memblock_alloc(k_size, k_size_base); 20 20 21 21 if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { 22 - int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); 22 + int shift = ffs(k_size - k_size_base); 23 + int k_size_more = shift ? 1 << (shift - 1) : 0; 23 24 24 25 setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); 25 26 if (k_size_more >= SZ_128K)
-17
arch/powerpc/mm/maccess.c
··· 11 11 { 12 12 return is_kernel_addr((unsigned long)unsafe_src); 13 13 } 14 - 15 - int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) 16 - { 17 - unsigned int val, suffix; 18 - int err; 19 - 20 - err = copy_from_kernel_nofault(&val, src, sizeof(val)); 21 - if (err) 22 - return err; 23 - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { 24 - err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); 25 - *inst = ppc_inst_prefix(val, suffix); 26 - } else { 27 - *inst = ppc_inst(val); 28 - } 29 - return err; 30 - }
-2
arch/powerpc/mm/mem.c
··· 26 26 #include <mm/mmu_decl.h> 27 27 28 28 unsigned long long memory_limit; 29 - bool init_mem_is_free; 30 29 31 30 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; 32 31 EXPORT_SYMBOL(empty_zero_page); ··· 311 312 { 312 313 ppc_md.progress = ppc_printk_progress; 313 314 mark_initmem_nx(); 314 - init_mem_is_free = true; 315 315 free_initmem_default(POISON_FREE_INITMEM); 316 316 } 317 317
+34 -6
arch/powerpc/mm/mmap.c
··· 80 80 return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd); 81 81 } 82 82 83 + #ifdef HAVE_ARCH_UNMAPPED_AREA 83 84 #ifdef CONFIG_PPC_RADIX_MMU 84 85 /* 85 86 * Same function as generic code used only for radix, because we don't need to overload ··· 182 181 */ 183 182 return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 184 183 } 184 + #endif 185 + 186 + unsigned long arch_get_unmapped_area(struct file *filp, 187 + unsigned long addr, 188 + unsigned long len, 189 + unsigned long pgoff, 190 + unsigned long flags) 191 + { 192 + #ifdef CONFIG_PPC_MM_SLICES 193 + return slice_get_unmapped_area(addr, len, flags, 194 + mm_ctx_user_psize(&current->mm->context), 0); 195 + #else 196 + BUG(); 197 + #endif 198 + } 199 + 200 + unsigned long arch_get_unmapped_area_topdown(struct file *filp, 201 + const unsigned long addr0, 202 + const unsigned long len, 203 + const unsigned long pgoff, 204 + const unsigned long flags) 205 + { 206 + #ifdef CONFIG_PPC_MM_SLICES 207 + return slice_get_unmapped_area(addr0, len, flags, 208 + mm_ctx_user_psize(&current->mm->context), 1); 209 + #else 210 + BUG(); 211 + #endif 212 + } 213 + #endif /* HAVE_ARCH_UNMAPPED_AREA */ 185 214 186 215 static void radix__arch_pick_mmap_layout(struct mm_struct *mm, 187 216 unsigned long random_factor, 188 217 struct rlimit *rlim_stack) 189 218 { 219 + #ifdef CONFIG_PPC_RADIX_MMU 190 220 if (mmap_is_legacy(rlim_stack)) { 191 221 mm->mmap_base = TASK_UNMAPPED_BASE; 192 222 mm->get_unmapped_area = radix__arch_get_unmapped_area; ··· 225 193 mm->mmap_base = mmap_base(random_factor, rlim_stack); 226 194 mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown; 227 195 } 228 - } 229 - #else 230 - /* dummy */ 231 - extern void radix__arch_pick_mmap_layout(struct mm_struct *mm, 232 - unsigned long random_factor, 233 - struct rlimit *rlim_stack); 234 196 #endif 197 + } 198 + 235 199 /* 236 200 * This function, called very early during the creation of a new 237 201 * process VM image, sets up which VM 
layout function to use:
+10 -1
arch/powerpc/mm/mmu_context.c
··· 18 18 { 19 19 /* 32-bit keeps track of the current PGDIR in the thread struct */ 20 20 tsk->thread.pgdir = mm->pgd; 21 + #ifdef CONFIG_PPC_BOOK3S_32 22 + tsk->thread.sr0 = mm->context.sr0; 23 + #endif 24 + #if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) 25 + tsk->thread.pid = mm->context.id; 26 + #endif 21 27 } 22 28 #elif defined(CONFIG_PPC_BOOK3E_64) 23 29 static inline void switch_mm_pgdir(struct task_struct *tsk, ··· 31 25 { 32 26 /* 64-bit Book3E keeps track of current PGD in the PACA */ 33 27 get_paca()->pgd = mm->pgd; 28 + #ifdef CONFIG_PPC_KUAP 29 + tsk->thread.pid = mm->context.id; 30 + #endif 34 31 } 35 32 #else 36 33 static inline void switch_mm_pgdir(struct task_struct *tsk, ··· 90 81 * context 91 82 */ 92 83 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 93 - asm volatile ("dssall"); 84 + asm volatile (PPC_DSSALL); 94 85 95 86 if (!new_on_cpu) 96 87 membarrier_arch_switch_mm(prev, next, tsk);
+2 -18
arch/powerpc/mm/nohash/44x.c
··· 38 38 39 39 unsigned long tlb_47x_boltmap[1024/8]; 40 40 41 - static void ppc44x_update_tlb_hwater(void) 41 + static void __init ppc44x_update_tlb_hwater(void) 42 42 { 43 43 /* The TLB miss handlers hard codes the watermark in a cmpli 44 44 * instruction to improve performances rather than loading it ··· 122 122 /* 123 123 * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU 124 124 */ 125 - static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) 125 + static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys) 126 126 { 127 127 unsigned int rA; 128 128 int bolted; ··· 240 240 } 241 241 } 242 242 #endif /* CONFIG_SMP */ 243 - 244 - #ifdef CONFIG_PPC_KUEP 245 - void setup_kuep(bool disabled) 246 - { 247 - if (smp_processor_id() != boot_cpuid) 248 - return; 249 - 250 - if (disabled) 251 - patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); 252 - else 253 - pr_info("Activating Kernel Userspace Execution Prevention\n"); 254 - 255 - if (IS_ENABLED(CONFIG_PPC_47x) && disabled) 256 - patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); 257 - } 258 - #endif
-33
arch/powerpc/mm/nohash/8xx.c
··· 8 8 */ 9 9 10 10 #include <linux/memblock.h> 11 - #include <linux/mmu_context.h> 12 11 #include <linux/hugetlb.h> 13 - #include <asm/fixmap.h> 14 - #include <asm/code-patching.h> 15 - #include <asm/inst.h> 16 12 17 13 #include <mm/mmu_decl.h> 18 14 ··· 207 211 /* 8xx can only access 32MB at the moment */ 208 212 memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); 209 213 } 210 - 211 - #ifdef CONFIG_PPC_KUEP 212 - void __init setup_kuep(bool disabled) 213 - { 214 - if (disabled) 215 - return; 216 - 217 - pr_info("Activating Kernel Userspace Execution Prevention\n"); 218 - 219 - mtspr(SPRN_MI_AP, MI_APG_KUEP); 220 - } 221 - #endif 222 - 223 - #ifdef CONFIG_PPC_KUAP 224 - struct static_key_false disable_kuap_key; 225 - EXPORT_SYMBOL(disable_kuap_key); 226 - 227 - void __init setup_kuap(bool disabled) 228 - { 229 - if (disabled) { 230 - static_branch_enable(&disable_kuap_key); 231 - return; 232 - } 233 - 234 - pr_info("Activating Kernel Userspace Access Protection\n"); 235 - 236 - mtspr(SPRN_MD_AP, MD_APG_KUAP); 237 - } 238 - #endif 239 214 240 215 int pud_clear_huge(pud_t *pud) 241 216 {
+1 -1
arch/powerpc/mm/nohash/Makefile
··· 2 2 3 3 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 4 4 5 - obj-y += mmu_context.o tlb.o tlb_low.o 5 + obj-y += mmu_context.o tlb.o tlb_low.o kup.o 6 6 obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o 7 7 obj-$(CONFIG_40x) += 40x.o 8 8 obj-$(CONFIG_44x) += 44x.o
+15
arch/powerpc/mm/nohash/book3e_pgtable.c
··· 10 10 #include <asm/pgalloc.h> 11 11 #include <asm/tlb.h> 12 12 #include <asm/dma.h> 13 + #include <asm/code-patching.h> 13 14 14 15 #include <mm/mmu_decl.h> 15 16 ··· 115 114 116 115 smp_wmb(); 117 116 return 0; 117 + } 118 + 119 + void __patch_exception(int exc, unsigned long addr) 120 + { 121 + unsigned int *ibase = &interrupt_base_book3e; 122 + 123 + /* 124 + * Our exceptions vectors start with a NOP and -then- a branch 125 + * to deal with single stepping from userspace which stops on 126 + * the second instruction. Thus we need to patch the second 127 + * instruction of the exception, not the first one. 128 + */ 129 + 130 + patch_branch(ibase + (exc / 4) + 1, addr, 0); 118 131 }
+5 -5
arch/powerpc/mm/nohash/fsl_book3e.c
··· 60 60 phys_addr_t phys; 61 61 } tlbcam_addrs[NUM_TLBCAMS]; 62 62 63 - unsigned long tlbcam_sz(int idx) 64 - { 65 - return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; 66 - } 67 - 68 63 #ifdef CONFIG_FSL_BOOKE 69 64 /* 70 65 * Return PA for this VA if it is mapped by a CAM, or 0 ··· 257 262 void __init MMU_init_hw(void) 258 263 { 259 264 flush_instruction_cache(); 265 + } 266 + 267 + static unsigned long __init tlbcam_sz(int idx) 268 + { 269 + return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; 260 270 } 261 271 262 272 void __init adjust_total_lowmem(void)
+33
arch/powerpc/mm/nohash/kup.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * This file contains the routines for initializing kernel userspace protection 4 + */ 5 + 6 + #include <linux/export.h> 7 + #include <linux/init.h> 8 + #include <linux/jump_label.h> 9 + #include <linux/printk.h> 10 + #include <linux/smp.h> 11 + 12 + #include <asm/kup.h> 13 + #include <asm/smp.h> 14 + 15 + #ifdef CONFIG_PPC_KUAP 16 + struct static_key_false disable_kuap_key; 17 + EXPORT_SYMBOL(disable_kuap_key); 18 + 19 + void setup_kuap(bool disabled) 20 + { 21 + if (disabled) { 22 + if (IS_ENABLED(CONFIG_40x)) 23 + disable_kuep = true; 24 + if (smp_processor_id() == boot_cpuid) 25 + static_branch_enable(&disable_kuap_key); 26 + return; 27 + } 28 + 29 + pr_info("Activating Kernel Userspace Access Protection\n"); 30 + 31 + __prevent_user_access(KUAP_READ_WRITE); 32 + } 33 + #endif
+5 -1
arch/powerpc/mm/nohash/mmu_context.c
··· 33 33 #include <asm/mmu_context.h> 34 34 #include <asm/tlbflush.h> 35 35 #include <asm/smp.h> 36 + #include <asm/kup.h> 36 37 37 38 #include <mm/mmu_decl.h> 38 39 ··· 218 217 219 218 /* sync */ 220 219 mb(); 221 - } else { 220 + } else if (kuap_is_disabled()) { 222 221 if (IS_ENABLED(CONFIG_40x)) 223 222 mb(); /* sync */ 224 223 ··· 306 305 if (IS_ENABLED(CONFIG_BDI_SWITCH)) 307 306 abatron_pteptrs[1] = next->pgd; 308 307 set_context(id, next->pgd); 308 + #if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) 309 + tsk->thread.pid = id; 310 + #endif 309 311 raw_spin_unlock(&context_lock); 310 312 } 311 313
+2 -11
arch/powerpc/mm/nohash/tlb.c
··· 150 150 */ 151 151 #ifdef CONFIG_PPC64 152 152 153 - int mmu_linear_psize; /* Page size used for the linear mapping */ 154 153 int mmu_pte_psize; /* Page size used for PTE pages */ 155 154 int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ 156 155 int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ ··· 432 433 } 433 434 } 434 435 435 - static void setup_page_sizes(void) 436 + static void __init setup_page_sizes(void) 436 437 { 437 438 unsigned int tlb0cfg; 438 439 unsigned int tlb0ps; ··· 570 571 } 571 572 } 572 573 573 - static void setup_mmu_htw(void) 574 + static void __init setup_mmu_htw(void) 574 575 { 575 576 /* 576 577 * If we want to use HW tablewalk, enable it by patching the TLB miss ··· 656 657 657 658 static void __init early_init_mmu_global(void) 658 659 { 659 - /* XXX This will have to be decided at runtime, but right 660 - * now our boot and TLB miss code hard wires it. Ideally 661 - * we should find out a suitable page size and patch the 662 - * TLB miss code (either that or use the PACA to store 663 - * the value we want) 664 - */ 665 - mmu_linear_psize = MMU_PAGE_1G; 666 - 667 660 /* XXX This should be decided at runtime based on supported 668 661 * page sizes in the TLB, but for now let's assume 16M is 669 662 * always there and a good fit (which it probably is)
+34 -6
arch/powerpc/mm/nohash/tlb_low_64e.S
··· 128 128 129 129 bne tlb_miss_kernel_bolted 130 130 131 + tlb_miss_user_bolted: 132 + #ifdef CONFIG_PPC_KUAP 133 + mfspr r10,SPRN_MAS1 134 + rlwinm. r10,r10,0,0x3fff0000 135 + beq- tlb_miss_fault_bolted /* KUAP fault */ 136 + #endif 137 + 131 138 tlb_miss_common_bolted: 132 139 /* 133 140 * This is the guts of the TLB miss handler for bolted-linear. ··· 253 246 254 247 cmpldi cr0,r15,0 /* Check for user region */ 255 248 oris r11,r11,_PAGE_ACCESSED@h 256 - beq tlb_miss_common_bolted 249 + beq tlb_miss_user_bolted 257 250 b itlb_miss_kernel_bolted 258 251 259 252 #ifdef CONFIG_PPC_FSL_BOOK3E ··· 683 676 /* Check if required permissions are met */ 684 677 andc. r15,r11,r14 685 678 bne- normal_tlb_miss_access_fault 679 + #ifdef CONFIG_PPC_KUAP 680 + mfspr r11,SPRN_MAS1 681 + rlwinm. r10,r11,0,0x3fff0000 682 + beq- normal_tlb_miss_access_fault /* KUAP fault */ 683 + #endif 686 684 687 685 /* Now we build the MAS: 688 686 * ··· 701 689 * 702 690 * TODO: mix up code below for better scheduling 703 691 */ 704 - clrrdi r11,r16,12 /* Clear low crap in EA */ 705 - rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ 706 - mtspr SPRN_MAS2,r11 692 + clrrdi r10,r16,12 /* Clear low crap in EA */ 693 + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ 694 + mtspr SPRN_MAS2,r10 707 695 708 696 /* Check page size, if not standard, update MAS1 */ 709 - rldicl r11,r14,64-8,64-8 710 - cmpldi cr0,r11,BOOK3E_PAGESZ_4K 697 + rldicl r10,r14,64-8,64-8 698 + cmpldi cr0,r10,BOOK3E_PAGESZ_4K 711 699 beq- 1f 700 + #ifndef CONFIG_PPC_KUAP 712 701 mfspr r11,SPRN_MAS1 702 + #endif 713 703 rlwimi r11,r14,31,21,24 714 704 rlwinm r11,r11,0,21,19 715 705 mtspr SPRN_MAS1,r11 ··· 800 786 mfspr r10,SPRN_MAS1 801 787 rlwinm r10,r10,0,16,1 /* Clear TID */ 802 788 mtspr SPRN_MAS1,r10 789 + #ifdef CONFIG_PPC_KUAP 790 + b 2f 803 791 1: 792 + mfspr r10,SPRN_MAS1 793 + rlwinm. 
r10,r10,0,0x3fff0000 794 + beq- virt_page_table_tlb_miss_fault /* KUAP fault */ 795 + 2: 796 + #else 797 + 1: 798 + #endif 804 799 BEGIN_MMU_FTR_SECTION 805 800 /* Search if we already have a TLB entry for that virtual address, and 806 801 * if we do, bail out. ··· 1050 1027 * avoid too much complication, it will save/restore things for us 1051 1028 */ 1052 1029 htw_tlb_miss: 1030 + #ifdef CONFIG_PPC_KUAP 1031 + mfspr r10,SPRN_MAS1 1032 + rlwinm. r10,r10,0,0x3fff0000 1033 + beq- htw_tlb_miss_fault /* KUAP fault */ 1034 + #endif 1053 1035 /* Search if we already have a TLB entry for that virtual address, and 1054 1036 * if we do, bail out. 1055 1037 *
+3 -3
arch/powerpc/mm/numa.c
··· 134 134 return 0; 135 135 } 136 136 137 - static void reset_numa_cpu_lookup_table(void) 137 + static void __init reset_numa_cpu_lookup_table(void) 138 138 { 139 139 unsigned int cpu; 140 140 ··· 372 372 * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} 373 373 * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements} 374 374 */ 375 - static void initialize_form2_numa_distance_lookup_table(void) 375 + static void __init initialize_form2_numa_distance_lookup_table(void) 376 376 { 377 377 int i, j; 378 378 struct device_node *root; ··· 581 581 return 0; 582 582 } 583 583 584 - static int get_nid_and_numa_distance(struct drmem_lmb *lmb) 584 + static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb) 585 585 { 586 586 struct assoc_arrays aa = { .arrays = NULL }; 587 587 int default_nid = NUMA_NO_NODE;
+6 -3
arch/powerpc/mm/pgtable.c
··· 81 81 82 82 static pte_t set_pte_filter_hash(pte_t pte) 83 83 { 84 - if (radix_enabled()) 85 - return pte; 86 - 87 84 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 88 85 if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || 89 86 cpu_has_feature(CPU_FTR_NOEXECUTE))) { ··· 108 111 static inline pte_t set_pte_filter(pte_t pte) 109 112 { 110 113 struct page *pg; 114 + 115 + if (radix_enabled()) 116 + return pte; 111 117 112 118 if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 113 119 return set_pte_filter_hash(pte); ··· 143 143 int dirty) 144 144 { 145 145 struct page *pg; 146 + 147 + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) 148 + return pte; 146 149 147 150 if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) 148 151 return pte;
+11 -3
arch/powerpc/mm/pgtable_64.c
··· 102 102 struct page *p4d_page(p4d_t p4d) 103 103 { 104 104 if (p4d_is_leaf(p4d)) { 105 - VM_WARN_ON(!p4d_huge(p4d)); 105 + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) 106 + VM_WARN_ON(!p4d_huge(p4d)); 106 107 return pte_page(p4d_pte(p4d)); 107 108 } 108 109 return virt_to_page(p4d_pgtable(p4d)); ··· 113 112 struct page *pud_page(pud_t pud) 114 113 { 115 114 if (pud_is_leaf(pud)) { 116 - VM_WARN_ON(!pud_huge(pud)); 115 + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) 116 + VM_WARN_ON(!pud_huge(pud)); 117 117 return pte_page(pud_pte(pud)); 118 118 } 119 119 return virt_to_page(pud_pgtable(pud)); ··· 127 125 struct page *pmd_page(pmd_t pmd) 128 126 { 129 127 if (pmd_is_leaf(pmd)) { 130 - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); 128 + /* 129 + * vmalloc_to_page may be called on any vmap address (not only 130 + * vmalloc), and it uses pmd_page() etc., when huge vmap is 131 + * enabled so these checks can't be used. 132 + */ 133 + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) 134 + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); 131 135 return pte_page(pmd_pte(pmd)); 132 136 } 133 137 return virt_to_page(pmd_page_vaddr(pmd));
+1 -1
arch/powerpc/mm/ptdump/Makefile
··· 10 10 11 11 ifdef CONFIG_PTDUMP_DEBUGFS 12 12 obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o 13 - obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o 13 + obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o 14 14 endif
+3 -3
arch/powerpc/mm/ptdump/ptdump.c
··· 123 123 124 124 void pt_dump_size(struct seq_file *m, unsigned long size) 125 125 { 126 - static const char units[] = "KMGTPE"; 126 + static const char units[] = " KMGTPE"; 127 127 const char *unit = units; 128 128 129 129 /* Work out what appropriate unit to use */ ··· 176 176 177 177 pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); 178 178 pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); 179 - pt_dump_size(st->seq, (addr - st->start_address) >> 10); 179 + pt_dump_size(st->seq, addr - st->start_address); 180 180 } 181 181 182 182 static void note_prot_wx(struct pg_state *st, unsigned long addr) ··· 315 315 316 316 DEFINE_SHOW_ATTRIBUTE(ptdump); 317 317 318 - static void build_pgtable_complete_mask(void) 318 + static void __init build_pgtable_complete_mask(void) 319 319 { 320 320 unsigned int i, j; 321 321
-20
arch/powerpc/mm/slice.c
··· 639 639 } 640 640 EXPORT_SYMBOL_GPL(slice_get_unmapped_area); 641 641 642 - unsigned long arch_get_unmapped_area(struct file *filp, 643 - unsigned long addr, 644 - unsigned long len, 645 - unsigned long pgoff, 646 - unsigned long flags) 647 - { 648 - return slice_get_unmapped_area(addr, len, flags, 649 - mm_ctx_user_psize(&current->mm->context), 0); 650 - } 651 - 652 - unsigned long arch_get_unmapped_area_topdown(struct file *filp, 653 - const unsigned long addr0, 654 - const unsigned long len, 655 - const unsigned long pgoff, 656 - const unsigned long flags) 657 - { 658 - return slice_get_unmapped_area(addr0, len, flags, 659 - mm_ctx_user_psize(&current->mm->context), 1); 660 - } 661 - 662 642 unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) 663 643 { 664 644 unsigned char *psizes;
+13 -4
arch/powerpc/net/bpf_jit.h
··· 31 31 pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ 32 32 return -ERANGE; \ 33 33 } \ 34 - EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ 34 + EMIT(PPC_RAW_BRANCH(offset)); \ 35 35 } while (0) 36 36 37 37 /* blr; (unconditional 'branch' with link) to absolute address */ ··· 125 125 #define COND_LE (CR0_GT | COND_CMP_FALSE) 126 126 127 127 #define SEEN_FUNC 0x20000000 /* might call external helpers */ 128 - #define SEEN_STACK 0x40000000 /* uses BPF stack */ 129 - #define SEEN_TAILCALL 0x80000000 /* uses tail calls */ 128 + #define SEEN_TAILCALL 0x40000000 /* uses tail calls */ 130 129 131 130 #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ 132 131 #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ ··· 150 151 unsigned int idx; 151 152 unsigned int stack_size; 152 153 int b2p[ARRAY_SIZE(b2p)]; 154 + unsigned int exentry_idx; 153 155 }; 156 + 157 + #ifdef CONFIG_PPC32 158 + #define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ 159 + #else 160 + #define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ 161 + #endif 154 162 155 163 static inline void bpf_flush_icache(void *start, void *end) 156 164 { ··· 182 176 183 177 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); 184 178 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, 185 - u32 *addrs, bool extra_pass); 179 + u32 *addrs, int pass); 186 180 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); 187 181 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); 188 182 void bpf_jit_realloc_regs(struct codegen_context *ctx); 183 + 184 + int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, 185 + int insn_idx, int jmp_off, int dst_reg); 189 186 190 187 #endif 191 188
+62 -6
arch/powerpc/net/bpf_jit_comp.c
··· 101 101 struct bpf_prog *tmp_fp; 102 102 bool bpf_blinded = false; 103 103 bool extra_pass = false; 104 + u32 extable_len; 105 + u32 fixup_len; 104 106 105 107 if (!fp->jit_requested) 106 108 return org_fp; ··· 133 131 image = jit_data->image; 134 132 bpf_hdr = jit_data->header; 135 133 proglen = jit_data->proglen; 136 - alloclen = proglen + FUNCTION_DESCR_SIZE; 137 134 extra_pass = true; 138 135 goto skip_init_ctx; 139 136 } ··· 150 149 cgctx.stack_size = round_up(fp->aux->stack_depth, 16); 151 150 152 151 /* Scouting faux-generate pass 0 */ 153 - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { 152 + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { 154 153 /* We hit something illegal or unsupported. */ 155 154 fp = org_fp; 156 155 goto out_addrs; ··· 163 162 */ 164 163 if (cgctx.seen & SEEN_TAILCALL) { 165 164 cgctx.idx = 0; 166 - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { 165 + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { 167 166 fp = org_fp; 168 167 goto out_addrs; 169 168 } ··· 178 177 bpf_jit_build_prologue(0, &cgctx); 179 178 bpf_jit_build_epilogue(0, &cgctx); 180 179 180 + fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; 181 + extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); 182 + 181 183 proglen = cgctx.idx * 4; 182 - alloclen = proglen + FUNCTION_DESCR_SIZE; 184 + alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; 183 185 184 186 bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); 185 187 if (!bpf_hdr) { 186 188 fp = org_fp; 187 189 goto out_addrs; 188 190 } 191 + 192 + if (extable_len) 193 + fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; 189 194 190 195 skip_init_ctx: 191 196 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); ··· 217 210 /* Now build the prologue, body code & epilogue for real. 
*/ 218 211 cgctx.idx = 0; 219 212 bpf_jit_build_prologue(code_base, &cgctx); 220 - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { 213 + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { 221 214 bpf_jit_binary_free(bpf_hdr); 222 215 fp = org_fp; 223 216 goto out_addrs; ··· 245 238 246 239 fp->bpf_func = (void *)image; 247 240 fp->jited = 1; 248 - fp->jited_len = alloclen; 241 + fp->jited_len = proglen + FUNCTION_DESCR_SIZE; 249 242 250 243 bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); 251 244 if (!fp->is_func || extra_pass) { ··· 268 261 bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); 269 262 270 263 return fp; 264 + } 265 + 266 + /* 267 + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling 268 + * this function, as this only applies to BPF_PROBE_MEM, for now. 269 + */ 270 + int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, 271 + int insn_idx, int jmp_off, int dst_reg) 272 + { 273 + off_t offset; 274 + unsigned long pc; 275 + struct exception_table_entry *ex; 276 + u32 *fixup; 277 + 278 + /* Populate extable entries only in the last pass */ 279 + if (pass != 2) 280 + return 0; 281 + 282 + if (!fp->aux->extable || 283 + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) 284 + return -EINVAL; 285 + 286 + pc = (unsigned long)&image[insn_idx]; 287 + 288 + fixup = (void *)fp->aux->extable - 289 + (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + 290 + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); 291 + 292 + fixup[0] = PPC_RAW_LI(dst_reg, 0); 293 + if (IS_ENABLED(CONFIG_PPC32)) 294 + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ 295 + 296 + fixup[BPF_FIXUP_LEN - 1] = 297 + PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); 298 + 299 + ex = &fp->aux->extable[ctx->exentry_idx]; 300 + 301 + offset = pc - (long)&ex->insn; 302 + if (WARN_ON_ONCE(offset >= 0 || offset < 
INT_MIN)) 303 + return -ERANGE; 304 + ex->insn = offset; 305 + 306 + offset = (long)fixup - (long)&ex->fixup; 307 + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) 308 + return -ERANGE; 309 + ex->fixup = offset; 310 + 311 + ctx->exentry_idx++; 312 + return 0; 271 313 }
+85 -16
arch/powerpc/net/bpf_jit_comp32.c
··· 268 268 269 269 /* Assemble the body code between the prologue & epilogue */ 270 270 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, 271 - u32 *addrs, bool extra_pass) 271 + u32 *addrs, int pass) 272 272 { 273 273 const struct bpf_insn *insn = fp->insnsi; 274 274 int flen = fp->len; ··· 284 284 u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); 285 285 u32 src_reg_h = src_reg - 1; 286 286 u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); 287 + u32 size = BPF_SIZE(code); 287 288 s16 off = insn[i].off; 288 289 s32 imm = insn[i].imm; 289 290 bool func_addr_fixed; ··· 813 812 * BPF_LDX 814 813 */ 815 814 case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ 816 - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); 817 - if (!fp->aux->verifier_zext) 818 - EMIT(PPC_RAW_LI(dst_reg_h, 0)); 819 - break; 815 + case BPF_LDX | BPF_PROBE_MEM | BPF_B: 820 816 case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ 821 - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); 822 - if (!fp->aux->verifier_zext) 823 - EMIT(PPC_RAW_LI(dst_reg_h, 0)); 824 - break; 817 + case BPF_LDX | BPF_PROBE_MEM | BPF_H: 825 818 case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ 826 - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); 827 - if (!fp->aux->verifier_zext) 828 - EMIT(PPC_RAW_LI(dst_reg_h, 0)); 829 - break; 819 + case BPF_LDX | BPF_PROBE_MEM | BPF_W: 830 820 case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ 831 - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); 832 - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); 821 + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 822 + /* 823 + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid 824 + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM 825 + * load only if addr is kernel address (see is_kernel_addr()), otherwise 826 + * set dst_reg=0 and move on. 
827 + */ 828 + if (BPF_MODE(code) == BPF_PROBE_MEM) { 829 + PPC_LI32(_R0, TASK_SIZE - off); 830 + EMIT(PPC_RAW_CMPLW(src_reg, _R0)); 831 + PPC_BCC(COND_GT, (ctx->idx + 5) * 4); 832 + EMIT(PPC_RAW_LI(dst_reg, 0)); 833 + /* 834 + * For BPF_DW case, "li reg_h,0" would be needed when 835 + * !fp->aux->verifier_zext. Emit NOP otherwise. 836 + * 837 + * Note that "li reg_h,0" is emitted for BPF_B/H/W case, 838 + * if necessary. So, jump there instead of emitting an 839 + * additional "li reg_h,0" instruction. 840 + */ 841 + if (size == BPF_DW && !fp->aux->verifier_zext) 842 + EMIT(PPC_RAW_LI(dst_reg_h, 0)); 843 + else 844 + EMIT(PPC_RAW_NOP()); 845 + /* 846 + * Need to jump two instructions instead of one for BPF_DW case 847 + * as there are two load instructions for dst_reg_h & dst_reg 848 + * respectively. 849 + */ 850 + if (size == BPF_DW) 851 + PPC_JMP((ctx->idx + 3) * 4); 852 + else 853 + PPC_JMP((ctx->idx + 2) * 4); 854 + } 855 + 856 + switch (size) { 857 + case BPF_B: 858 + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); 859 + break; 860 + case BPF_H: 861 + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); 862 + break; 863 + case BPF_W: 864 + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); 865 + break; 866 + case BPF_DW: 867 + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); 868 + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); 869 + break; 870 + } 871 + 872 + if (size != BPF_DW && !fp->aux->verifier_zext) 873 + EMIT(PPC_RAW_LI(dst_reg_h, 0)); 874 + 875 + if (BPF_MODE(code) == BPF_PROBE_MEM) { 876 + int insn_idx = ctx->idx - 1; 877 + int jmp_off = 4; 878 + 879 + /* 880 + * In case of BPF_DW, two lwz instructions are emitted, one 881 + * for higher 32-bit and another for lower 32-bit. So, set 882 + * ex->insn to the first of the two and jump over both 883 + * instructions in fixup. 884 + * 885 + * Similarly, with !verifier_zext, two instructions are 886 + * emitted for BPF_B/H/W case. So, set ex->insn to the 887 + * instruction that could fault and skip over both 888 + * instructions. 
889 + */ 890 + if (size == BPF_DW || !fp->aux->verifier_zext) { 891 + insn_idx -= 1; 892 + jmp_off += 4; 893 + } 894 + 895 + ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, 896 + jmp_off, dst_reg); 897 + if (ret) 898 + return ret; 899 + } 833 900 break; 834 901 835 902 /* ··· 931 862 case BPF_JMP | BPF_CALL: 932 863 ctx->seen |= SEEN_FUNC; 933 864 934 - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, 865 + ret = bpf_jit_get_func_addr(fp, &insn[i], false, 935 866 &func_addr, &func_addr_fixed); 936 867 if (ret < 0) 937 868 return ret;
+57 -15
arch/powerpc/net/bpf_jit_comp64.c
··· 297 297 298 298 /* Assemble the body code between the prologue & epilogue */ 299 299 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, 300 - u32 *addrs, bool extra_pass) 300 + u32 *addrs, int pass) 301 301 { 302 302 enum stf_barrier_type stf_barrier = stf_barrier_type_get(); 303 303 const struct bpf_insn *insn = fp->insnsi; ··· 311 311 u32 code = insn[i].code; 312 312 u32 dst_reg = b2p[insn[i].dst_reg]; 313 313 u32 src_reg = b2p[insn[i].src_reg]; 314 + u32 size = BPF_SIZE(code); 314 315 s16 off = insn[i].off; 315 316 s32 imm = insn[i].imm; 316 317 bool func_addr_fixed; ··· 779 778 */ 780 779 /* dst = *(u8 *)(ul) (src + off) */ 781 780 case BPF_LDX | BPF_MEM | BPF_B: 782 - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); 783 - if (insn_is_zext(&insn[i + 1])) 784 - addrs[++i] = ctx->idx * 4; 785 - break; 781 + case BPF_LDX | BPF_PROBE_MEM | BPF_B: 786 782 /* dst = *(u16 *)(ul) (src + off) */ 787 783 case BPF_LDX | BPF_MEM | BPF_H: 788 - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); 789 - if (insn_is_zext(&insn[i + 1])) 790 - addrs[++i] = ctx->idx * 4; 791 - break; 784 + case BPF_LDX | BPF_PROBE_MEM | BPF_H: 792 785 /* dst = *(u32 *)(ul) (src + off) */ 793 786 case BPF_LDX | BPF_MEM | BPF_W: 794 - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); 795 - if (insn_is_zext(&insn[i + 1])) 796 - addrs[++i] = ctx->idx * 4; 797 - break; 787 + case BPF_LDX | BPF_PROBE_MEM | BPF_W: 798 788 /* dst = *(u64 *)(ul) (src + off) */ 799 789 case BPF_LDX | BPF_MEM | BPF_DW: 800 - PPC_BPF_LL(dst_reg, src_reg, off); 790 + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 791 + /* 792 + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid 793 + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM 794 + * load only if addr is kernel address (see is_kernel_addr()), otherwise 795 + * set dst_reg=0 and move on. 
796 + */ 797 + if (BPF_MODE(code) == BPF_PROBE_MEM) { 798 + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); 799 + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) 800 + PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); 801 + else /* BOOK3S_64 */ 802 + PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); 803 + EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); 804 + PPC_BCC(COND_GT, (ctx->idx + 4) * 4); 805 + EMIT(PPC_RAW_LI(dst_reg, 0)); 806 + /* 807 + * Check if 'off' is word aligned because PPC_BPF_LL() 808 + * (BPF_DW case) generates two instructions if 'off' is not 809 + * word-aligned and one instruction otherwise. 810 + */ 811 + if (BPF_SIZE(code) == BPF_DW && (off & 3)) 812 + PPC_JMP((ctx->idx + 3) * 4); 813 + else 814 + PPC_JMP((ctx->idx + 2) * 4); 815 + } 816 + 817 + switch (size) { 818 + case BPF_B: 819 + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); 820 + break; 821 + case BPF_H: 822 + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); 823 + break; 824 + case BPF_W: 825 + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); 826 + break; 827 + case BPF_DW: 828 + PPC_BPF_LL(dst_reg, src_reg, off); 829 + break; 830 + } 831 + 832 + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) 833 + addrs[++i] = ctx->idx * 4; 834 + 835 + if (BPF_MODE(code) == BPF_PROBE_MEM) { 836 + ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, 837 + 4, dst_reg); 838 + if (ret) 839 + return ret; 840 + } 801 841 break; 802 842 803 843 /* ··· 873 831 case BPF_JMP | BPF_CALL: 874 832 ctx->seen |= SEEN_FUNC; 875 833 876 - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, 834 + ret = bpf_jit_get_func_addr(fp, &insn[i], false, 877 835 &func_addr, &func_addr_fixed); 878 836 if (ret < 0) 879 837 return ret;
+1 -1
arch/powerpc/perf/8xx-pmu.c
··· 153 153 154 154 static void mpc8xx_pmu_del(struct perf_event *event, int flags) 155 155 { 156 - struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); 156 + ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); 157 157 158 158 mpc8xx_pmu_read(event); 159 159
+124 -2
arch/powerpc/perf/core-book3s.c
··· 17 17 #include <asm/firmware.h> 18 18 #include <asm/ptrace.h> 19 19 #include <asm/code-patching.h> 20 + #include <asm/hw_irq.h> 20 21 #include <asm/interrupt.h> 21 22 22 23 #ifdef CONFIG_PPC64 ··· 858 857 } 859 858 } 860 859 860 + static int any_pmc_overflown(struct cpu_hw_events *cpuhw) 861 + { 862 + int i, idx; 863 + 864 + for (i = 0; i < cpuhw->n_events; i++) { 865 + idx = cpuhw->event[i]->hw.idx; 866 + if ((idx) && ((int)read_pmc(idx) < 0)) 867 + return idx; 868 + } 869 + 870 + return 0; 871 + } 872 + 861 873 /* Called from sysrq_handle_showregs() */ 862 874 void perf_event_print_debug(void) 863 875 { ··· 1295 1281 1296 1282 /* 1297 1283 * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 1284 + * Also clear PMXE to disable PMI's getting triggered in some 1285 + * corner cases during PMU disable. 1298 1286 */ 1299 1287 val = mmcr0 = mfspr(SPRN_MMCR0); 1300 1288 val |= MMCR0_FC; 1301 1289 val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | 1302 - MMCR0_FC56); 1290 + MMCR0_PMXE | MMCR0_FC56); 1303 1291 /* Set mmcr0 PMCCEXT for p10 */ 1304 1292 if (ppmu->flags & PPMU_ARCH_31) 1305 1293 val |= MMCR0_PMCCEXT; ··· 1314 1298 write_mmcr0(cpuhw, val); 1315 1299 mb(); 1316 1300 isync(); 1301 + 1302 + /* 1303 + * Some corner cases could clear the PMU counter overflow 1304 + * while a masked PMI is pending. One such case is when 1305 + * a PMI happens during interrupt replay and perf counter 1306 + * values are cleared by PMU callbacks before replay. 1307 + * 1308 + * If any PMC corresponding to the active PMU events are 1309 + * overflown, disable the interrupt by clearing the paca 1310 + * bit for PMI since we are disabling the PMU now. 1311 + * Otherwise provide a warning if there is PMI pending, but 1312 + * no counter is found overflown. 
1313 + */ 1314 + if (any_pmc_overflown(cpuhw)) 1315 + clear_pmi_irq_pending(); 1316 + else 1317 + WARN_ON(pmi_irq_pending()); 1317 1318 1318 1319 val = mmcra = cpuhw->mmcr.mmcra; 1319 1320 ··· 1423 1390 * (possibly updated for removal of events). 1424 1391 */ 1425 1392 if (!cpuhw->n_added) { 1393 + /* 1394 + * If there is any active event with an overflown PMC 1395 + * value, set back PACA_IRQ_PMI which would have been 1396 + * cleared in power_pmu_disable(). 1397 + */ 1398 + hard_irq_disable(); 1399 + if (any_pmc_overflown(cpuhw)) 1400 + set_pmi_irq_pending(); 1401 + 1426 1402 mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); 1427 1403 mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); 1428 1404 if (ppmu->flags & PPMU_ARCH_31) ··· 2379 2337 break; 2380 2338 } 2381 2339 } 2340 + 2341 + /* 2342 + * Clear PACA_IRQ_PMI in case it was set by 2343 + * set_pmi_irq_pending() when PMU was enabled 2344 + * after accounting for interrupts. 2345 + */ 2346 + clear_pmi_irq_pending(); 2347 + 2382 2348 if (!active) 2383 2349 /* reset non active counters that have overflowed */ 2384 2350 write_pmc(i + 1, 0); ··· 2406 2356 } 2407 2357 } 2408 2358 } 2359 + 2360 + /* 2361 + * During system wide profiling or while specific CPU is monitored for an 2362 + * event, some corner cases could cause PMC to overflow in idle path. This 2363 + * will trigger a PMI after waking up from idle. Since counter values are _not_ 2364 + * saved/restored in idle path, can lead to below "Can't find PMC" message. 2365 + */ 2409 2366 if (unlikely(!found) && !arch_irq_disabled_regs(regs)) 2410 2367 printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); 2411 2368 ··· 2438 2381 perf_sample_event_took(sched_clock() - start_clock); 2439 2382 } 2440 2383 2384 + /* 2385 + * If the perf subsystem wants performance monitor interrupts as soon as 2386 + * possible (e.g., to sample the instruction address and stack chain), 2387 + * this should return true. 
The IRQ masking code can then enable MSR[EE] 2388 + * in some places (e.g., interrupt handlers) that allows PMI interrupts 2389 + * through to improve accuracy of profiles, at the cost of some performance. 2390 + * 2391 + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR 2392 + * access), but in that case there is no need for prompt PMI handling. 2393 + * 2394 + * This currently returns true if any perf counter is being used. It 2395 + * could possibly return false if only events are being counted rather than 2396 + * samples being taken, but for now this is good enough. 2397 + */ 2398 + bool power_pmu_wants_prompt_pmi(void) 2399 + { 2400 + struct cpu_hw_events *cpuhw; 2401 + 2402 + /* 2403 + * This could simply test local_paca->pmcregs_in_use if that were not 2404 + * under ifdef KVM. 2405 + */ 2406 + 2407 + if (!ppmu) 2408 + return false; 2409 + 2410 + cpuhw = this_cpu_ptr(&cpu_hw_events); 2411 + return cpuhw->n_events; 2412 + } 2413 + 2441 2414 static int power_pmu_prepare_cpu(unsigned int cpu) 2442 2415 { 2443 2416 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); ··· 2479 2392 return 0; 2480 2393 } 2481 2394 2482 - int register_power_pmu(struct power_pmu *pmu) 2395 + int __init register_power_pmu(struct power_pmu *pmu) 2483 2396 { 2484 2397 if (ppmu) 2485 2398 return -EBUSY; /* something's already registered */ ··· 2506 2419 } 2507 2420 2508 2421 #ifdef CONFIG_PPC64 2422 + static bool pmu_override = false; 2423 + static unsigned long pmu_override_val; 2424 + static void do_pmu_override(void *data) 2425 + { 2426 + ppc_set_pmu_inuse(1); 2427 + if (pmu_override_val) 2428 + mtspr(SPRN_MMCR1, pmu_override_val); 2429 + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); 2430 + } 2431 + 2509 2432 static int __init init_ppc64_pmu(void) 2510 2433 { 2434 + if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) { 2435 + pr_warn("disabling perf due to pmu_override= command line option.\n"); 2436 + on_each_cpu(do_pmu_override, NULL, 1); 2437 + 
return 0; 2438 + } 2439 + 2511 2440 /* run through all the pmu drivers one at a time */ 2512 2441 if (!init_power5_pmu()) 2513 2442 return 0; ··· 2545 2442 return init_generic_compat_pmu(); 2546 2443 } 2547 2444 early_initcall(init_ppc64_pmu); 2445 + 2446 + static int __init pmu_setup(char *str) 2447 + { 2448 + unsigned long val; 2449 + 2450 + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) 2451 + return 0; 2452 + 2453 + pmu_override = true; 2454 + 2455 + if (kstrtoul(str, 0, &val)) 2456 + val = 0; 2457 + 2458 + pmu_override_val = val; 2459 + 2460 + return 1; 2461 + } 2462 + __setup("pmu_override=", pmu_setup); 2463 + 2548 2464 #endif
+1 -1
arch/powerpc/perf/generic-compat-pmu.c
··· 307 307 .attr_groups = generic_compat_pmu_attr_groups, 308 308 }; 309 309 310 - int init_generic_compat_pmu(void) 310 + int __init init_generic_compat_pmu(void) 311 311 { 312 312 int rc = 0; 313 313
+1 -1
arch/powerpc/perf/hv-24x7.c
··· 756 756 } 757 757 758 758 if (calc_ev_end > ev_end) { 759 - pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", 759 + pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", 760 760 event_idx, event, ev_end, offset, calc_ev_end); 761 761 return -1; 762 762 }
+9 -9
arch/powerpc/perf/internal.h
··· 2 2 // 3 3 // Copyright 2019 Madhavan Srinivasan, IBM Corporation. 4 4 5 - extern int init_ppc970_pmu(void); 6 - extern int init_power5_pmu(void); 7 - extern int init_power5p_pmu(void); 8 - extern int init_power6_pmu(void); 9 - extern int init_power7_pmu(void); 10 - extern int init_power8_pmu(void); 11 - extern int init_power9_pmu(void); 12 - extern int init_power10_pmu(void); 13 - extern int init_generic_compat_pmu(void); 5 + int __init init_ppc970_pmu(void); 6 + int __init init_power5_pmu(void); 7 + int __init init_power5p_pmu(void); 8 + int __init init_power6_pmu(void); 9 + int __init init_power7_pmu(void); 10 + int __init init_power8_pmu(void); 11 + int __init init_power9_pmu(void); 12 + int __init init_power10_pmu(void); 13 + int __init init_generic_compat_pmu(void);
+45 -15
arch/powerpc/perf/isa207-common.c
··· 220 220 /* Nothing to do */ 221 221 break; 222 222 case 1: 223 - ret = PH(LVL, L1); 223 + ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); 224 224 break; 225 225 case 2: 226 - ret = PH(LVL, L2); 226 + ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); 227 227 break; 228 228 case 3: 229 - ret = PH(LVL, L3); 229 + ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); 230 230 break; 231 231 case 4: 232 - if (sub_idx <= 1) 233 - ret = PH(LVL, LOC_RAM); 234 - else if (sub_idx > 1 && sub_idx <= 2) 235 - ret = PH(LVL, REM_RAM1); 236 - else 237 - ret = PH(LVL, REM_RAM2); 238 - ret |= P(SNOOP, HIT); 232 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 233 + ret = P(SNOOP, HIT); 234 + 235 + if (sub_idx == 1) 236 + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); 237 + else if (sub_idx == 2 || sub_idx == 3) 238 + ret |= P(LVL, HIT) | LEVEL(PMEM); 239 + else if (sub_idx == 4) 240 + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); 241 + else if (sub_idx == 5 || sub_idx == 7) 242 + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; 243 + else if (sub_idx == 6) 244 + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); 245 + } else { 246 + if (sub_idx <= 1) 247 + ret = PH(LVL, LOC_RAM); 248 + else if (sub_idx > 1 && sub_idx <= 2) 249 + ret = PH(LVL, REM_RAM1); 250 + else 251 + ret = PH(LVL, REM_RAM2); 252 + ret |= P(SNOOP, HIT); 253 + } 239 254 break; 240 255 case 5: 241 256 if (cpu_has_feature(CPU_FTR_ARCH_31)) { ··· 276 261 } 277 262 break; 278 263 case 6: 279 - ret = PH(LVL, REM_CCE2); 280 - if ((sub_idx == 0) || (sub_idx == 2)) 281 - ret |= P(SNOOP, HIT); 282 - else if ((sub_idx == 1) || (sub_idx == 3)) 283 - ret |= P(SNOOP, HITM); 264 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { 265 + if (sub_idx == 0) 266 + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | 267 + P(SNOOP, HIT) | P(HOPS, 2); 268 + else if (sub_idx == 1) 269 + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | 270 + P(SNOOP, HITM) | P(HOPS, 2); 271 + else if (sub_idx == 2) 272 + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | 273 + 
P(SNOOP, HIT) | P(HOPS, 3); 274 + else if (sub_idx == 3) 275 + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | 276 + P(SNOOP, HITM) | P(HOPS, 3); 277 + } else { 278 + ret = PH(LVL, REM_CCE2); 279 + if (sub_idx == 0 || sub_idx == 2) 280 + ret |= P(SNOOP, HIT); 281 + else if (sub_idx == 1 || sub_idx == 3) 282 + ret |= P(SNOOP, HITM); 283 + } 284 284 break; 285 285 case 7: 286 286 ret = PM(LVL, L1);
+1 -1
arch/powerpc/perf/power10-pmu.c
··· 592 592 .check_attr_config = power10_check_attr_config, 593 593 }; 594 594 595 - int init_power10_pmu(void) 595 + int __init init_power10_pmu(void) 596 596 { 597 597 unsigned int pvr; 598 598 int rc;
+1 -1
arch/powerpc/perf/power5+-pmu.c
··· 677 677 .cache_events = &power5p_cache_events, 678 678 }; 679 679 680 - int init_power5p_pmu(void) 680 + int __init init_power5p_pmu(void) 681 681 { 682 682 if (!cur_cpu_spec->oprofile_cpu_type || 683 683 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
+1 -1
arch/powerpc/perf/power5-pmu.c
··· 618 618 .flags = PPMU_HAS_SSLOT, 619 619 }; 620 620 621 - int init_power5_pmu(void) 621 + int __init init_power5_pmu(void) 622 622 { 623 623 if (!cur_cpu_spec->oprofile_cpu_type || 624 624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
+1 -1
arch/powerpc/perf/power6-pmu.c
··· 539 539 .cache_events = &power6_cache_events, 540 540 }; 541 541 542 - int init_power6_pmu(void) 542 + int __init init_power6_pmu(void) 543 543 { 544 544 if (!cur_cpu_spec->oprofile_cpu_type || 545 545 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+1 -1
arch/powerpc/perf/power7-pmu.c
··· 445 445 .cache_events = &power7_cache_events, 446 446 }; 447 447 448 - int init_power7_pmu(void) 448 + int __init init_power7_pmu(void) 449 449 { 450 450 if (!cur_cpu_spec->oprofile_cpu_type || 451 451 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
+1 -1
arch/powerpc/perf/power8-pmu.c
··· 378 378 .bhrb_nr = 32, 379 379 }; 380 380 381 - int init_power8_pmu(void) 381 + int __init init_power8_pmu(void) 382 382 { 383 383 int rc; 384 384
+1 -1
arch/powerpc/perf/power9-pmu.c
··· 452 452 .check_attr_config = power9_check_attr_config, 453 453 }; 454 454 455 - int init_power9_pmu(void) 455 + int __init init_power9_pmu(void) 456 456 { 457 457 int rc = 0; 458 458 unsigned int pvr = mfspr(SPRN_PVR);
+1 -1
arch/powerpc/perf/ppc970-pmu.c
··· 489 489 .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, 490 490 }; 491 491 492 - int init_ppc970_pmu(void) 492 + int __init init_ppc970_pmu(void) 493 493 { 494 494 if (!cur_cpu_spec->oprofile_cpu_type || 495 495 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
+2 -2
arch/powerpc/platforms/44x/fsp2.c
··· 197 197 } 198 198 } 199 199 200 - static void node_irq_request(const char *compat, irq_handler_t errirq_handler) 200 + static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler) 201 201 { 202 202 struct device_node *np; 203 203 unsigned int irq; ··· 222 222 } 223 223 } 224 224 225 - static void critical_irq_setup(void) 225 + static void __init critical_irq_setup(void) 226 226 { 227 227 node_irq_request(FSP2_CMU_ERR, cmu_err_handler); 228 228 node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+2 -2
arch/powerpc/platforms/4xx/cpm.c
··· 163 163 static struct kobj_attribute cpm_idle_attr = 164 164 __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store); 165 165 166 - static void cpm_idle_config_sysfs(void) 166 + static void __init cpm_idle_config_sysfs(void) 167 167 { 168 168 struct device *dev; 169 169 unsigned long ret; ··· 231 231 .enter = cpm_suspend_enter, 232 232 }; 233 233 234 - static int cpm_get_uint_property(struct device_node *np, 234 + static int __init cpm_get_uint_property(struct device_node *np, 235 235 const char *name) 236 236 { 237 237 int len;
+1 -1
arch/powerpc/platforms/4xx/pci.c
··· 1273 1273 return 2; 1274 1274 } 1275 1275 1276 - static void ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) 1276 + static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) 1277 1277 { 1278 1278 /* Assert the PE0_PHY reset */ 1279 1279 mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000);
+26 -26
arch/powerpc/platforms/512x/clock-commonclk.c
··· 97 97 MPC512x_SOC_MPC5125, 98 98 } soc; 99 99 100 - static void mpc512x_clk_determine_soc(void) 100 + static void __init mpc512x_clk_determine_soc(void) 101 101 { 102 102 if (of_machine_is_compatible("fsl,mpc5121")) { 103 103 soc = MPC512x_SOC_MPC5121; ··· 113 113 } 114 114 } 115 115 116 - static bool soc_has_mbx(void) 116 + static bool __init soc_has_mbx(void) 117 117 { 118 118 if (soc == MPC512x_SOC_MPC5121) 119 119 return true; 120 120 return false; 121 121 } 122 122 123 - static bool soc_has_axe(void) 123 + static bool __init soc_has_axe(void) 124 124 { 125 125 if (soc == MPC512x_SOC_MPC5125) 126 126 return false; 127 127 return true; 128 128 } 129 129 130 - static bool soc_has_viu(void) 130 + static bool __init soc_has_viu(void) 131 131 { 132 132 if (soc == MPC512x_SOC_MPC5125) 133 133 return false; 134 134 return true; 135 135 } 136 136 137 - static bool soc_has_spdif(void) 137 + static bool __init soc_has_spdif(void) 138 138 { 139 139 if (soc == MPC512x_SOC_MPC5125) 140 140 return false; 141 141 return true; 142 142 } 143 143 144 - static bool soc_has_pata(void) 144 + static bool __init soc_has_pata(void) 145 145 { 146 146 if (soc == MPC512x_SOC_MPC5125) 147 147 return false; 148 148 return true; 149 149 } 150 150 151 - static bool soc_has_sata(void) 151 + static bool __init soc_has_sata(void) 152 152 { 153 153 if (soc == MPC512x_SOC_MPC5125) 154 154 return false; 155 155 return true; 156 156 } 157 157 158 - static bool soc_has_pci(void) 158 + static bool __init soc_has_pci(void) 159 159 { 160 160 if (soc == MPC512x_SOC_MPC5125) 161 161 return false; 162 162 return true; 163 163 } 164 164 165 - static bool soc_has_fec2(void) 165 + static bool __init soc_has_fec2(void) 166 166 { 167 167 if (soc == MPC512x_SOC_MPC5125) 168 168 return true; 169 169 return false; 170 170 } 171 171 172 - static int soc_max_pscnum(void) 172 + static int __init soc_max_pscnum(void) 173 173 { 174 174 if (soc == MPC512x_SOC_MPC5125) 175 175 return 10; 176 176 return 12; 177 177 } 
178 178 179 - static bool soc_has_sdhc2(void) 179 + static bool __init soc_has_sdhc2(void) 180 180 { 181 181 if (soc == MPC512x_SOC_MPC5125) 182 182 return true; 183 183 return false; 184 184 } 185 185 186 - static bool soc_has_nfc_5125(void) 186 + static bool __init soc_has_nfc_5125(void) 187 187 { 188 188 if (soc == MPC512x_SOC_MPC5125) 189 189 return true; 190 190 return false; 191 191 } 192 192 193 - static bool soc_has_outclk(void) 193 + static bool __init soc_has_outclk(void) 194 194 { 195 195 if (soc == MPC512x_SOC_MPC5125) 196 196 return true; 197 197 return false; 198 198 } 199 199 200 - static bool soc_has_cpmf_0_bypass(void) 200 + static bool __init soc_has_cpmf_0_bypass(void) 201 201 { 202 202 if (soc == MPC512x_SOC_MPC5125) 203 203 return true; 204 204 return false; 205 205 } 206 206 207 - static bool soc_has_mclk_mux0_canin(void) 207 + static bool __init soc_has_mclk_mux0_canin(void) 208 208 { 209 209 if (soc == MPC512x_SOC_MPC5125) 210 210 return true; ··· 294 294 } 295 295 296 296 /* get the SPMF and translate it into the "sys pll" multiplier */ 297 - static int get_spmf_mult(void) 297 + static int __init get_spmf_mult(void) 298 298 { 299 299 static int spmf_to_mult[] = { 300 300 68, 1, 12, 16, 20, 24, 28, 32, ··· 312 312 * values returned from here are a multiple of the real factor since the 313 313 * divide ratio is fractional 314 314 */ 315 - static int get_sys_div_x2(void) 315 + static int __init get_sys_div_x2(void) 316 316 { 317 317 static int sysdiv_code_to_x2[] = { 318 318 4, 5, 6, 7, 8, 9, 10, 14, ··· 333 333 * values returned from here are a multiple of the real factor since the 334 334 * multiplier ratio is fractional 335 335 */ 336 - static int get_cpmf_mult_x2(void) 336 + static int __init get_cpmf_mult_x2(void) 337 337 { 338 338 static int cpmf_to_mult_x36[] = { 339 339 /* 0b000 is "times 36" */ ··· 379 379 { .div = 0, }, 380 380 }; 381 381 382 - static int get_freq_from_dt(char *propname) 382 + static int __init get_freq_from_dt(char 
*propname) 383 383 { 384 384 struct device_node *np; 385 385 const unsigned int *prop; ··· 396 396 return val; 397 397 } 398 398 399 - static void mpc512x_clk_preset_data(void) 399 + static void __init mpc512x_clk_preset_data(void) 400 400 { 401 401 size_t i; 402 402 ··· 418 418 * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div 419 419 * values 420 420 */ 421 - static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, 421 + static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, 422 422 int *sys_mul, int *sys_div, 423 423 int *ips_div) 424 424 { ··· 592 592 }; 593 593 594 594 /* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ 595 - static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) 595 + static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) 596 596 { 597 597 size_t clks_idx_pub, clks_idx_int; 598 598 u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */ ··· 701 701 702 702 /* }}} MCLK helpers */ 703 703 704 - static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) 704 + static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) 705 705 { 706 706 int sys_mul, sys_div, ips_div; 707 707 int mul, div; ··· 937 937 * registers the set of public clocks (those listed in the dt-bindings/ 938 938 * header file) for OF lookups, keeps the intermediates private to us 939 939 */ 940 - static void mpc5121_clk_register_of_provider(struct device_node *np) 940 + static void __init mpc5121_clk_register_of_provider(struct device_node *np) 941 941 { 942 942 clk_data.clks = clks; 943 943 clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */ ··· 948 948 * temporary support for the period of time between introduction of CCF 949 949 * support and the adjustment of peripheral drivers to OF based lookups 950 950 */ 951 - static void 
mpc5121_clk_provide_migration_support(void) 951 + static void __init mpc5121_clk_provide_migration_support(void) 952 952 { 953 953 954 954 /* ··· 1009 1009 * case of not yet adjusted device tree data, where clock related specs 1010 1010 * are missing) 1011 1011 */ 1012 - static void mpc5121_clk_provide_backwards_compat(void) 1012 + static void __init mpc5121_clk_provide_backwards_compat(void) 1013 1013 { 1014 1014 enum did_reg_flags { 1015 1015 DID_REG_PSC = BIT(0),
+2 -2
arch/powerpc/platforms/512x/mpc512x.h
··· 12 12 extern void __init mpc512x_init(void); 13 13 extern void __init mpc512x_setup_arch(void); 14 14 extern int __init mpc5121_clk_init(void); 15 - extern const char *mpc512x_select_psc_compat(void); 16 - extern const char *mpc512x_select_reset_compat(void); 15 + const char *__init mpc512x_select_psc_compat(void); 16 + const char *__init mpc512x_select_reset_compat(void); 17 17 extern void __noreturn mpc512x_restart(char *cmd); 18 18 19 19 #endif /* __MPC512X_H__ */
+2 -2
arch/powerpc/platforms/512x/mpc512x_shared.c
··· 352 352 353 353 #define DEFAULT_FIFO_SIZE 16 354 354 355 - const char *mpc512x_select_psc_compat(void) 355 + const char *__init mpc512x_select_psc_compat(void) 356 356 { 357 357 if (of_machine_is_compatible("fsl,mpc5121")) 358 358 return "fsl,mpc5121-psc"; ··· 363 363 return NULL; 364 364 } 365 365 366 - const char *mpc512x_select_reset_compat(void) 366 + const char *__init mpc512x_select_reset_compat(void) 367 367 { 368 368 if (of_machine_is_compatible("fsl,mpc5121")) 369 369 return "fsl,mpc5121-reset";
+1 -1
arch/powerpc/platforms/52xx/Kconfig
··· 34 34 bool "bPlan Efika 5k2. MPC5200B based computer" 35 35 depends on PPC_MPC52xx 36 36 select PPC_RTAS 37 - select PPC_NATIVE 37 + select PPC_HASH_MMU_NATIVE 38 38 39 39 config PPC_LITE5200 40 40 bool "Freescale Lite5200 Eval Board"
+1 -1
arch/powerpc/platforms/83xx/km83xx.c
··· 39 39 40 40 #define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */ 41 41 42 - static void quirk_mpc8360e_qe_enet10(void) 42 + static void __init quirk_mpc8360e_qe_enet10(void) 43 43 { 44 44 /* 45 45 * handle mpc8360E Erratum QE_ENET10:
+1 -1
arch/powerpc/platforms/83xx/mpc834x_mds.c
··· 35 35 #include "mpc83xx.h" 36 36 37 37 #define BCSR5_INT_USB 0x02 38 - static int mpc834xemds_usb_cfg(void) 38 + static int __init mpc834xemds_usb_cfg(void) 39 39 { 40 40 struct device_node *np; 41 41 void __iomem *bcsr_regs = NULL;
+1 -1
arch/powerpc/platforms/83xx/mpc837x_mds.c
··· 23 23 #define BCSR12_USB_SER_PIN 0x80 24 24 #define BCSR12_USB_SER_DEVICE 0x02 25 25 26 - static int mpc837xmds_usb_cfg(void) 26 + static int __init mpc837xmds_usb_cfg(void) 27 27 { 28 28 struct device_node *np; 29 29 const void *phy_type, *mode;
+1 -1
arch/powerpc/platforms/83xx/mpc837x_rdb.c
··· 18 18 19 19 #include "mpc83xx.h" 20 20 21 - static void mpc837x_rdb_sd_cfg(void) 21 + static void __init mpc837x_rdb_sd_cfg(void) 22 22 { 23 23 void __iomem *im; 24 24
+3 -3
arch/powerpc/platforms/83xx/mpc83xx.h
··· 68 68 69 69 extern void __noreturn mpc83xx_restart(char *cmd); 70 70 extern long mpc83xx_time_init(void); 71 - extern int mpc837x_usb_cfg(void); 72 - extern int mpc834x_usb_cfg(void); 73 - extern int mpc831x_usb_cfg(void); 71 + int __init mpc837x_usb_cfg(void); 72 + int __init mpc834x_usb_cfg(void); 73 + int __init mpc831x_usb_cfg(void); 74 74 extern void mpc83xx_ipic_init_IRQ(void); 75 75 76 76 #ifdef CONFIG_PCI
+3 -3
arch/powerpc/platforms/83xx/usb.c
··· 20 20 21 21 22 22 #ifdef CONFIG_PPC_MPC834x 23 - int mpc834x_usb_cfg(void) 23 + int __init mpc834x_usb_cfg(void) 24 24 { 25 25 unsigned long sccr, sicrl, sicrh; 26 26 void __iomem *immap; ··· 96 96 #endif /* CONFIG_PPC_MPC834x */ 97 97 98 98 #ifdef CONFIG_PPC_MPC831x 99 - int mpc831x_usb_cfg(void) 99 + int __init mpc831x_usb_cfg(void) 100 100 { 101 101 u32 temp; 102 102 void __iomem *immap, *usb_regs; ··· 209 209 #endif /* CONFIG_PPC_MPC831x */ 210 210 211 211 #ifdef CONFIG_PPC_MPC837x 212 - int mpc837x_usb_cfg(void) 212 + int __init mpc837x_usb_cfg(void) 213 213 { 214 214 void __iomem *immap; 215 215 struct device_node *np = NULL;
+1 -1
arch/powerpc/platforms/85xx/c293pcie.c
··· 19 19 20 20 #include "mpc85xx.h" 21 21 22 - void __init c293_pcie_pic_init(void) 22 + static void __init c293_pcie_pic_init(void) 23 23 { 24 24 struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | 25 25 MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC ");
+1 -1
arch/powerpc/platforms/85xx/ge_imp3a.c
··· 78 78 of_node_put(cascade_node); 79 79 } 80 80 81 - static void ge_imp3a_pci_assign_primary(void) 81 + static void __init ge_imp3a_pci_assign_primary(void) 82 82 { 83 83 #ifdef CONFIG_PCI 84 84 struct device_node *np;
+1 -1
arch/powerpc/platforms/85xx/mpc85xx_cds.c
··· 282 282 283 283 #endif /* CONFIG_PPC_I8259 */ 284 284 285 - static void mpc85xx_cds_pci_assign_primary(void) 285 + static void __init mpc85xx_cds_pci_assign_primary(void) 286 286 { 287 287 #ifdef CONFIG_PCI 288 288 struct device_node *np;
+2
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
··· 15 15 #include <asm/io.h> 16 16 #include <asm/fsl_pm.h> 17 17 18 + #include "smp.h" 19 + 18 20 static struct ccsr_guts __iomem *guts; 19 21 20 22 #ifdef CONFIG_FSL_PMC
+2 -2
arch/powerpc/platforms/85xx/smp.c
··· 366 366 #ifdef CONFIG_PPC32 367 367 atomic_t kexec_down_cpus = ATOMIC_INIT(0); 368 368 369 - void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) 369 + static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) 370 370 { 371 371 local_irq_disable(); 372 372 ··· 384 384 ppc_md.kexec_cpu_down(0,1); 385 385 } 386 386 #else 387 - void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) 387 + static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) 388 388 { 389 389 int cpu = smp_processor_id(); 390 390 int sibling = cpu_last_thread_sibling(cpu);
+1 -1
arch/powerpc/platforms/85xx/socrates_fpga_pic.c
··· 271 271 .xlate = socrates_fpga_pic_host_xlate, 272 272 }; 273 273 274 - void socrates_fpga_pic_init(struct device_node *pic) 274 + void __init socrates_fpga_pic_init(struct device_node *pic) 275 275 { 276 276 unsigned long flags; 277 277 int i;
+1 -1
arch/powerpc/platforms/85xx/socrates_fpga_pic.h
··· 6 6 #ifndef SOCRATES_FPGA_PIC_H 7 7 #define SOCRATES_FPGA_PIC_H 8 8 9 - void socrates_fpga_pic_init(struct device_node *pic); 9 + void __init socrates_fpga_pic_init(struct device_node *pic); 10 10 11 11 #endif
+2 -2
arch/powerpc/platforms/85xx/xes_mpc85xx.c
··· 45 45 mpic_init(mpic); 46 46 } 47 47 48 - static void xes_mpc85xx_configure_l2(void __iomem *l2_base) 48 + static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base) 49 49 { 50 50 volatile uint32_t ctl, tmp; 51 51 ··· 72 72 asm volatile("msync; isync"); 73 73 } 74 74 75 - static void xes_mpc85xx_fixups(void) 75 + static void __init xes_mpc85xx_fixups(void) 76 76 { 77 77 struct device_node *np; 78 78 int err;
+2 -2
arch/powerpc/platforms/Kconfig
··· 40 40 41 41 In case of doubt, say Y 42 42 43 - config PPC_NATIVE 43 + config PPC_HASH_MMU_NATIVE 44 44 bool 45 - depends on PPC_BOOK3S_32 || PPC64 45 + depends on PPC_BOOK3S 46 46 help 47 47 Support for running natively on the hardware, i.e. without 48 48 a hypervisor. This option is not user-selectable but should
+38 -20
arch/powerpc/platforms/Kconfig.cputype
··· 30 30 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" 31 31 imply PPC_FPU 32 32 select PPC_HAVE_PMU_SUPPORT 33 - select PPC_HAVE_KUEP 34 - select PPC_HAVE_KUAP 35 33 select HAVE_ARCH_VMAP_STACK 36 34 37 35 config PPC_85xx ··· 40 42 bool "Freescale 8xx" 41 43 select ARCH_SUPPORTS_HUGETLBFS 42 44 select FSL_SOC 43 - select PPC_HAVE_KUEP 44 - select PPC_HAVE_KUAP 45 + select PPC_KUEP 45 46 select HAVE_ARCH_VMAP_STACK 46 47 select HUGETLBFS 47 48 ··· 50 53 select PPC_UDBG_16550 51 54 select 4xx_SOC 52 55 select HAVE_PCI 56 + select PPC_KUEP if PPC_KUAP 53 57 54 58 config 44x 55 59 bool "AMCC 44x, 46x or 47x" ··· 59 61 select 4xx_SOC 60 62 select HAVE_PCI 61 63 select PHYS_64BIT 62 - select PPC_HAVE_KUEP 64 + select PPC_KUEP 63 65 64 66 endchoice 65 67 ··· 103 105 select HAVE_MOVE_PMD 104 106 select HAVE_MOVE_PUD 105 107 select IRQ_WORK 106 - select PPC_MM_SLICES 107 - select PPC_HAVE_KUEP 108 - select PPC_HAVE_KUAP 108 + select PPC_64S_HASH_MMU if !PPC_RADIX_MMU 109 109 110 110 config PPC_BOOK3E_64 111 111 bool "Embedded processors" ··· 126 130 config GENERIC_CPU 127 131 bool "Generic (POWER4 and above)" 128 132 depends on PPC64 && !CPU_LITTLE_ENDIAN 133 + select PPC_64S_HASH_MMU if PPC_BOOK3S_64 129 134 130 135 config GENERIC_CPU 131 136 bool "Generic (POWER8 and above)" 132 137 depends on PPC64 && CPU_LITTLE_ENDIAN 133 138 select ARCH_HAS_FAST_MULTIPLIER 139 + select PPC_64S_HASH_MMU 134 140 135 141 config GENERIC_CPU 136 142 bool "Generic 32 bits powerpc" ··· 141 143 config CELL_CPU 142 144 bool "Cell Broadband Engine" 143 145 depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN 146 + select PPC_64S_HASH_MMU 144 147 145 148 config POWER5_CPU 146 149 bool "POWER5" 147 150 depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN 151 + select PPC_64S_HASH_MMU 148 152 149 153 config POWER6_CPU 150 154 bool "POWER6" 151 155 depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN 156 + select PPC_64S_HASH_MMU 152 157 153 158 config POWER7_CPU 154 159 bool "POWER7" 155 160 depends on 
PPC_BOOK3S_64 156 161 select ARCH_HAS_FAST_MULTIPLIER 162 + select PPC_64S_HASH_MMU 157 163 158 164 config POWER8_CPU 159 165 bool "POWER8" 160 166 depends on PPC_BOOK3S_64 161 167 select ARCH_HAS_FAST_MULTIPLIER 168 + select PPC_64S_HASH_MMU 162 169 163 170 config POWER9_CPU 164 171 bool "POWER9" ··· 281 278 depends on E500 || 44x || PPC_BOOK3E 282 279 default y 283 280 281 + config BOOKE_OR_40x 282 + bool 283 + depends on BOOKE || 40x 284 + default y 285 + 284 286 config FSL_BOOKE 285 287 bool 286 288 depends on E500 && PPC32 ··· 298 290 select FSL_EMB_PERFMON 299 291 select PPC_SMP_MUXED_IPI 300 292 select PPC_DOORBELL 293 + select PPC_KUEP 301 294 default y if FSL_BOOKE 302 295 303 296 config PTE_64BIT ··· 373 364 374 365 If in doubt, say Y here. 375 366 367 + config PPC_64S_HASH_MMU 368 + bool "Hash MMU Support" 369 + depends on PPC_BOOK3S_64 370 + select PPC_MM_SLICES 371 + default y 372 + help 373 + Enable support for the Power ISA Hash style MMU. This is implemented 374 + by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The 375 + OpenPOWER ISA does not mandate the hash MMU and some CPUs do not 376 + implement it (e.g., Microwatt). 377 + 378 + Note that POWER9 PowerVM platforms only support the hash 379 + MMU. From POWER10 radix is also supported by PowerVM. 380 + 381 + If you're unsure, say Y. 382 + 376 383 config PPC_RADIX_MMU 377 384 bool "Radix MMU Support" 378 385 depends on PPC_BOOK3S_64 ··· 400 375 you can probably disable this. 401 376 402 377 config PPC_RADIX_MMU_DEFAULT 403 - bool "Default to using the Radix MMU when possible" 378 + bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU 379 + depends on PPC_BOOK3S_64 404 380 depends on PPC_RADIX_MMU 405 381 default y 406 382 help ··· 413 387 414 388 If you're unsure, say Y. 
415 389 416 - config PPC_HAVE_KUEP 417 - bool 418 - 419 390 config PPC_KUEP 420 - bool "Kernel Userspace Execution Prevention" 421 - depends on PPC_HAVE_KUEP 422 - default y 391 + bool "Kernel Userspace Execution Prevention" if !40x 392 + default y if !40x 423 393 help 424 394 Enable support for Kernel Userspace Execution Prevention (KUEP) 425 395 426 396 If you're unsure, say Y. 427 397 428 - config PPC_HAVE_KUAP 429 - bool 430 - 431 398 config PPC_KUAP 432 399 bool "Kernel Userspace Access Protection" 433 - depends on PPC_HAVE_KUAP 434 400 default y 435 401 help 436 402 Enable support for Kernel Userspace Access Protection (KUAP) ··· 431 413 432 414 config PPC_KUAP_DEBUG 433 415 bool "Extra debugging for Kernel Userspace Access Protection" 434 - depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32) 416 + depends on PPC_KUAP 435 417 help 436 418 Add extra debugging for Kernel Userspace Access Protection (KUAP) 437 419 If you're unsure, say N.
+2 -1
arch/powerpc/platforms/cell/Kconfig
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 config PPC_CELL 3 + select PPC_64S_HASH_MMU if PPC64 3 4 bool 4 5 5 6 config PPC_CELL_COMMON ··· 9 8 select PPC_DCR_MMIO 10 9 select PPC_INDIRECT_PIO 11 10 select PPC_INDIRECT_MMIO 12 - select PPC_NATIVE 11 + select PPC_HASH_MMU_NATIVE 13 12 select PPC_RTAS 14 13 select IRQ_EDGE_EOI_HANDLER 15 14
+1 -1
arch/powerpc/platforms/cell/cbe_regs.c
··· 165 165 } 166 166 EXPORT_SYMBOL_GPL(cbe_node_to_cpu); 167 167 168 - static struct device_node *cbe_get_be_node(int cpu_id) 168 + static struct device_node *__init cbe_get_be_node(int cpu_id) 169 169 { 170 170 struct device_node *np; 171 171
+8 -7
arch/powerpc/platforms/cell/iommu.c
··· 253 253 return IRQ_HANDLED; 254 254 } 255 255 256 - static int cell_iommu_find_ioc(int nid, unsigned long *base) 256 + static int __init cell_iommu_find_ioc(int nid, unsigned long *base) 257 257 { 258 258 struct device_node *np; 259 259 struct resource r; ··· 293 293 return -ENODEV; 294 294 } 295 295 296 - static void cell_iommu_setup_stab(struct cbe_iommu *iommu, 296 + static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu, 297 297 unsigned long dbase, unsigned long dsize, 298 298 unsigned long fbase, unsigned long fsize) 299 299 { ··· 313 313 memset(iommu->stab, 0, stab_size); 314 314 } 315 315 316 - static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, 316 + static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu, 317 317 unsigned long base, unsigned long size, unsigned long gap_base, 318 318 unsigned long gap_size, unsigned long page_shift) 319 319 { ··· 373 373 return ptab; 374 374 } 375 375 376 - static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) 376 + static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu) 377 377 { 378 378 int ret; 379 379 unsigned long reg, xlate_base; ··· 413 413 out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); 414 414 } 415 415 416 - static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, 416 + static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, 417 417 unsigned long base, unsigned long size) 418 418 { 419 419 cell_iommu_setup_stab(iommu, base, size, 0, 0); ··· 858 858 cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR; 859 859 } 860 860 861 - static void insert_16M_pte(unsigned long addr, unsigned long *ptab, 861 + static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab, 862 862 unsigned long base_pte) 863 863 { 864 864 unsigned long segment, offset; ··· 873 873 ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); 874 874 } 875 875 876 - static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, 876 + 
static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, 877 877 struct device_node *np, unsigned long dbase, unsigned long dsize, 878 878 unsigned long fbase, unsigned long fsize) 879 879 { ··· 977 977 if (hbase < dbase || (hend > (dbase + dsize))) { 978 978 pr_debug("iommu: hash window doesn't fit in" 979 979 "real DMA window\n"); 980 + of_node_put(np); 980 981 return -1; 981 982 } 982 983 }
+1
arch/powerpc/platforms/cell/pervasive.c
··· 78 78 switch (regs->msr & SRR1_WAKEMASK) { 79 79 case SRR1_WAKEDEC: 80 80 set_dec(1); 81 + break; 81 82 case SRR1_WAKEEE: 82 83 /* 83 84 * Handle these when interrupts get re-enabled and we take
+3 -3
arch/powerpc/platforms/cell/spu_base.c
··· 387 387 return stat ? IRQ_HANDLED : IRQ_NONE; 388 388 } 389 389 390 - static int spu_request_irqs(struct spu *spu) 390 + static int __init spu_request_irqs(struct spu *spu) 391 391 { 392 392 int ret = 0; 393 393 ··· 540 540 } 541 541 EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); 542 542 543 - static int spu_create_dev(struct spu *spu) 543 + static int __init spu_create_dev(struct spu *spu) 544 544 { 545 545 int ret; 546 546 ··· 711 711 } 712 712 } 713 713 714 - static void crash_register_spus(struct list_head *list) 714 + static void __init crash_register_spus(struct list_head *list) 715 715 { 716 716 struct spu *spu; 717 717 int ret;
+8 -8
arch/powerpc/platforms/cell/spu_manage.c
··· 186 186 return -EINVAL; 187 187 } 188 188 189 - static int spu_map_resource(struct spu *spu, int nr, 189 + static int __init spu_map_resource(struct spu *spu, int nr, 190 190 void __iomem** virt, unsigned long *phys) 191 191 { 192 192 struct device_node *np = spu->devnode; ··· 361 361 static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; 362 362 static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; 363 363 364 - static struct spu *spu_lookup_reg(int node, u32 reg) 364 + static struct spu *__init spu_lookup_reg(int node, u32 reg) 365 365 { 366 366 struct spu *spu; 367 367 const u32 *spu_reg; ··· 374 374 return NULL; 375 375 } 376 376 377 - static void init_affinity_qs20_harcoded(void) 377 + static void __init init_affinity_qs20_harcoded(void) 378 378 { 379 379 int node, i; 380 380 struct spu *last_spu, *spu; ··· 396 396 } 397 397 } 398 398 399 - static int of_has_vicinity(void) 399 + static int __init of_has_vicinity(void) 400 400 { 401 401 struct device_node *dn; 402 402 ··· 409 409 return 0; 410 410 } 411 411 412 - static struct spu *devnode_spu(int cbe, struct device_node *dn) 412 + static struct spu *__init devnode_spu(int cbe, struct device_node *dn) 413 413 { 414 414 struct spu *spu; 415 415 ··· 419 419 return NULL; 420 420 } 421 421 422 - static struct spu * 422 + static struct spu * __init 423 423 neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) 424 424 { 425 425 struct spu *spu; ··· 440 440 return NULL; 441 441 } 442 442 443 - static void init_affinity_node(int cbe) 443 + static void __init init_affinity_node(int cbe) 444 444 { 445 445 struct spu *spu, *last_spu; 446 446 struct device_node *vic_dn, *last_spu_dn; ··· 494 494 } 495 495 } 496 496 497 - static void init_affinity_fw(void) 497 + static void __init init_affinity_fw(void) 498 498 { 499 499 int cbe; 500 500
+1 -1
arch/powerpc/platforms/cell/spufs/inode.c
··· 648 648 get_order(isolated_loader_size)); 649 649 } 650 650 651 - static void 651 + static void __init 652 652 spufs_init_isolated_loader(void) 653 653 { 654 654 struct device_node *dn;
+1 -1
arch/powerpc/platforms/chrp/Kconfig
··· 11 11 select RTAS_ERROR_LOGGING 12 12 select PPC_MPC106 13 13 select PPC_UDBG_16550 14 - select PPC_NATIVE 14 + select PPC_HASH_MMU_NATIVE 15 15 select FORCE_PCI 16 16 default y
+1 -1
arch/powerpc/platforms/chrp/pegasos_eth.c
··· 113 113 114 114 static void __iomem *mv643xx_reg_base; 115 115 116 - static int Enable_SRAM(void) 116 + static int __init Enable_SRAM(void) 117 117 { 118 118 u32 ALong; 119 119
+1 -1
arch/powerpc/platforms/embedded6xx/Kconfig
··· 55 55 select FORCE_PCI 56 56 select PPC_INDIRECT_PCI 57 57 select PPC_I8259 58 - select PPC_NATIVE 58 + select PPC_HASH_MMU_NATIVE 59 59 select PPC_UDBG_16550 60 60 help 61 61 This option enables support for the Motorola (now Emerson) MVME5100
+3 -2
arch/powerpc/platforms/embedded6xx/hlwd-pic.c
··· 153 153 out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff); 154 154 } 155 155 156 - static struct irq_domain *hlwd_pic_init(struct device_node *np) 156 + static struct irq_domain *__init hlwd_pic_init(struct device_node *np) 157 157 { 158 158 struct irq_domain *irq_domain; 159 159 struct resource res; ··· 197 197 * 198 198 */ 199 199 200 - void hlwd_pic_probe(void) 200 + void __init hlwd_pic_probe(void) 201 201 { 202 202 struct irq_domain *host; 203 203 struct device_node *np; ··· 214 214 irq_set_chained_handler(cascade_virq, 215 215 hlwd_pic_irq_cascade); 216 216 hlwd_irq_host = host; 217 + of_node_put(np); 217 218 break; 218 219 } 219 220 }
+1 -1
arch/powerpc/platforms/embedded6xx/hlwd-pic.h
··· 11 11 #define __HLWD_PIC_H 12 12 13 13 extern unsigned int hlwd_pic_get_irq(void); 14 - extern void hlwd_pic_probe(void); 14 + void __init hlwd_pic_probe(void); 15 15 extern void hlwd_quiesce(void); 16 16 17 17 #endif
+1 -1
arch/powerpc/platforms/embedded6xx/holly.c
··· 50 50 return PCIBIOS_SUCCESSFUL; 51 51 } 52 52 53 - static void holly_remap_bridge(void) 53 + static void __init holly_remap_bridge(void) 54 54 { 55 55 u32 lut_val, lut_addr; 56 56 int i;
+2 -2
arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
··· 194 194 /* 195 195 * Retrieves and prepares the virtual address needed to access the hardware. 196 196 */ 197 - static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) 197 + static void __iomem *__init ug_udbg_setup_exi_io_base(struct device_node *np) 198 198 { 199 199 void __iomem *exi_io_base = NULL; 200 200 phys_addr_t paddr; ··· 212 212 /* 213 213 * Checks if a USB Gecko adapter is inserted in any memory card slot. 214 214 */ 215 - static void __iomem *ug_udbg_probe(void __iomem *exi_io_base) 215 + static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base) 216 216 { 217 217 int i; 218 218
+1 -1
arch/powerpc/platforms/embedded6xx/wii.c
··· 69 69 cpu_relax(); 70 70 } 71 71 72 - static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible) 72 + static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible) 73 73 { 74 74 void __iomem *hw_regs = NULL; 75 75 struct device_node *np;
+2 -1
arch/powerpc/platforms/maple/Kconfig
··· 9 9 select GENERIC_TBSYNC 10 10 select PPC_UDBG_16550 11 11 select PPC_970_NAP 12 - select PPC_NATIVE 12 + select PPC_64S_HASH_MMU 13 + select PPC_HASH_MMU_NATIVE 13 14 select PPC_RTAS 14 15 select MMIO_NVRAM 15 16 select ATA_NONSTANDARD if ATA
-1
arch/powerpc/platforms/microwatt/Kconfig
··· 5 5 select PPC_XICS 6 6 select PPC_ICS_NATIVE 7 7 select PPC_ICP_NATIVE 8 - select PPC_NATIVE 9 8 select PPC_UDBG_16550 10 9 select ARCH_RANDOM 11 10 help
+1 -1
arch/powerpc/platforms/microwatt/rng.c
··· 14 14 15 15 #define DARN_ERR 0xFFFFFFFFFFFFFFFFul 16 16 17 - int microwatt_get_random_darn(unsigned long *v) 17 + static int microwatt_get_random_darn(unsigned long *v) 18 18 { 19 19 unsigned long val; 20 20
+2 -1
arch/powerpc/platforms/pasemi/Kconfig
··· 5 5 select MPIC 6 6 select FORCE_PCI 7 7 select PPC_UDBG_16550 8 - select PPC_NATIVE 8 + select PPC_64S_HASH_MMU 9 + select PPC_HASH_MMU_NATIVE 9 10 select MPIC_BROKEN_REGREAD 10 11 help 11 12 This option enables support for PA Semi's PWRficient line
+1 -1
arch/powerpc/platforms/pasemi/msi.c
··· 130 130 return 0; 131 131 } 132 132 133 - int mpic_pasemi_msi_init(struct mpic *mpic) 133 + int __init mpic_pasemi_msi_init(struct mpic *mpic) 134 134 { 135 135 int rc; 136 136 struct pci_controller *phb;
+1 -1
arch/powerpc/platforms/pasemi/pasemi.h
··· 7 7 extern void pas_pci_irq_fixup(struct pci_dev *dev); 8 8 extern void pas_pci_dma_dev_setup(struct pci_dev *dev); 9 9 10 - extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); 10 + void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); 11 11 12 12 extern void __init pasemi_map_registers(void); 13 13
+1 -1
arch/powerpc/platforms/pasemi/pci.c
··· 287 287 } 288 288 } 289 289 290 - void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) 290 + void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) 291 291 { 292 292 struct pci_controller *hose; 293 293
+1 -1
arch/powerpc/platforms/pasemi/setup.c
··· 212 212 chip->irq_eoi(&desc->irq_data); 213 213 } 214 214 215 - static void nemo_init_IRQ(struct mpic *mpic) 215 + static void __init nemo_init_IRQ(struct mpic *mpic) 216 216 { 217 217 struct device_node *np; 218 218 int gpio_virq;
+2 -1
arch/powerpc/platforms/powermac/Kconfig
··· 6 6 select FORCE_PCI 7 7 select PPC_INDIRECT_PCI if PPC32 8 8 select PPC_MPC106 if PPC32 9 - select PPC_NATIVE 9 + select PPC_64S_HASH_MMU if PPC64 10 + select PPC_HASH_MMU_NATIVE 10 11 select ZONE_DMA if PPC32 11 12 default y 12 13
+2 -2
arch/powerpc/platforms/powermac/cache.S
··· 48 48 49 49 /* Stop DST streams */ 50 50 BEGIN_FTR_SECTION 51 - DSSALL 51 + PPC_DSSALL 52 52 sync 53 53 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 54 54 ··· 197 197 isync 198 198 199 199 /* Stop prefetch streams */ 200 - DSSALL 200 + PPC_DSSALL 201 201 sync 202 202 203 203 /* Disable L2 prefetching */
+1 -1
arch/powerpc/platforms/powermac/feature.c
··· 1530 1530 * This takes the second CPU off the bus on dual CPU machines 1531 1531 * running UP 1532 1532 */ 1533 - void g5_phy_disable_cpu1(void) 1533 + void __init g5_phy_disable_cpu1(void) 1534 1534 { 1535 1535 if (uninorth_maj == 3) 1536 1536 UN_OUT(U3_API_PHY_CONFIG_1, 0);
+3
arch/powerpc/platforms/powermac/low_i2c.c
··· 582 582 bus->close = kw_i2c_close; 583 583 bus->xfer = kw_i2c_xfer; 584 584 mutex_init(&bus->mutex); 585 + lockdep_register_key(&bus->lock_key); 585 586 lockdep_set_class(&bus->mutex, &bus->lock_key); 586 587 if (controller == busnode) 587 588 bus->flags = pmac_i2c_multibus; ··· 811 810 bus->hostdata = bus + 1; 812 811 bus->xfer = pmu_i2c_xfer; 813 812 mutex_init(&bus->mutex); 813 + lockdep_register_key(&bus->lock_key); 814 814 lockdep_set_class(&bus->mutex, &bus->lock_key); 815 815 bus->flags = pmac_i2c_multibus; 816 816 list_add(&bus->link, &pmac_i2c_busses); ··· 935 933 bus->hostdata = bus + 1; 936 934 bus->xfer = smu_i2c_xfer; 937 935 mutex_init(&bus->mutex); 936 + lockdep_register_key(&bus->lock_key); 938 937 lockdep_set_class(&bus->mutex, &bus->lock_key); 939 938 bus->flags = 0; 940 939 list_add(&bus->link, &pmac_i2c_busses);
+1 -1
arch/powerpc/platforms/powermac/nvram.c
··· 258 258 return (high << 16) | low; 259 259 } 260 260 261 - static u32 core99_check(u8* datas) 261 + static u32 __init core99_check(u8 *datas) 262 262 { 263 263 struct core99_header* hdr99 = (struct core99_header*)datas; 264 264
+3 -3
arch/powerpc/platforms/powermac/pfunc_base.c
··· 93 93 .delay = macio_do_delay, 94 94 }; 95 95 96 - static void macio_gpio_init_one(struct macio_chip *macio) 96 + static void __init macio_gpio_init_one(struct macio_chip *macio) 97 97 { 98 98 struct device_node *gparent, *gp; 99 99 ··· 265 265 .delay = macio_do_delay, 266 266 }; 267 267 268 - static void macio_mmio_init_one(struct macio_chip *macio) 268 + static void __init macio_mmio_init_one(struct macio_chip *macio) 269 269 { 270 270 DBG("Installing MMIO functions for macio %pOF\n", 271 271 macio->of_node); ··· 294 294 .delay = macio_do_delay, 295 295 }; 296 296 297 - static void uninorth_install_pfunc(void) 297 + static void __init uninorth_install_pfunc(void) 298 298 { 299 299 struct device_node *np; 300 300
+2 -4
arch/powerpc/platforms/powermac/pic.c
··· 18 18 #include <linux/interrupt.h> 19 19 #include <linux/syscore_ops.h> 20 20 #include <linux/adb.h> 21 + #include <linux/minmax.h> 21 22 #include <linux/pmu.h> 22 23 23 24 #include <asm/sections.h> ··· 312 311 313 312 /* Check ordering of master & slave */ 314 313 if (of_device_is_compatible(master, "gatwick")) { 315 - struct device_node *tmp; 316 314 BUG_ON(slave == NULL); 317 - tmp = master; 318 - master = slave; 319 - slave = tmp; 315 + swap(master, slave); 320 316 } 321 317 322 318 /* We found a slave */
+3 -3
arch/powerpc/platforms/powermac/setup.c
··· 166 166 } 167 167 168 168 #ifndef CONFIG_ADB_CUDA 169 - int find_via_cuda(void) 169 + int __init find_via_cuda(void) 170 170 { 171 171 struct device_node *dn = of_find_node_by_name(NULL, "via-cuda"); 172 172 ··· 180 180 #endif 181 181 182 182 #ifndef CONFIG_ADB_PMU 183 - int find_via_pmu(void) 183 + int __init find_via_pmu(void) 184 184 { 185 185 struct device_node *dn = of_find_node_by_name(NULL, "via-pmu"); 186 186 ··· 194 194 #endif 195 195 196 196 #ifndef CONFIG_PMAC_SMU 197 - int smu_init(void) 197 + int __init smu_init(void) 198 198 { 199 199 /* should check and warn if SMU is present */ 200 200 return 0;
+2 -2
arch/powerpc/platforms/powermac/smp.c
··· 186 186 .map = psurge_host_map, 187 187 }; 188 188 189 - static int psurge_secondary_ipi_init(void) 189 + static int __init psurge_secondary_ipi_init(void) 190 190 { 191 191 int rc = -ENOMEM; 192 192 ··· 875 875 876 876 static void __init smp_core99_bringup_done(void) 877 877 { 878 - extern void g5_phy_disable_cpu1(void); 878 + extern void __init g5_phy_disable_cpu1(void); 879 879 880 880 /* Close i2c bus if it was used for tb sync */ 881 881 if (pmac_tb_clock_chip_host)
+1 -1
arch/powerpc/platforms/powermac/udbg_scc.c
··· 62 62 3, 0xc1, /* rx enable, 8 bits */ 63 63 }; 64 64 65 - void udbg_scc_init(int force_scc) 65 + void __init udbg_scc_init(int force_scc) 66 66 { 67 67 const u32 *reg; 68 68 unsigned long addr;
+1 -1
arch/powerpc/platforms/powernv/Kconfig
··· 2 2 config PPC_POWERNV 3 3 depends on PPC64 && PPC_BOOK3S 4 4 bool "IBM PowerNV (Non-Virtualized) platform support" 5 - select PPC_NATIVE 5 + select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU 6 6 select PPC_XICS 7 7 select PPC_ICP_NATIVE 8 8 select PPC_XIVE_NATIVE
+13 -14
arch/powerpc/platforms/powernv/idle.c
··· 62 62 63 63 static unsigned long power7_offline_type; 64 64 65 - static int pnv_save_sprs_for_deep_states(void) 65 + static int __init pnv_save_sprs_for_deep_states(void) 66 66 { 67 67 int cpu; 68 68 int rc; ··· 146 146 static void pnv_fastsleep_workaround_apply(void *info) 147 147 148 148 { 149 + int cpu = smp_processor_id(); 149 150 int rc; 150 151 int *err = info; 152 + 153 + if (cpu_first_thread_sibling(cpu) != cpu) 154 + return; 151 155 152 156 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 153 157 OPAL_CONFIG_IDLE_APPLY); ··· 179 175 struct device_attribute *attr, const char *buf, 180 176 size_t count) 181 177 { 182 - cpumask_t primary_thread_mask; 183 178 int err; 184 179 u8 val; 185 180 ··· 203 200 power7_fastsleep_workaround_exit = false; 204 201 205 202 cpus_read_lock(); 206 - primary_thread_mask = cpu_online_cores_map(); 207 - on_each_cpu_mask(&primary_thread_mask, 208 - pnv_fastsleep_workaround_apply, 209 - &err, 1); 203 + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); 210 204 cpus_read_unlock(); 211 205 if (err) { 212 206 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); ··· 306 306 /* per thread SPRs that get lost in shallow states */ 307 307 u64 amr; 308 308 u64 iamr; 309 - u64 amor; 310 309 u64 uamor; 310 + /* amor is restored to constant ~0 */ 311 311 }; 312 312 313 313 static unsigned long power7_idle_insn(unsigned long type) ··· 378 378 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 379 379 sprs.amr = mfspr(SPRN_AMR); 380 380 sprs.iamr = mfspr(SPRN_IAMR); 381 - sprs.amor = mfspr(SPRN_AMOR); 382 381 sprs.uamor = mfspr(SPRN_UAMOR); 383 382 } 384 383 ··· 396 397 */ 397 398 mtspr(SPRN_AMR, sprs.amr); 398 399 mtspr(SPRN_IAMR, sprs.iamr); 399 - mtspr(SPRN_AMOR, sprs.amor); 400 + mtspr(SPRN_AMOR, ~0); 400 401 mtspr(SPRN_UAMOR, sprs.uamor); 401 402 } 402 403 } ··· 491 492 492 493 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 493 494 495 + #ifdef CONFIG_PPC_64S_HASH_MMU 494 496 /* 495 497 * 
The SLB has to be restored here, but it sometimes still 496 498 * contains entries, so the __ variant must be used to prevent 497 499 * multi hits. 498 500 */ 499 501 __slb_restore_bolted_realmode(); 502 + #endif 500 503 501 504 return srr1; 502 505 } ··· 590 589 u64 purr; 591 590 u64 spurr; 592 591 u64 dscr; 593 - u64 wort; 594 592 u64 ciabr; 595 593 596 594 u64 mmcra; ··· 687 687 688 688 sprs.amr = mfspr(SPRN_AMR); 689 689 sprs.iamr = mfspr(SPRN_IAMR); 690 - sprs.amor = mfspr(SPRN_AMOR); 691 690 sprs.uamor = mfspr(SPRN_UAMOR); 692 691 693 692 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ ··· 707 708 */ 708 709 mtspr(SPRN_AMR, sprs.amr); 709 710 mtspr(SPRN_IAMR, sprs.iamr); 710 - mtspr(SPRN_AMOR, sprs.amor); 711 + mtspr(SPRN_AMOR, ~0); 711 712 mtspr(SPRN_UAMOR, sprs.uamor); 712 713 713 714 /* ··· 1123 1124 * stop instruction 1124 1125 */ 1125 1126 1126 - int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1127 + int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1127 1128 { 1128 1129 int err = 0; 1129 1130 ··· 1317 1318 * which is the number of cpuidle states discovered through device-tree. 1318 1319 */ 1319 1320 1320 - static int pnv_parse_cpuidle_dt(void) 1321 + static int __init pnv_parse_cpuidle_dt(void) 1321 1322 { 1322 1323 struct device_node *np; 1323 1324 int nr_idle_states, i;
+3 -3
arch/powerpc/platforms/powernv/opal-core.c
··· 89 89 return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0; 90 90 } 91 91 92 - static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name, 92 + static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name, 93 93 u32 type, void *data, 94 94 size_t data_len) 95 95 { ··· 108 108 return buf; 109 109 } 110 110 111 - static void fill_prstatus(struct elf_prstatus *prstatus, int pir, 111 + static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, 112 112 struct pt_regs *regs) 113 113 { 114 114 memset(prstatus, 0, sizeof(struct elf_prstatus)); ··· 134 134 } 135 135 } 136 136 137 - static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf, 137 + static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, 138 138 u64 opal_boot_entry) 139 139 { 140 140 Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf;
+2 -1
arch/powerpc/platforms/powernv/opal-dump.c
··· 208 208 &ack_attribute.attr, 209 209 NULL, 210 210 }; 211 + ATTRIBUTE_GROUPS(dump_default); 211 212 212 213 static struct kobj_type dump_ktype = { 213 214 .sysfs_ops = &dump_sysfs_ops, 214 215 .release = &dump_release, 215 - .default_attrs = dump_default_attrs, 216 + .default_groups = dump_default_groups, 216 217 }; 217 218 218 219 static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
+2 -1
arch/powerpc/platforms/powernv/opal-elog.c
··· 144 144 &ack_attribute.attr, 145 145 NULL, 146 146 }; 147 + ATTRIBUTE_GROUPS(elog_default); 147 148 148 149 static struct kobj_type elog_ktype = { 149 150 .sysfs_ops = &elog_sysfs_ops, 150 151 .release = &elog_release, 151 - .default_attrs = elog_default_attrs, 152 + .default_groups = elog_default_groups, 152 153 }; 153 154 154 155 /* Maximum size of a single log on FSP is 16KB */
+1 -1
arch/powerpc/platforms/powernv/opal-fadump.c
··· 112 112 * This function is called in the capture kernel to get configuration details 113 113 * from metadata setup by the first kernel. 114 114 */ 115 - static void opal_fadump_get_config(struct fw_dump *fadump_conf, 115 + static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, 116 116 const struct opal_fadump_mem_struct *fdm) 117 117 { 118 118 unsigned long base, size, last_end, hole_size;
+3 -3
arch/powerpc/platforms/powernv/opal-imc.c
··· 200 200 201 201 static void disable_core_pmu_counters(void) 202 202 { 203 - cpumask_t cores_map; 204 203 int cpu, rc; 205 204 206 205 cpus_read_lock(); 207 206 /* Disable the IMC Core functions */ 208 - cores_map = cpu_online_cores_map(); 209 - for_each_cpu(cpu, &cores_map) { 207 + for_each_online_cpu(cpu) { 208 + if (cpu_first_thread_sibling(cpu) != cpu) 209 + continue; 210 210 rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, 211 211 get_hard_smp_processor_id(cpu)); 212 212 if (rc)
+1
arch/powerpc/platforms/powernv/opal-lpc.c
··· 396 396 if (!of_get_property(np, "primary", NULL)) 397 397 continue; 398 398 opal_lpc_chip_id = of_get_ibm_chip_id(np); 399 + of_node_put(np); 399 400 break; 400 401 } 401 402 if (opal_lpc_chip_id < 0)
+2 -2
arch/powerpc/platforms/powernv/opal-msglog.c
··· 105 105 .read = opal_msglog_read 106 106 }; 107 107 108 - struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name) 108 + struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) 109 109 { 110 110 u64 mcaddr; 111 111 struct memcons *mc; ··· 133 133 return NULL; 134 134 } 135 135 136 - u32 memcons_get_size(struct memcons *mc) 136 + u32 __init memcons_get_size(struct memcons *mc) 137 137 { 138 138 return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size); 139 139 }
+1 -1
arch/powerpc/platforms/powernv/opal-power.c
··· 53 53 } 54 54 55 55 /* Check for existing EPOW, DPO events */ 56 - static bool poweroff_pending(void) 56 + static bool __init poweroff_pending(void) 57 57 { 58 58 int rc; 59 59 __be64 opal_dpo_timeout;
+1 -1
arch/powerpc/platforms/powernv/opal-powercap.c
··· 129 129 return ret; 130 130 } 131 131 132 - static void powercap_add_attr(int handle, const char *name, 132 + static void __init powercap_add_attr(int handle, const char *name, 133 133 struct powercap_attr *attr) 134 134 { 135 135 attr->handle = handle;
+1 -1
arch/powerpc/platforms/powernv/opal-rtc.c
··· 18 18 #include <asm/firmware.h> 19 19 #include <asm/machdep.h> 20 20 21 - static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) 21 + static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) 22 22 { 23 23 tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + 24 24 bcd2bin((y_m_d >> 16) & 0xff)) - 1900;
+2 -2
arch/powerpc/platforms/powernv/opal-sensor-groups.c
··· 126 126 attr->attr.store = ops_info[index].store; 127 127 } 128 128 129 - static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, 129 + static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, 130 130 u32 handle) 131 131 { 132 132 int i, j; ··· 144 144 return sysfs_create_group(sg_kobj, &sg->sg); 145 145 } 146 146 147 - static int get_nr_attrs(const __be32 *ops, int len) 147 + static int __init get_nr_attrs(const __be32 *ops, int len) 148 148 { 149 149 int i, j; 150 150 int nr_attrs = 0;
+4 -4
arch/powerpc/platforms/powernv/opal.c
··· 73 73 static struct opal_msg *opal_msg; 74 74 static u32 opal_msg_size __ro_after_init; 75 75 76 - void opal_configure_cores(void) 76 + void __init opal_configure_cores(void) 77 77 { 78 78 u64 reinit_flags = 0; 79 79 ··· 779 779 return !!recover_addr; 780 780 } 781 781 782 - static int opal_sysfs_init(void) 782 + static int __init opal_sysfs_init(void) 783 783 { 784 784 opal_kobj = kobject_create_and_add("opal", firmware_kobj); 785 785 if (!opal_kobj) { ··· 937 937 "rc = %d\n", rc); 938 938 } 939 939 940 - static void opal_pdev_init(const char *compatible) 940 + static void __init opal_pdev_init(const char *compatible) 941 941 { 942 942 struct device_node *np; 943 943 ··· 981 981 wake_up_process(kopald_tsk); 982 982 } 983 983 984 - static void opal_init_heartbeat(void) 984 + static void __init opal_init_heartbeat(void) 985 985 { 986 986 /* Old firwmware, we assume the HVC heartbeat is sufficient */ 987 987 if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
+2 -2
arch/powerpc/platforms/powernv/pci-ioda.c
··· 2265 2265 .free = pnv_irq_domain_free, 2266 2266 }; 2267 2267 2268 - static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) 2268 + static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) 2269 2269 { 2270 2270 struct pnv_phb *phb = hose->private_data; 2271 2271 struct irq_domain *parent = irq_get_default_host(); ··· 2298 2298 return 0; 2299 2299 } 2300 2300 2301 - static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) 2301 + static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb) 2302 2302 { 2303 2303 unsigned int count; 2304 2304 const __be32 *prop = of_get_property(phb->hose->dn,
+2 -2
arch/powerpc/platforms/powernv/powernv.h
··· 39 39 40 40 struct memcons; 41 41 ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); 42 - u32 memcons_get_size(struct memcons *mc); 43 - struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); 42 + u32 __init memcons_get_size(struct memcons *mc); 43 + struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name); 44 44 45 45 #endif /* _POWERNV_H */
+1 -1
arch/powerpc/platforms/powernv/rng.c
··· 80 80 return 1; 81 81 } 82 82 83 - static int initialise_darn(void) 83 + static int __init initialise_darn(void) 84 84 { 85 85 unsigned long val; 86 86 int i;
+12 -6
arch/powerpc/platforms/powernv/setup.c
··· 40 40 #include "powernv.h" 41 41 42 42 43 - static bool fw_feature_is(const char *state, const char *name, 43 + static bool __init fw_feature_is(const char *state, const char *name, 44 44 struct device_node *fw_features) 45 45 { 46 46 struct device_node *np; ··· 55 55 return rc; 56 56 } 57 57 58 - static void init_fw_feat_flags(struct device_node *np) 58 + static void __init init_fw_feat_flags(struct device_node *np) 59 59 { 60 60 if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) 61 61 security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); ··· 98 98 security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); 99 99 } 100 100 101 - static void pnv_setup_security_mitigations(void) 101 + static void __init pnv_setup_security_mitigations(void) 102 102 { 103 103 struct device_node *np, *fw_features; 104 104 enum l1d_flush_type type; ··· 123 123 } 124 124 125 125 /* 126 - * If we are non-Power9 bare metal, we don't need to flush on kernel 127 - * entry or after user access: they fix a P9 specific vulnerability. 126 + * The issues addressed by the entry and uaccess flush don't affect P7 127 + * or P8, so on bare metal disable them explicitly in case firmware does 128 + * not include the features to disable them. POWER9 and newer processors 129 + * should have the appropriate firmware flags. 128 130 */ 129 - if (!pvr_version_is(PVR_POWER9)) { 131 + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) || 132 + pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) || 133 + pvr_version_is(PVR_POWER8)) { 130 134 security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); 131 135 security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); 132 136 } ··· 211 207 #endif 212 208 add_preferred_console("hvc", 0, NULL); 213 209 210 + #ifdef CONFIG_PPC_64S_HASH_MMU 214 211 if (!radix_enabled()) { 215 212 size_t size = sizeof(struct slb_entry) * mmu_slb_size; 216 213 int i; ··· 224 219 cpu_to_node(i)); 225 220 } 226 221 } 222 + #endif 227 223 } 228 224 229 225 static void __init pnv_init_IRQ(void)
+1 -1
arch/powerpc/platforms/ps3/gelic_udbg.c
··· 113 113 return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr); 114 114 } 115 115 116 - static void gelic_debug_init(void) 116 + static void __init gelic_debug_init(void) 117 117 { 118 118 s64 result; 119 119 u64 v2;
+2 -2
arch/powerpc/platforms/ps3/mm.c
··· 41 41 PAGE_SHIFT_16M = 24U, 42 42 }; 43 43 44 - static unsigned long make_page_sizes(unsigned long a, unsigned long b) 44 + static unsigned long __init make_page_sizes(unsigned long a, unsigned long b) 45 45 { 46 46 return (a << 56) | (b << 48); 47 47 } ··· 215 215 } 216 216 } 217 217 218 - static int ps3_mm_get_repository_highmem(struct mem_region *r) 218 + static int __init ps3_mm_get_repository_highmem(struct mem_region *r) 219 219 { 220 220 int result; 221 221
+2 -2
arch/powerpc/platforms/ps3/os-area.c
··· 501 501 return -1; 502 502 } 503 503 504 - static int db_get_64(const struct os_area_db *db, 504 + static int __init db_get_64(const struct os_area_db *db, 505 505 const struct os_area_db_id *id, uint64_t *value) 506 506 { 507 507 struct db_iterator i; ··· 517 517 return -1; 518 518 } 519 519 520 - static int db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) 520 + static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) 521 521 { 522 522 return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff); 523 523 }
+7 -7
arch/powerpc/platforms/ps3/platform.h
··· 35 35 36 36 /* smp */ 37 37 38 - void smp_init_ps3(void); 38 + void __init smp_init_ps3(void); 39 39 #ifdef CONFIG_SMP 40 40 void ps3_smp_cleanup_cpu(int cpu); 41 41 #else ··· 134 134 int ps3_repository_find_device(struct ps3_repository_device *repo); 135 135 int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, 136 136 u64 bus_id, u64 dev_id); 137 - int ps3_repository_find_devices(enum ps3_bus_type bus_type, 137 + int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, 138 138 int (*callback)(const struct ps3_repository_device *repo)); 139 - int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, 139 + int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, 140 140 unsigned int *bus_index); 141 141 int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, 142 142 enum ps3_interrupt_type intr_type, unsigned int *interrupt_id); ··· 211 211 int ps3_repository_read_num_be(unsigned int *num_be); 212 212 int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id); 213 213 int ps3_repository_read_be_id(u64 node_id, u64 *be_id); 214 - int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); 215 - int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); 214 + int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); 215 + int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); 216 216 217 217 /* repository performance monitor info */ 218 218 ··· 247 247 248 248 /* repository vuart info */ 249 249 250 - int ps3_repository_read_vuart_av_port(unsigned int *port); 251 - int ps3_repository_read_vuart_sysmgr_port(unsigned int *port); 250 + int __init ps3_repository_read_vuart_av_port(unsigned int *port); 251 + int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port); 252 252 253 253 #endif
+10 -10
arch/powerpc/platforms/ps3/repository.c
··· 413 413 return 0; 414 414 } 415 415 416 - int ps3_repository_find_devices(enum ps3_bus_type bus_type, 416 + int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, 417 417 int (*callback)(const struct ps3_repository_device *repo)) 418 418 { 419 419 int result = 0; ··· 455 455 return result; 456 456 } 457 457 458 - int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, 458 + int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, 459 459 unsigned int *bus_index) 460 460 { 461 461 unsigned int i; ··· 908 908 return result; 909 909 } 910 910 911 - int ps3_repository_read_vuart_av_port(unsigned int *port) 911 + int __init ps3_repository_read_vuart_av_port(unsigned int *port) 912 912 { 913 913 int result; 914 914 u64 v1 = 0; ··· 923 923 return result; 924 924 } 925 925 926 - int ps3_repository_read_vuart_sysmgr_port(unsigned int *port) 926 + int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) 927 927 { 928 928 int result; 929 929 u64 v1 = 0; ··· 1005 1005 be_id, NULL); 1006 1006 } 1007 1007 1008 - int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) 1008 + int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) 1009 1009 { 1010 1010 return read_node(PS3_LPAR_ID_PME, 1011 1011 make_first_field("be", 0), ··· 1015 1015 tb_freq, NULL); 1016 1016 } 1017 1017 1018 - int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) 1018 + int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) 1019 1019 { 1020 1020 int result; 1021 1021 u64 node_id; ··· 1178 1178 1179 1179 #if defined(DEBUG) 1180 1180 1181 - int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) 1181 + int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) 1182 1182 { 1183 1183 int result = 0; 1184 1184 unsigned int res_index; ··· 1231 1231 return result; 1232 1232 } 1233 1233 1234 - static int dump_stor_dev_info(struct 
ps3_repository_device *repo) 1234 + static int __init dump_stor_dev_info(struct ps3_repository_device *repo) 1235 1235 { 1236 1236 int result = 0; 1237 1237 unsigned int num_regions, region_index; ··· 1279 1279 return result; 1280 1280 } 1281 1281 1282 - static int dump_device_info(struct ps3_repository_device *repo, 1282 + static int __init dump_device_info(struct ps3_repository_device *repo, 1283 1283 unsigned int num_dev) 1284 1284 { 1285 1285 int result = 0; ··· 1323 1323 return result; 1324 1324 } 1325 1325 1326 - int ps3_repository_dump_bus_info(void) 1326 + int __init ps3_repository_dump_bus_info(void) 1327 1327 { 1328 1328 int result = 0; 1329 1329 struct ps3_repository_device repo;
+1 -1
arch/powerpc/platforms/ps3/smp.c
··· 112 112 .kick_cpu = smp_generic_kick_cpu, 113 113 }; 114 114 115 - void smp_init_ps3(void) 115 + void __init smp_init_ps3(void) 116 116 { 117 117 DBG(" -> %s\n", __func__); 118 118 smp_ops = &ps3_smp_ops;
+1 -1
arch/powerpc/platforms/ps3/spu.c
··· 137 137 } 138 138 EXPORT_SYMBOL_GPL(ps3_get_spe_id); 139 139 140 - static unsigned long get_vas_id(void) 140 + static unsigned long __init get_vas_id(void) 141 141 { 142 142 u64 id; 143 143
-5
arch/powerpc/platforms/pseries/Kconfig
··· 17 17 select PPC_RTAS_DAEMON 18 18 select RTAS_ERROR_LOGGING 19 19 select PPC_UDBG_16550 20 - select PPC_NATIVE 21 20 select PPC_DOORBELL 22 21 select HOTPLUG_CPU 23 22 select ARCH_RANDOM ··· 59 60 on supported PSERIES platforms. 60 61 Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list 61 62 and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint 62 - 63 - config SCANLOG 64 - tristate "Scanlog dump interface" 65 - depends on RTAS_PROC && PPC_PSERIES 66 63 67 64 config IO_EVENT_IRQ 68 65 bool "IO Event Interrupt support"
-1
arch/powerpc/platforms/pseries/Makefile
··· 8 8 firmware.o power.o dlpar.o mobility.o rng.o \ 9 9 pci.o pci_dlpar.o eeh_pseries.o msi.o 10 10 obj-$(CONFIG_SMP) += smp.o 11 - obj-$(CONFIG_SCANLOG) += scanlog.o 12 11 obj-$(CONFIG_KEXEC_CORE) += kexec.o 13 12 obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o 14 13
+1 -1
arch/powerpc/platforms/pseries/event_sources.c
··· 8 8 9 9 #include "pseries.h" 10 10 11 - void request_event_sources_irqs(struct device_node *np, 11 + void __init request_event_sources_irqs(struct device_node *np, 12 12 irq_handler_t handler, 13 13 const char *name) 14 14 {
+5 -4
arch/powerpc/platforms/pseries/hotplug-cpu.c
··· 864 864 /* Processors can be added/removed only on LPAR */ 865 865 if (firmware_has_feature(FW_FEATURE_LPAR)) { 866 866 for_each_node(node) { 867 - alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]); 867 + if (!alloc_cpumask_var_node(&node_recorded_ids_map[node], 868 + GFP_KERNEL, node)) 869 + return -ENOMEM; 868 870 869 871 /* Record ids of CPU added at boot time */ 870 - cpumask_or(node_recorded_ids_map[node], 871 - node_recorded_ids_map[node], 872 - cpumask_of_node(node)); 872 + cpumask_copy(node_recorded_ids_map[node], 873 + cpumask_of_node(node)); 873 874 } 874 875 875 876 of_reconfig_notifier_register(&pseries_smp_nb);
+1 -1
arch/powerpc/platforms/pseries/iommu.c
··· 1654 1654 }; 1655 1655 1656 1656 /* These are called very early. */ 1657 - void iommu_init_early_pSeries(void) 1657 + void __init iommu_init_early_pSeries(void) 1658 1658 { 1659 1659 if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) 1660 1660 return;
+39 -32
arch/powerpc/platforms/pseries/lpar.c
··· 58 58 EXPORT_SYMBOL(plpar_hcall9); 59 59 EXPORT_SYMBOL(plpar_hcall_norets); 60 60 61 + #ifdef CONFIG_PPC_64S_HASH_MMU 61 62 /* 62 63 * H_BLOCK_REMOVE supported block size for this page size in segment who's base 63 64 * page size is that page size. ··· 67 66 * page size. 68 67 */ 69 68 static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init; 69 + #endif 70 70 71 71 /* 72 72 * Due to the involved complexity, and that the current hypervisor is only ··· 691 689 return; 692 690 } 693 691 694 - #ifdef CONFIG_PPC_BOOK3S_64 692 + #ifdef CONFIG_PPC_64S_HASH_MMU 695 693 /* 696 694 * PAPR says this feature is SLB-Buffer but firmware never 697 695 * reports that. All SPLPAR support SLB shadow buffer. ··· 704 702 "cpu %d (hw %d) of area %lx failed with %ld\n", 705 703 cpu, hwcpu, addr, ret); 706 704 } 707 - #endif /* CONFIG_PPC_BOOK3S_64 */ 705 + #endif /* CONFIG_PPC_64S_HASH_MMU */ 708 706 709 707 /* 710 708 * Register dispatch trace log, if one has been allocated. ··· 713 711 } 714 712 715 713 #ifdef CONFIG_PPC_BOOK3S_64 714 + 715 + static int __init pseries_lpar_register_process_table(unsigned long base, 716 + unsigned long page_size, unsigned long table_size) 717 + { 718 + long rc; 719 + unsigned long flags = 0; 720 + 721 + if (table_size) 722 + flags |= PROC_TABLE_NEW; 723 + if (radix_enabled()) { 724 + flags |= PROC_TABLE_RADIX; 725 + if (mmu_has_feature(MMU_FTR_GTSE)) 726 + flags |= PROC_TABLE_GTSE; 727 + } else 728 + flags |= PROC_TABLE_HPT_SLB; 729 + for (;;) { 730 + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, 731 + page_size, table_size); 732 + if (!H_IS_LONG_BUSY(rc)) 733 + break; 734 + mdelay(get_longbusy_msecs(rc)); 735 + } 736 + if (rc != H_SUCCESS) { 737 + pr_err("Failed to register process table (rc=%ld)\n", rc); 738 + BUG(); 739 + } 740 + return rc; 741 + } 742 + 743 + #ifdef CONFIG_PPC_64S_HASH_MMU 716 744 717 745 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 718 746 unsigned long vpn, unsigned long pa, ··· 
1712 1680 return 0; 1713 1681 } 1714 1682 1715 - static int pseries_lpar_register_process_table(unsigned long base, 1716 - unsigned long page_size, unsigned long table_size) 1717 - { 1718 - long rc; 1719 - unsigned long flags = 0; 1720 - 1721 - if (table_size) 1722 - flags |= PROC_TABLE_NEW; 1723 - if (radix_enabled()) { 1724 - flags |= PROC_TABLE_RADIX; 1725 - if (mmu_has_feature(MMU_FTR_GTSE)) 1726 - flags |= PROC_TABLE_GTSE; 1727 - } else 1728 - flags |= PROC_TABLE_HPT_SLB; 1729 - for (;;) { 1730 - rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, 1731 - page_size, table_size); 1732 - if (!H_IS_LONG_BUSY(rc)) 1733 - break; 1734 - mdelay(get_longbusy_msecs(rc)); 1735 - } 1736 - if (rc != H_SUCCESS) { 1737 - pr_err("Failed to register process table (rc=%ld)\n", rc); 1738 - BUG(); 1739 - } 1740 - return rc; 1741 - } 1742 - 1743 1683 void __init hpte_init_pseries(void) 1744 1684 { 1745 1685 mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; ··· 1734 1730 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1735 1731 pseries_lpar_register_process_table(0, 0, 0); 1736 1732 } 1733 + #endif /* CONFIG_PPC_64S_HASH_MMU */ 1737 1734 1738 1735 #ifdef CONFIG_PPC_RADIX_MMU 1739 - void radix_init_pseries(void) 1736 + void __init radix_init_pseries(void) 1740 1737 { 1741 1738 pr_info("Using radix MMU under hypervisor\n"); 1742 1739 ··· 1937 1932 return rc; 1938 1933 } 1939 1934 1940 - static unsigned long vsid_unscramble(unsigned long vsid, int ssize) 1935 + #ifdef CONFIG_PPC_64S_HASH_MMU 1936 + static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize) 1941 1937 { 1942 1938 unsigned long protovsid; 1943 1939 unsigned long va_bits = VA_BITS; ··· 1998 1992 return 0; 1999 1993 } 2000 1994 machine_device_initcall(pseries, reserve_vrma_context_id); 1995 + #endif 2001 1996 2002 1997 #ifdef CONFIG_DEBUG_FS 2003 1998 /* debugfs file interface for vpa data */
+3 -2
arch/powerpc/platforms/pseries/lparcfg.c
··· 531 531 seq_printf(m, "shared_processor_mode=%d\n", 532 532 lppaca_shared_proc(get_lppaca())); 533 533 534 - #ifdef CONFIG_PPC_BOOK3S_64 535 - seq_printf(m, "slb_size=%d\n", mmu_slb_size); 534 + #ifdef CONFIG_PPC_64S_HASH_MMU 535 + if (!radix_enabled()) 536 + seq_printf(m, "slb_size=%d\n", mmu_slb_size); 536 537 #endif 537 538 parse_em_data(m); 538 539 maxmem_data(m);
+4
arch/powerpc/platforms/pseries/mobility.c
··· 451 451 452 452 static u16 clamp_slb_size(void) 453 453 { 454 + #ifdef CONFIG_PPC_64S_HASH_MMU 454 455 u16 prev = mmu_slb_size; 455 456 456 457 slb_set_size(SLB_MIN_SIZE); 457 458 458 459 return prev; 460 + #else 461 + return 0; 462 + #endif 459 463 } 460 464 461 465 static int do_suspend(void)
+6 -1
arch/powerpc/platforms/pseries/pseries.h
··· 11 11 12 12 struct device_node; 13 13 14 - extern void request_event_sources_irqs(struct device_node *np, 14 + void __init request_event_sources_irqs(struct device_node *np, 15 15 irq_handler_t handler, const char *name); 16 16 17 17 #include <linux/of.h> ··· 113 113 114 114 extern u32 pseries_security_flavor; 115 115 void pseries_setup_security_mitigations(void); 116 + 117 + #ifdef CONFIG_PPC_64S_HASH_MMU 116 118 void pseries_lpar_read_hblkrm_characteristics(void); 119 + #else 120 + static inline void pseries_lpar_read_hblkrm_characteristics(void) { } 121 + #endif 117 122 118 123 #endif /* _PSERIES_PSERIES_H */
+2
arch/powerpc/platforms/pseries/ras.c
··· 526 526 disposition = RTAS_DISP_FULLY_RECOVERED; 527 527 break; 528 528 case MC_ERROR_TYPE_SLB: 529 + #ifdef CONFIG_PPC_64S_HASH_MMU 529 530 /* 530 531 * Store the old slb content in paca before flushing. 531 532 * Print this when we go to virtual mode. ··· 539 538 slb_save_contents(local_paca->mce_faulty_slbs); 540 539 flush_and_reload_slb(); 541 540 disposition = RTAS_DISP_FULLY_RECOVERED; 541 + #endif 542 542 break; 543 543 default: 544 544 break;
+3 -3
arch/powerpc/platforms/pseries/rtas-fadump.c
··· 39 39 * This function is called in the capture kernel to get configuration details 40 40 * setup in the first kernel and passed to the f/w. 41 41 */ 42 - static void rtas_fadump_get_config(struct fw_dump *fadump_conf, 42 + static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, 43 43 const struct rtas_fadump_mem_struct *fdm) 44 44 { 45 45 fadump_conf->boot_mem_addr[0] = ··· 247 247 return i; 248 248 } 249 249 250 - static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) 250 + static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) 251 251 { 252 252 int i; 253 253 ··· 272 272 regs->dsisr = (unsigned long)reg_val; 273 273 } 274 274 275 - static struct rtas_fadump_reg_entry* 275 + static struct rtas_fadump_reg_entry* __init 276 276 rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, 277 277 struct pt_regs *regs) 278 278 {
-195
arch/powerpc/platforms/pseries/scanlog.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-or-later 2 - /* 3 - * c 2001 PPC 64 Team, IBM Corp 4 - * 5 - * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> 6 - * 7 - * When ppc64 hardware fails the service processor dumps internal state 8 - * of the system. After a reboot the operating system can access a dump 9 - * of this data using this driver. A dump exists if the device-tree 10 - * /chosen/ibm,scan-log-data property exists. 11 - * 12 - * This driver exports /proc/powerpc/scan-log-dump which can be read. 13 - * The driver supports only sequential reads. 14 - * 15 - * The driver looks at a write to the driver for the single word "reset". 16 - * If given, the driver will reset the scanlog so the platform can free it. 17 - */ 18 - 19 - #include <linux/module.h> 20 - #include <linux/types.h> 21 - #include <linux/errno.h> 22 - #include <linux/proc_fs.h> 23 - #include <linux/init.h> 24 - #include <linux/delay.h> 25 - #include <linux/slab.h> 26 - #include <linux/uaccess.h> 27 - #include <asm/rtas.h> 28 - #include <asm/prom.h> 29 - 30 - #define MODULE_VERS "1.0" 31 - #define MODULE_NAME "scanlog" 32 - 33 - /* Status returns from ibm,scan-log-dump */ 34 - #define SCANLOG_COMPLETE 0 35 - #define SCANLOG_HWERROR -1 36 - #define SCANLOG_CONTINUE 1 37 - 38 - 39 - static unsigned int ibm_scan_log_dump; /* RTAS token */ 40 - static unsigned int *scanlog_buffer; /* The data buffer */ 41 - 42 - static ssize_t scanlog_read(struct file *file, char __user *buf, 43 - size_t count, loff_t *ppos) 44 - { 45 - unsigned int *data = scanlog_buffer; 46 - int status; 47 - unsigned long len, off; 48 - unsigned int wait_time; 49 - 50 - if (count > RTAS_DATA_BUF_SIZE) 51 - count = RTAS_DATA_BUF_SIZE; 52 - 53 - if (count < 1024) { 54 - /* This is the min supported by this RTAS call. Rather 55 - * than do all the buffering we insist the user code handle 56 - * larger reads. As long as cp works... 
:) 57 - */ 58 - printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); 59 - return -EINVAL; 60 - } 61 - 62 - if (!access_ok(buf, count)) 63 - return -EFAULT; 64 - 65 - for (;;) { 66 - wait_time = 500; /* default wait if no data */ 67 - spin_lock(&rtas_data_buf_lock); 68 - memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); 69 - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 70 - (u32) __pa(rtas_data_buf), (u32) count); 71 - memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); 72 - spin_unlock(&rtas_data_buf_lock); 73 - 74 - pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ 75 - "data[2]=%x\n", status, data[0], data[1], data[2]); 76 - switch (status) { 77 - case SCANLOG_COMPLETE: 78 - pr_debug("scanlog: hit eof\n"); 79 - return 0; 80 - case SCANLOG_HWERROR: 81 - pr_debug("scanlog: hardware error reading data\n"); 82 - return -EIO; 83 - case SCANLOG_CONTINUE: 84 - /* We may or may not have data yet */ 85 - len = data[1]; 86 - off = data[2]; 87 - if (len > 0) { 88 - if (copy_to_user(buf, ((char *)data)+off, len)) 89 - return -EFAULT; 90 - return len; 91 - } 92 - /* Break to sleep default time */ 93 - break; 94 - default: 95 - /* Assume extended busy */ 96 - wait_time = rtas_busy_delay_time(status); 97 - if (!wait_time) { 98 - printk(KERN_ERR "scanlog: unknown error " \ 99 - "from rtas: %d\n", status); 100 - return -EIO; 101 - } 102 - } 103 - /* Apparently no data yet. Wait and try again. 
*/ 104 - msleep_interruptible(wait_time); 105 - } 106 - /*NOTREACHED*/ 107 - } 108 - 109 - static ssize_t scanlog_write(struct file * file, const char __user * buf, 110 - size_t count, loff_t *ppos) 111 - { 112 - char stkbuf[20]; 113 - int status; 114 - 115 - if (count > 19) count = 19; 116 - if (copy_from_user (stkbuf, buf, count)) { 117 - return -EFAULT; 118 - } 119 - stkbuf[count] = 0; 120 - 121 - if (buf) { 122 - if (strncmp(stkbuf, "reset", 5) == 0) { 123 - pr_debug("scanlog: reset scanlog\n"); 124 - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); 125 - pr_debug("scanlog: rtas returns %d\n", status); 126 - } 127 - } 128 - return count; 129 - } 130 - 131 - static int scanlog_open(struct inode * inode, struct file * file) 132 - { 133 - unsigned int *data = scanlog_buffer; 134 - 135 - if (data[0] != 0) { 136 - /* This imperfect test stops a second copy of the 137 - * data (or a reset while data is being copied) 138 - */ 139 - return -EBUSY; 140 - } 141 - 142 - data[0] = 0; /* re-init so we restart the scan */ 143 - 144 - return 0; 145 - } 146 - 147 - static int scanlog_release(struct inode * inode, struct file * file) 148 - { 149 - unsigned int *data = scanlog_buffer; 150 - 151 - data[0] = 0; 152 - return 0; 153 - } 154 - 155 - static const struct proc_ops scanlog_proc_ops = { 156 - .proc_read = scanlog_read, 157 - .proc_write = scanlog_write, 158 - .proc_open = scanlog_open, 159 - .proc_release = scanlog_release, 160 - .proc_lseek = noop_llseek, 161 - }; 162 - 163 - static int __init scanlog_init(void) 164 - { 165 - struct proc_dir_entry *ent; 166 - int err = -ENOMEM; 167 - 168 - ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); 169 - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) 170 - return -ENODEV; 171 - 172 - /* Ideally we could allocate a buffer < 4G */ 173 - scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); 174 - if (!scanlog_buffer) 175 - goto err; 176 - 177 - ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, 178 - 
&scanlog_proc_ops); 179 - if (!ent) 180 - goto err; 181 - return 0; 182 - err: 183 - kfree(scanlog_buffer); 184 - return err; 185 - } 186 - 187 - static void __exit scanlog_cleanup(void) 188 - { 189 - remove_proc_entry("powerpc/rtas/scan-log-dump", NULL); 190 - kfree(scanlog_buffer); 191 - } 192 - 193 - module_init(scanlog_init); 194 - module_exit(scanlog_cleanup); 195 - MODULE_LICENSE("GPL");
+6 -4
arch/powerpc/platforms/pseries/setup.c
··· 112 112 u8 *mce_data_buf; 113 113 unsigned int i; 114 114 int nr_cpus = num_possible_cpus(); 115 - #ifdef CONFIG_PPC_BOOK3S_64 115 + #ifdef CONFIG_PPC_64S_HASH_MMU 116 116 struct slb_entry *slb_ptr; 117 117 size_t size; 118 118 #endif ··· 152 152 (RTAS_ERROR_LOG_MAX * i); 153 153 } 154 154 155 - #ifdef CONFIG_PPC_BOOK3S_64 155 + #ifdef CONFIG_PPC_64S_HASH_MMU 156 156 if (!radix_enabled()) { 157 157 /* Allocate per cpu area to save old slb contents during MCE */ 158 158 size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; ··· 447 447 panic("Could not enable big endian exceptions"); 448 448 } 449 449 450 - void pseries_little_endian_exceptions(void) 450 + void __init pseries_little_endian_exceptions(void) 451 451 { 452 452 long rc; 453 453 ··· 801 801 fwnmi_init(); 802 802 803 803 pseries_setup_security_mitigations(); 804 + #ifdef CONFIG_PPC_64S_HASH_MMU 804 805 pseries_lpar_read_hblkrm_characteristics(); 806 + #endif 805 807 806 808 /* By default, only probe PCI (can be overridden by rtas_pci) */ 807 809 pci_add_flags(PCI_PROBE_ONLY); ··· 907 905 * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, 908 906 * handle that here. (Stolen from parse_system_parameter_string) 909 907 */ 910 - static void pSeries_cmo_feature_init(void) 908 + static void __init pSeries_cmo_feature_init(void) 911 909 { 912 910 char *ptr, *key, *value, *end; 913 911 int call_status;
+10 -3
arch/powerpc/platforms/pseries/vas.c
··· 151 151 if (rc == H_SUCCESS) 152 152 return 0; 153 153 154 - pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", 155 - hcall, rc, query_type, result); 154 + /* H_FUNCTION means HV does not support VAS so don't print an error */ 155 + if (rc != H_FUNCTION) { 156 + pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", 157 + (hcall == H_QUERY_VAS_CAPABILITIES) ? 158 + "H_QUERY_VAS_CAPABILITIES" : 159 + "H_QUERY_NX_CAPABILITIES", 160 + rc, query_type, result); 161 + } 162 + 156 163 return -EIO; 157 164 } 158 165 EXPORT_SYMBOL_GPL(h_query_vas_capabilities); ··· 489 482 * Get the specific capabilities based on the feature type. 490 483 * Right now supports GZIP default and GZIP QoS capabilities. 491 484 */ 492 - static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 485 + static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, 493 486 struct hv_vas_cop_feat_caps *hv_caps) 494 487 { 495 488 struct vas_cop_feat_caps *caps;
+3 -3
arch/powerpc/platforms/pseries/vio.c
··· 1061 1061 }; 1062 1062 ATTRIBUTE_GROUPS(vio_bus); 1063 1063 1064 - static void vio_cmo_sysfs_init(void) 1064 + static void __init vio_cmo_sysfs_init(void) 1065 1065 { 1066 1066 vio_bus_type.dev_groups = vio_cmo_dev_groups; 1067 1067 vio_bus_type.bus_groups = vio_bus_groups; ··· 1073 1073 static void vio_cmo_bus_remove(struct vio_dev *viodev) {} 1074 1074 static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} 1075 1075 static void vio_cmo_bus_init(void) {} 1076 - static void vio_cmo_sysfs_init(void) { } 1076 + static void __init vio_cmo_sysfs_init(void) { } 1077 1077 #endif /* CONFIG_PPC_SMLPAR */ 1078 1078 EXPORT_SYMBOL(vio_cmo_entitlement_update); 1079 1079 EXPORT_SYMBOL(vio_cmo_set_dev_desired); ··· 1479 1479 * Starting from the root node provide, register the device node for 1480 1480 * each child beneath the root. 1481 1481 */ 1482 - static void vio_bus_scan_register_devices(char *root_name) 1482 + static void __init vio_bus_scan_register_devices(char *root_name) 1483 1483 { 1484 1484 struct device_node *node_root, *node_child; 1485 1485
+3 -3
arch/powerpc/sysdev/cpm2.c
··· 135 135 } 136 136 EXPORT_SYMBOL(__cpm2_setbrg); 137 137 138 - int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) 138 + int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) 139 139 { 140 140 int ret = 0; 141 141 int shift; ··· 265 265 return ret; 266 266 } 267 267 268 - int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) 268 + int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) 269 269 { 270 270 int ret = 0; 271 271 int shift; ··· 326 326 u32 res[3]; 327 327 }; 328 328 329 - void cpm2_set_pin(int port, int pin, int flags) 329 + void __init cpm2_set_pin(int port, int pin, int flags) 330 330 { 331 331 struct cpm2_ioports __iomem *iop = 332 332 (struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport;
+1 -1
arch/powerpc/sysdev/dart_iommu.c
··· 226 226 dart_cache_sync(orig_dp, orig_npages); 227 227 } 228 228 229 - static void allocate_dart(void) 229 + static void __init allocate_dart(void) 230 230 { 231 231 unsigned long tmp; 232 232
+2 -2
arch/powerpc/sysdev/fsl_mpic_err.c
··· 58 58 .irq_unmask = fsl_mpic_unmask_err, 59 59 }; 60 60 61 - int mpic_setup_error_int(struct mpic *mpic, int intvec) 61 + int __init mpic_setup_error_int(struct mpic *mpic, int intvec) 62 62 { 63 63 int i; 64 64 ··· 121 121 return IRQ_HANDLED; 122 122 } 123 123 124 - void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) 124 + void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) 125 125 { 126 126 unsigned int virq; 127 127 int ret;
+1 -1
arch/powerpc/sysdev/fsl_pci.c
··· 1106 1106 1107 1107 struct device_node *fsl_pci_primary; 1108 1108 1109 - void fsl_pci_assign_primary(void) 1109 + void __init fsl_pci_assign_primary(void) 1110 1110 { 1111 1111 struct device_node *np; 1112 1112
+1 -1
arch/powerpc/sysdev/fsl_pci.h
··· 120 120 extern struct device_node *fsl_pci_primary; 121 121 122 122 #ifdef CONFIG_PCI 123 - void fsl_pci_assign_primary(void); 123 + void __init fsl_pci_assign_primary(void); 124 124 #else 125 125 static inline void fsl_pci_assign_primary(void) {} 126 126 #endif
+1 -1
arch/powerpc/sysdev/i8259.c
··· 208 208 .xlate = i8259_host_xlate, 209 209 }; 210 210 211 - struct irq_domain *i8259_get_host(void) 211 + struct irq_domain *__init i8259_get_host(void) 212 212 { 213 213 return i8259_host; 214 214 }
+1 -1
arch/powerpc/sysdev/ipic.c
··· 767 767 return ipic; 768 768 } 769 769 770 - void ipic_set_default_priority(void) 770 + void __init ipic_set_default_priority(void) 771 771 { 772 772 ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT); 773 773 ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT);
+2 -3
arch/powerpc/sysdev/mpic.c
··· 1323 1323 psrc = of_get_property(mpic->node, "protected-sources", &psize); 1324 1324 if (psrc) { 1325 1325 /* Allocate a bitmap with one bit per interrupt */ 1326 - unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1); 1327 - mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL); 1326 + mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL); 1328 1327 BUG_ON(mpic->protected == NULL); 1329 1328 for (i = 0; i < psize/sizeof(u32); i++) { 1330 1329 if (psrc[i] > intvec_top) ··· 1839 1840 } 1840 1841 1841 1842 #ifdef CONFIG_SMP 1842 - void mpic_request_ipis(void) 1843 + void __init mpic_request_ipis(void) 1843 1844 { 1844 1845 struct mpic *mpic = mpic_primary; 1845 1846 int i;
+5 -5
arch/powerpc/sysdev/mpic.h
··· 8 8 9 9 #ifdef CONFIG_PCI_MSI 10 10 extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq); 11 - extern int mpic_msi_init_allocator(struct mpic *mpic); 12 - extern int mpic_u3msi_init(struct mpic *mpic); 11 + int __init mpic_msi_init_allocator(struct mpic *mpic); 12 + int __init mpic_u3msi_init(struct mpic *mpic); 13 13 #else 14 14 static inline void mpic_msi_reserve_hwirq(struct mpic *mpic, 15 15 irq_hw_number_t hwirq) ··· 24 24 #endif 25 25 26 26 #if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI) 27 - int mpic_pasemi_msi_init(struct mpic *mpic); 27 + int __init mpic_pasemi_msi_init(struct mpic *mpic); 28 28 #else 29 29 static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; } 30 30 #endif ··· 37 37 38 38 #ifdef CONFIG_FSL_SOC 39 39 extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw); 40 - extern void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); 41 - extern int mpic_setup_error_int(struct mpic *mpic, int intvec); 40 + void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); 41 + int __init mpic_setup_error_int(struct mpic *mpic, int intvec); 42 42 #else 43 43 static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw) 44 44 {
+3 -3
arch/powerpc/sysdev/mpic_msi.c
··· 24 24 } 25 25 26 26 #ifdef CONFIG_MPIC_U3_HT_IRQS 27 - static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) 27 + static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) 28 28 { 29 29 irq_hw_number_t hwirq; 30 30 const struct irq_domain_ops *ops = mpic->irqhost->ops; ··· 68 68 return 0; 69 69 } 70 70 #else 71 - static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) 71 + static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) 72 72 { 73 73 return -1; 74 74 } 75 75 #endif 76 76 77 - int mpic_msi_init_allocator(struct mpic *mpic) 77 + int __init mpic_msi_init_allocator(struct mpic *mpic) 78 78 { 79 79 int rc; 80 80
+3 -3
arch/powerpc/sysdev/mpic_timer.c
··· 384 384 } 385 385 EXPORT_SYMBOL(mpic_request_timer); 386 386 387 - static int timer_group_get_freq(struct device_node *np, 387 + static int __init timer_group_get_freq(struct device_node *np, 388 388 struct timer_group_priv *priv) 389 389 { 390 390 u32 div; ··· 411 411 return 0; 412 412 } 413 413 414 - static int timer_group_get_irq(struct device_node *np, 414 + static int __init timer_group_get_irq(struct device_node *np, 415 415 struct timer_group_priv *priv) 416 416 { 417 417 const u32 all_timer[] = { 0, TIMERS_PER_GROUP }; ··· 459 459 return 0; 460 460 } 461 461 462 - static void timer_group_init(struct device_node *np) 462 + static void __init timer_group_init(struct device_node *np) 463 463 { 464 464 struct timer_group_priv *priv; 465 465 unsigned int i = 0;
+1 -1
arch/powerpc/sysdev/mpic_u3msi.c
··· 169 169 return 0; 170 170 } 171 171 172 - int mpic_u3msi_init(struct mpic *mpic) 172 + int __init mpic_u3msi_init(struct mpic *mpic) 173 173 { 174 174 int rc; 175 175 struct pci_controller *phb;
+1 -2
arch/powerpc/sysdev/tsi108_dev.c
··· 51 51 } 52 52 return tsi108_csr_base; 53 53 } 54 + EXPORT_SYMBOL(get_csrbase); 54 55 55 56 u32 get_vir_csrbase(void) 56 57 { 57 58 return (u32) (ioremap(get_csrbase(), 0x10000)); 58 59 } 59 - 60 - EXPORT_SYMBOL(get_csrbase); 61 60 EXPORT_SYMBOL(get_vir_csrbase); 62 61 63 62 static int __init tsi108_eth_of_init(void)
+1 -1
arch/powerpc/sysdev/tsi108_pci.c
··· 257 257 mb(); 258 258 } 259 259 260 - static void init_pci_source(void) 260 + static void __init init_pci_source(void) 261 261 { 262 262 tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, 263 263 0x0000ff00);
+1 -1
arch/powerpc/sysdev/udbg_memcons.c
··· 92 92 return c; 93 93 } 94 94 95 - void udbg_init_memcons(void) 95 + void __init udbg_init_memcons(void) 96 96 { 97 97 udbg_putc = memcons_putc; 98 98 udbg_getc = memcons_getc;
+1 -1
arch/powerpc/sysdev/xics/icp-hv.c
··· 162 162 #endif 163 163 }; 164 164 165 - int icp_hv_init(void) 165 + int __init icp_hv_init(void) 166 166 { 167 167 struct device_node *np; 168 168
+1 -1
arch/powerpc/sysdev/xics/icp-opal.c
··· 184 184 #endif 185 185 }; 186 186 187 - int icp_opal_init(void) 187 + int __init icp_opal_init(void) 188 188 { 189 189 struct device_node *np; 190 190
+1 -1
arch/powerpc/sysdev/xics/xics-common.c
··· 121 121 122 122 #ifdef CONFIG_SMP 123 123 124 - static void xics_request_ipi(void) 124 + static void __init xics_request_ipi(void) 125 125 { 126 126 unsigned int ipi; 127 127
+147 -69
arch/powerpc/sysdev/xive/common.c
··· 85 85 #define XIVE_INVALID_TARGET (-1) 86 86 87 87 /* 88 + * Global toggle to switch on/off StoreEOI 89 + */ 90 + static bool xive_store_eoi = true; 91 + 92 + static bool xive_is_store_eoi(struct xive_irq_data *xd) 93 + { 94 + return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi; 95 + } 96 + 97 + /* 88 98 * Read the next entry in a queue, return its content if it's valid 89 99 * or 0 if there is no new entry. 90 100 * ··· 218 208 { 219 209 u64 val; 220 210 221 - if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) 211 + if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd)) 222 212 offset |= XIVE_ESB_LD_ST_MO; 223 213 224 214 if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) ··· 236 226 else 237 227 out_be64(xd->eoi_mmio + offset, data); 238 228 } 229 + 230 + #if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS) 231 + static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size) 232 + { 233 + u64 val = xive_esb_read(xd, XIVE_ESB_GET); 234 + 235 + snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx", 236 + xive_is_store_eoi(xd) ? 'S' : ' ', 237 + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', 238 + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', 239 + val & XIVE_ESB_VAL_P ? 'P' : '-', 240 + val & XIVE_ESB_VAL_Q ? 'Q' : '-', 241 + xd->trig_page, xd->eoi_page); 242 + } 243 + #endif 239 244 240 245 #ifdef CONFIG_XMON 241 246 static notrace void xive_dump_eq(const char *name, struct xive_q *q) ··· 277 252 278 253 #ifdef CONFIG_SMP 279 254 { 280 - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); 255 + char buffer[128]; 281 256 282 - xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi, 283 - val & XIVE_ESB_VAL_P ? 'P' : '-', 284 - val & XIVE_ESB_VAL_Q ? 
'Q' : '-'); 257 + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); 258 + xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer); 285 259 } 286 260 #endif 287 261 xive_dump_eq("EQ", &xc->queue[xive_irq_priority]); ··· 315 291 d = xive_get_irq_data(hw_irq); 316 292 317 293 if (d) { 318 - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); 319 - u64 val = xive_esb_read(xd, XIVE_ESB_GET); 294 + char buffer[128]; 320 295 321 - xmon_printf("flags=%c%c%c PQ=%c%c", 322 - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', 323 - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', 324 - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', 325 - val & XIVE_ESB_VAL_P ? 'P' : '-', 326 - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); 296 + xive_irq_data_dump(irq_data_get_irq_handler_data(d), 297 + buffer, sizeof(buffer)); 298 + xmon_printf("%s", buffer); 327 299 } 328 300 329 301 xmon_printf("\n"); ··· 405 385 xd->stale_p = false; 406 386 407 387 /* If the XIVE supports the new "store EOI facility, use it */ 408 - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { 388 + if (xive_is_store_eoi(xd)) { 409 389 xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); 410 390 return; 411 391 } ··· 470 450 bool mask) 471 451 { 472 452 u64 val; 453 + 454 + pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un"); 473 455 474 456 /* 475 457 * If the interrupt had P set, it may be in a queue. 
··· 634 612 635 613 xd->saved_p = false; 636 614 xd->stale_p = false; 637 - pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", 638 - d->irq, hw_irq, d); 615 + 616 + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); 639 617 640 618 /* Pick a target */ 641 619 target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); ··· 676 654 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); 677 655 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); 678 656 679 - pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n", 680 - d->irq, hw_irq, d); 657 + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); 681 658 682 659 if (WARN_ON(xd->target == XIVE_INVALID_TARGET)) 683 660 return; ··· 700 679 { 701 680 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); 702 681 703 - pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); 682 + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); 704 683 705 684 xive_do_source_set_mask(xd, false); 706 685 } ··· 709 688 { 710 689 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); 711 690 712 - pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); 691 + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); 713 692 714 693 xive_do_source_set_mask(xd, true); 715 694 } ··· 723 702 u32 target, old_target; 724 703 int rc = 0; 725 704 726 - pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq); 705 + pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); 727 706 728 707 /* Is this valid ? 
*/ 729 708 if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) ··· 996 975 997 976 void xive_cleanup_irq_data(struct xive_irq_data *xd) 998 977 { 999 - pr_debug("%s for HW %x\n", __func__, xd->hw_irq); 978 + pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq); 1000 979 1001 980 if (xd->eoi_mmio) { 1002 981 iounmap(xd->eoi_mmio); ··· 1232 1211 pr_err("Failed to map IPI CPU %d\n", cpu); 1233 1212 return -EIO; 1234 1213 } 1235 - pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu, 1236 - xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); 1214 + pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu, 1215 + xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); 1237 1216 1238 1217 /* Unmask it */ 1239 1218 xive_do_source_set_mask(&xc->ipi_data, false); ··· 1411 1390 if (rc) 1412 1391 return rc; 1413 1392 1414 - pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); 1393 + pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs); 1415 1394 1416 1395 for (i = 0; i < nr_irqs; i++) { 1417 1396 /* TODO: call xive_irq_domain_map() */ ··· 1525 1504 #ifdef CONFIG_SMP 1526 1505 void xive_smp_setup_cpu(void) 1527 1506 { 1528 - pr_devel("SMP setup CPU %d\n", smp_processor_id()); 1507 + pr_debug("SMP setup CPU %d\n", smp_processor_id()); 1529 1508 1530 1509 /* This will have already been done on the boot CPU */ 1531 1510 if (smp_processor_id() != boot_cpuid) ··· 1671 1650 ppc_md.get_irq = xive_get_irq; 1672 1651 __xive_enabled = true; 1673 1652 1674 - pr_devel("Initializing host..\n"); 1653 + pr_debug("Initializing host..\n"); 1675 1654 xive_init_host(np); 1676 1655 1677 - pr_devel("Initializing boot CPU..\n"); 1656 + pr_debug("Initializing boot CPU..\n"); 1678 1657 1679 1658 /* Allocate per-CPU data and queues */ 1680 1659 xive_prepare_cpu(smp_processor_id()); ··· 1712 1691 } 1713 1692 __setup("xive=off", xive_off); 1714 1693 1715 - static void xive_debug_show_cpu(struct seq_file *m, int cpu) 1694 + static int __init xive_store_eoi_cmdline(char *arg) 
1695 + { 1696 + if (!arg) 1697 + return -EINVAL; 1698 + 1699 + if (strncmp(arg, "off", 3) == 0) { 1700 + pr_info("StoreEOI disabled on kernel command line\n"); 1701 + xive_store_eoi = false; 1702 + } 1703 + return 0; 1704 + } 1705 + __setup("xive.store-eoi=", xive_store_eoi_cmdline); 1706 + 1707 + #ifdef CONFIG_DEBUG_FS 1708 + static void xive_debug_show_ipi(struct seq_file *m, int cpu) 1716 1709 { 1717 1710 struct xive_cpu *xc = per_cpu(xive_cpu, cpu); 1718 1711 1719 - seq_printf(m, "CPU %d:", cpu); 1712 + seq_printf(m, "CPU %d: ", cpu); 1720 1713 if (xc) { 1721 1714 seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); 1722 1715 1723 1716 #ifdef CONFIG_SMP 1724 1717 { 1725 - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); 1718 + char buffer[128]; 1726 1719 1727 - seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi, 1728 - val & XIVE_ESB_VAL_P ? 'P' : '-', 1729 - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); 1720 + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); 1721 + seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer); 1730 1722 } 1731 1723 #endif 1732 - { 1733 - struct xive_q *q = &xc->queue[xive_irq_priority]; 1734 - u32 i0, i1, idx; 1735 - 1736 - if (q->qpage) { 1737 - idx = q->idx; 1738 - i0 = be32_to_cpup(q->qpage + idx); 1739 - idx = (idx + 1) & q->msk; 1740 - i1 = be32_to_cpup(q->qpage + idx); 1741 - seq_printf(m, "EQ idx=%d T=%d %08x %08x ...", 1742 - q->idx, q->toggle, i0, i1); 1743 - } 1744 - } 1745 1724 } 1746 1725 seq_puts(m, "\n"); 1747 1726 } ··· 1753 1732 u32 target; 1754 1733 u8 prio; 1755 1734 u32 lirq; 1756 - struct xive_irq_data *xd; 1757 - u64 val; 1735 + char buffer[128]; 1758 1736 1759 1737 rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); 1760 1738 if (rc) { ··· 1764 1744 seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", 1765 1745 hw_irq, target, prio, lirq); 1766 1746 1767 - xd = irq_data_get_irq_handler_data(d); 1768 - val = xive_esb_read(xd, XIVE_ESB_GET); 1769 - seq_printf(m, "flags=%c%c%c PQ=%c%c", 
1770 - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', 1771 - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', 1772 - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', 1773 - val & XIVE_ESB_VAL_P ? 'P' : '-', 1774 - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); 1747 + xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer)); 1748 + seq_puts(m, buffer); 1775 1749 seq_puts(m, "\n"); 1776 1750 } 1777 1751 1778 - static int xive_core_debug_show(struct seq_file *m, void *private) 1752 + static int xive_irq_debug_show(struct seq_file *m, void *private) 1779 1753 { 1780 1754 unsigned int i; 1781 1755 struct irq_desc *desc; 1782 - int cpu; 1783 - 1784 - if (xive_ops->debug_show) 1785 - xive_ops->debug_show(m, private); 1786 - 1787 - for_each_possible_cpu(cpu) 1788 - xive_debug_show_cpu(m, cpu); 1789 1756 1790 1757 for_each_irq_desc(i, desc) { 1791 1758 struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); ··· 1782 1775 } 1783 1776 return 0; 1784 1777 } 1785 - DEFINE_SHOW_ATTRIBUTE(xive_core_debug); 1778 + DEFINE_SHOW_ATTRIBUTE(xive_irq_debug); 1779 + 1780 + static int xive_ipi_debug_show(struct seq_file *m, void *private) 1781 + { 1782 + int cpu; 1783 + 1784 + if (xive_ops->debug_show) 1785 + xive_ops->debug_show(m, private); 1786 + 1787 + for_each_possible_cpu(cpu) 1788 + xive_debug_show_ipi(m, cpu); 1789 + return 0; 1790 + } 1791 + DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug); 1792 + 1793 + static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio) 1794 + { 1795 + int i; 1796 + 1797 + seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle); 1798 + if (q->qpage) { 1799 + for (i = 0; i < q->msk + 1; i++) { 1800 + if (!(i % 8)) 1801 + seq_printf(m, "%05d ", i); 1802 + seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i), 1803 + (i + 1) % 8 ? 
" " : "\n"); 1804 + } 1805 + } 1806 + seq_puts(m, "\n"); 1807 + } 1808 + 1809 + static int xive_eq_debug_show(struct seq_file *m, void *private) 1810 + { 1811 + int cpu = (long)m->private; 1812 + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); 1813 + 1814 + if (xc) 1815 + xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority], 1816 + xive_irq_priority); 1817 + return 0; 1818 + } 1819 + DEFINE_SHOW_ATTRIBUTE(xive_eq_debug); 1820 + 1821 + static void xive_core_debugfs_create(void) 1822 + { 1823 + struct dentry *xive_dir; 1824 + struct dentry *xive_eq_dir; 1825 + long cpu; 1826 + char name[16]; 1827 + 1828 + xive_dir = debugfs_create_dir("xive", arch_debugfs_dir); 1829 + if (IS_ERR(xive_dir)) 1830 + return; 1831 + 1832 + debugfs_create_file("ipis", 0400, xive_dir, 1833 + NULL, &xive_ipi_debug_fops); 1834 + debugfs_create_file("interrupts", 0400, xive_dir, 1835 + NULL, &xive_irq_debug_fops); 1836 + xive_eq_dir = debugfs_create_dir("eqs", xive_dir); 1837 + for_each_possible_cpu(cpu) { 1838 + snprintf(name, sizeof(name), "cpu%ld", cpu); 1839 + debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu, 1840 + &xive_eq_debug_fops); 1841 + } 1842 + debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi); 1843 + 1844 + if (xive_ops->debug_create) 1845 + xive_ops->debug_create(xive_dir); 1846 + } 1847 + #else 1848 + static inline void xive_core_debugfs_create(void) { } 1849 + #endif /* CONFIG_DEBUG_FS */ 1786 1850 1787 1851 int xive_core_debug_init(void) 1788 1852 { 1789 - if (xive_enabled()) 1790 - debugfs_create_file("xive", 0400, arch_debugfs_dir, 1791 - NULL, &xive_core_debug_fops); 1853 + if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS)) 1854 + xive_core_debugfs_create(); 1855 + 1792 1856 return 0; 1793 1857 }
+16 -3
arch/powerpc/sysdev/xive/native.c
··· 41 41 static u32 xive_pool_vps = XIVE_INVALID_VP; 42 42 static struct kmem_cache *xive_provision_cache; 43 43 static bool xive_has_single_esc; 44 - static bool xive_has_save_restore; 44 + bool xive_has_save_restore; 45 45 46 46 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) 47 47 { ··· 62 62 63 63 opal_flags = be64_to_cpu(flags); 64 64 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI) 65 + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; 66 + if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2) 65 67 data->flags |= XIVE_IRQ_FLAG_STORE_EOI; 66 68 if (opal_flags & OPAL_XIVE_IRQ_LSI) 67 69 data->flags |= XIVE_IRQ_FLAG_LSI; ··· 461 459 } 462 460 EXPORT_SYMBOL_GPL(xive_native_sync_queue); 463 461 462 + #ifdef CONFIG_DEBUG_FS 463 + static int xive_native_debug_create(struct dentry *xive_dir) 464 + { 465 + debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore); 466 + return 0; 467 + } 468 + #endif 469 + 464 470 static const struct xive_ops xive_native_ops = { 465 471 .populate_irq_data = xive_native_populate_irq_data, 466 472 .configure_irq = xive_native_configure_irq, ··· 486 476 .get_ipi = xive_native_get_ipi, 487 477 .put_ipi = xive_native_put_ipi, 488 478 #endif /* CONFIG_SMP */ 479 + #ifdef CONFIG_DEBUG_FS 480 + .debug_create = xive_native_debug_create, 481 + #endif /* CONFIG_DEBUG_FS */ 489 482 .name = "native", 490 483 }; 491 484 492 - static bool xive_parse_provisioning(struct device_node *np) 485 + static bool __init xive_parse_provisioning(struct device_node *np) 493 486 { 494 487 int rc; 495 488 ··· 532 519 return true; 533 520 } 534 521 535 - static void xive_native_setup_pools(void) 522 + static void __init xive_native_setup_pools(void) 536 523 { 537 524 /* Allocate a pool big enough */ 538 525 pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids);
+25 -22
arch/powerpc/sysdev/xive/spapr.c
··· 44 44 45 45 static LIST_HEAD(xive_irq_bitmaps); 46 46 47 - static int xive_irq_bitmap_add(int base, int count) 47 + static int __init xive_irq_bitmap_add(int base, int count) 48 48 { 49 49 struct xive_irq_bitmap *xibm; 50 50 ··· 173 173 } while (plpar_busy_delay(rc)); 174 174 175 175 if (rc) { 176 - pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); 176 + pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc); 177 177 return rc; 178 178 } 179 179 ··· 182 182 *trig_page = retbuf[2]; 183 183 *esb_shift = retbuf[3]; 184 184 185 - pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", 186 - retbuf[0], retbuf[1], retbuf[2], retbuf[3]); 185 + pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n", 186 + lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]); 187 187 188 188 return 0; 189 189 } ··· 200 200 long rc; 201 201 202 202 203 - pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", 204 - flags, lisn, target, prio, sw_irq); 203 + pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n", 204 + flags, lisn, target, prio, sw_irq); 205 205 206 206 207 207 do { ··· 210 210 } while (plpar_busy_delay(rc)); 211 211 212 212 if (rc) { 213 - pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", 213 + pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n", 214 214 lisn, target, prio, rc); 215 215 return rc; 216 216 } ··· 227 227 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 228 228 long rc; 229 229 230 - pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn); 230 + pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn); 231 231 232 232 do { 233 233 rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, ··· 235 235 } while (plpar_busy_delay(rc)); 236 236 237 237 if (rc) { 238 - pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n", 238 + 
pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n", 239 239 lisn, rc); 240 240 return rc; 241 241 } ··· 244 244 *prio = retbuf[1]; 245 245 *sw_irq = retbuf[2]; 246 246 247 - pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n", 248 - retbuf[0], retbuf[1], retbuf[2]); 247 + pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n", 248 + retbuf[0], retbuf[1], retbuf[2]); 249 249 250 250 return 0; 251 251 } ··· 273 273 *esn_page = retbuf[0]; 274 274 *esn_size = retbuf[1]; 275 275 276 - pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", 277 - retbuf[0], retbuf[1]); 276 + pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n", 277 + target, priority, retbuf[0], retbuf[1]); 278 278 279 279 return 0; 280 280 } ··· 289 289 { 290 290 long rc; 291 291 292 - pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", 293 - flags, target, priority, qpage, qsize); 292 + pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n", 293 + flags, target, priority, qpage, qsize); 294 294 295 295 do { 296 296 rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, ··· 298 298 } while (plpar_busy_delay(rc)); 299 299 300 300 if (rc) { 301 - pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", 301 + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n", 302 302 target, priority, qpage, rc); 303 303 return rc; 304 304 } ··· 315 315 } while (plpar_busy_delay(rc)); 316 316 317 317 if (rc) { 318 - pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); 318 + pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc); 319 319 return rc; 320 320 } 321 321 ··· 333 333 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 334 334 long rc; 335 335 336 - pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", 337 - flags, lisn, offset, in_data); 336 + pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n", 337 + flags, 
lisn, offset, in_data); 338 338 339 339 do { 340 340 rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, ··· 342 342 } while (plpar_busy_delay(rc)); 343 343 344 344 if (rc) { 345 - pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", 345 + pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n", 346 346 lisn, offset, rc); 347 347 return rc; 348 348 } ··· 653 653 struct xive_irq_bitmap *xibm; 654 654 char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 655 655 656 + if (!buf) 657 + return -ENOMEM; 658 + 656 659 list_for_each_entry(xibm, &xive_irq_bitmaps, list) { 657 660 memset(buf, 0, PAGE_SIZE); 658 661 bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); ··· 690 687 /* 691 688 * get max priority from "/ibm,plat-res-int-priorities" 692 689 */ 693 - static bool xive_get_max_prio(u8 *max_prio) 690 + static bool __init xive_get_max_prio(u8 *max_prio) 694 691 { 695 692 struct device_node *rootdn; 696 693 const __be32 *reg; ··· 744 741 return true; 745 742 } 746 743 747 - static const u8 *get_vec5_feature(unsigned int index) 744 + static const u8 *__init get_vec5_feature(unsigned int index) 748 745 { 749 746 unsigned long root, chosen; 750 747 int size;
+2
arch/powerpc/sysdev/xive/xive-internal.h
··· 58 58 void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); 59 59 #endif 60 60 int (*debug_show)(struct seq_file *m, void *private); 61 + int (*debug_create)(struct dentry *xive_dir); 61 62 const char *name; 62 63 }; 63 64 ··· 73 72 } 74 73 75 74 extern bool xive_cmdline_disabled; 75 + extern bool xive_has_save_restore; 76 76 77 77 #endif /* __XIVE_INTERNAL_H */
+23 -15
arch/powerpc/xmon/xmon.c
··· 125 125 static int cmds(struct pt_regs *); 126 126 static int mread(unsigned long, void *, int); 127 127 static int mwrite(unsigned long, void *, int); 128 - static int mread_instr(unsigned long, struct ppc_inst *); 128 + static int mread_instr(unsigned long, ppc_inst_t *); 129 129 static int handle_fault(struct pt_regs *); 130 130 static void byterev(unsigned char *, int); 131 131 static void memex(void); ··· 908 908 static void insert_bpts(void) 909 909 { 910 910 int i; 911 - struct ppc_inst instr, instr2; 911 + ppc_inst_t instr, instr2; 912 912 struct bpt *bp, *bp2; 913 913 914 914 bp = bpts; ··· 988 988 { 989 989 int i; 990 990 struct bpt *bp; 991 - struct ppc_inst instr; 991 + ppc_inst_t instr; 992 992 993 993 bp = bpts; 994 994 for (i = 0; i < NBPTS; ++i, ++bp) { ··· 1159 1159 case 'P': 1160 1160 show_tasks(); 1161 1161 break; 1162 - #ifdef CONFIG_PPC_BOOK3S 1162 + #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_64S_HASH_MMU) 1163 1163 case 'u': 1164 1164 dump_segments(); 1165 1165 break; ··· 1204 1204 */ 1205 1205 static int do_step(struct pt_regs *regs) 1206 1206 { 1207 - struct ppc_inst instr; 1207 + ppc_inst_t instr; 1208 1208 int stepped; 1209 1209 1210 1210 force_enable_xmon(); ··· 1459 1459 */ 1460 1460 static long check_bp_loc(unsigned long addr) 1461 1461 { 1462 - struct ppc_inst instr; 1462 + ppc_inst_t instr; 1463 1463 1464 1464 addr &= ~3; 1465 1465 if (!is_kernel_addr(addr)) { ··· 2107 2107 if (!cpu_has_feature(CPU_FTR_ARCH_300)) 2108 2108 return; 2109 2109 2110 - printf("pidr = %.16lx tidr = %.16lx\n", 2111 - mfspr(SPRN_PID), mfspr(SPRN_TIDR)); 2110 + if (cpu_has_feature(CPU_FTR_P9_TIDR)) { 2111 + printf("pidr = %.16lx tidr = %.16lx\n", 2112 + mfspr(SPRN_PID), mfspr(SPRN_TIDR)); 2113 + } else { 2114 + printf("pidr = %.16lx\n", 2115 + mfspr(SPRN_PID)); 2116 + } 2117 + 2112 2118 printf("psscr = %.16lx\n", 2113 2119 hv ? 
mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); 2114 2120 ··· 2306 2300 } 2307 2301 2308 2302 static int 2309 - mread_instr(unsigned long adrs, struct ppc_inst *instr) 2303 + mread_instr(unsigned long adrs, ppc_inst_t *instr) 2310 2304 { 2311 2305 volatile int n; 2312 2306 ··· 2614 2608 static void dump_one_paca(int cpu) 2615 2609 { 2616 2610 struct paca_struct *p; 2617 - #ifdef CONFIG_PPC_BOOK3S_64 2611 + #ifdef CONFIG_PPC_64S_HASH_MMU 2618 2612 int i = 0; 2619 2613 #endif 2620 2614 ··· 2656 2650 DUMP(p, cpu_start, "%#-*x"); 2657 2651 DUMP(p, kexec_state, "%#-*x"); 2658 2652 #ifdef CONFIG_PPC_BOOK3S_64 2653 + #ifdef CONFIG_PPC_64S_HASH_MMU 2659 2654 if (!early_radix_enabled()) { 2660 2655 for (i = 0; i < SLB_NUM_BOLTED; i++) { 2661 2656 u64 esid, vsid; ··· 2684 2677 22, "slb_cache", i, p->slb_cache[i]); 2685 2678 } 2686 2679 } 2680 + #endif 2687 2681 2688 2682 DUMP(p, rfi_flush_fallback_area, "%-*px"); 2689 2683 #endif ··· 2817 2809 { 2818 2810 int cpu; 2819 2811 2820 - if (num_possible_cpus() == 0) { 2812 + if (num_online_cpus() == 0) { 2821 2813 printf("No possible cpus, use 'dx #' to dump individual cpus\n"); 2822 2814 return; 2823 2815 } 2824 2816 2825 - for_each_possible_cpu(cpu) 2817 + for_each_online_cpu(cpu) 2826 2818 dump_one_xive(cpu); 2827 2819 } 2828 2820 ··· 3028 3020 { 3029 3021 int nr, dotted; 3030 3022 unsigned long first_adr; 3031 - struct ppc_inst inst, last_inst = ppc_inst(0); 3023 + ppc_inst_t inst, last_inst = ppc_inst(0); 3032 3024 3033 3025 dotted = 0; 3034 3026 for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { ··· 3748 3740 printf("%s", after); 3749 3741 } 3750 3742 3751 - #ifdef CONFIG_PPC_BOOK3S_64 3743 + #ifdef CONFIG_PPC_64S_HASH_MMU 3752 3744 void dump_segments(void) 3753 3745 { 3754 3746 int i; ··· 4136 4128 4137 4129 static struct spu_info spu_info[XMON_NUM_SPUS]; 4138 4130 4139 - void xmon_register_spus(struct list_head *list) 4131 + void __init xmon_register_spus(struct list_head *list) 4140 4132 { 4141 4133 struct 
spu *spu; 4142 4134
+2 -2
arch/powerpc/xmon/xmon_bpts.h
··· 5 5 #define NBPTS 256 6 6 #ifndef __ASSEMBLY__ 7 7 #include <asm/inst.h> 8 - #define BPT_SIZE (sizeof(struct ppc_inst) * 2) 9 - #define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst)) 8 + #define BPT_SIZE (sizeof(ppc_inst_t) * 2) 9 + #define BPT_WORDS (BPT_SIZE / sizeof(ppc_inst_t)) 10 10 11 11 extern unsigned int bpt_table[NBPTS * BPT_WORDS]; 12 12 #endif /* __ASSEMBLY__ */
+1 -1
drivers/macintosh/mediabay.c
··· 703 703 * Therefore we do it all by polling the media bay once each tick. 704 704 */ 705 705 706 - static struct of_device_id media_bay_match[] = 706 + static const struct of_device_id media_bay_match[] = 707 707 { 708 708 { 709 709 .name = "media-bay",
+1
drivers/misc/cxl/Kconfig
··· 6 6 config CXL_BASE 7 7 bool 8 8 select PPC_COPRO_BASE 9 + select PPC_64S_HASH_MMU 9 10 10 11 config CXL 11 12 tristate "Support for IBM Coherent Accelerators (CXL)"
+1 -1
drivers/misc/lkdtm/Makefile
··· 11 11 lkdtm-$(CONFIG_LKDTM) += stackleak.o 12 12 lkdtm-$(CONFIG_LKDTM) += cfi.o 13 13 lkdtm-$(CONFIG_LKDTM) += fortify.o 14 - lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o 14 + lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o 15 15 16 16 KASAN_SANITIZE_rodata.o := n 17 17 KASAN_SANITIZE_stackleak.o := n
+1 -1
drivers/misc/lkdtm/core.c
··· 182 182 CRASHTYPE(FORTIFIED_SUBOBJECT), 183 183 CRASHTYPE(FORTIFIED_STRSCPY), 184 184 CRASHTYPE(DOUBLE_FAULT), 185 - #ifdef CONFIG_PPC_BOOK3S_64 185 + #ifdef CONFIG_PPC_64S_HASH_MMU 186 186 CRASHTYPE(PPC_SLB_MULTIHIT), 187 187 #endif 188 188 };
+1 -3
drivers/misc/ocxl/file.c
··· 74 74 { 75 75 struct ocxl_ioctl_attach arg; 76 76 u64 amr = 0; 77 - int rc; 78 77 79 78 pr_debug("%s for context %d\n", __func__, ctx->pasid); 80 79 ··· 85 86 return -EINVAL; 86 87 87 88 amr = arg.amr & mfspr(SPRN_UAMOR); 88 - rc = ocxl_context_attach(ctx, amr, current->mm); 89 - return rc; 89 + return ocxl_context_attach(ctx, amr, current->mm); 90 90 } 91 91 92 92 static long afu_ioctl_get_metadata(struct ocxl_context *ctx,
+1 -1
include/linux/cuda.h
··· 12 12 #include <uapi/linux/cuda.h> 13 13 14 14 15 - extern int find_via_cuda(void); 15 + extern int __init find_via_cuda(void); 16 16 extern int cuda_request(struct adb_request *req, 17 17 void (*done)(struct adb_request *), int nbytes, ...); 18 18 extern void cuda_poll(void);
+1 -1
include/linux/pmu.h
··· 13 13 #include <uapi/linux/pmu.h> 14 14 15 15 16 - extern int find_via_pmu(void); 16 + extern int __init find_via_pmu(void); 17 17 18 18 extern int pmu_request(struct adb_request *req, 19 19 void (*done)(struct adb_request *), int nbytes, ...);
+4 -1
include/uapi/linux/perf_event.h
··· 1332 1332 1333 1333 /* hop level */ 1334 1334 #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ 1335 - /* 2-7 available */ 1335 + #define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ 1336 + #define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ 1337 + #define PERF_MEM_HOPS_3 0x04 /* remote board */ 1338 + /* 5-7 available */ 1336 1339 #define PERF_MEM_HOPS_SHIFT 43 1337 1340 1338 1341 #define PERF_MEM_S(a, s) \
+4 -1
tools/testing/selftests/powerpc/security/mitigation-patching.sh
··· 44 44 45 45 for m in $mitigations 46 46 do 47 - do_one "$m" & 47 + if [[ -f /sys/kernel/debug/powerpc/$m ]] 48 + then 49 + do_one "$m" & 50 + fi 48 51 done 49 52 50 53 echo "Spawned threads enabling/disabling mitigations ..."
+1 -1
tools/testing/selftests/powerpc/security/spectre_v2.c
··· 193 193 * We are not vulnerable and reporting otherwise, so 194 194 * missing such a mismatch is safe. 195 195 */ 196 - if (state == VULNERABLE) 196 + if (miss_percent > 95) 197 197 return 4; 198 198 199 199 return 1;
+2
tools/testing/selftests/powerpc/signal/.gitignore
··· 4 4 sigfuz 5 5 sigreturn_vdso 6 6 sig_sc_double_restart 7 + sigreturn_kernel 8 + sigreturn_unaligned
+2
tools/testing/selftests/powerpc/signal/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart 3 + TEST_GEN_PROGS += sigreturn_kernel 4 + TEST_GEN_PROGS += sigreturn_unaligned 3 5 4 6 CFLAGS += -maltivec 5 7 $(OUTPUT)/signal_tm: CFLAGS += -mhtm
+132
tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test that we can't sigreturn to kernel addresses, or to kernel mode. 4 + */ 5 + 6 + #define _GNU_SOURCE 7 + 8 + #include <stdio.h> 9 + #include <signal.h> 10 + #include <stdlib.h> 11 + #include <sys/types.h> 12 + #include <sys/wait.h> 13 + #include <unistd.h> 14 + 15 + #include "utils.h" 16 + 17 + #define MSR_PR (1ul << 14) 18 + 19 + static volatile unsigned long long sigreturn_addr; 20 + static volatile unsigned long long sigreturn_msr_mask; 21 + 22 + static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr) 23 + { 24 + ucontext_t *uc = (ucontext_t *)uc_ptr; 25 + 26 + if (sigreturn_addr) 27 + UCONTEXT_NIA(uc) = sigreturn_addr; 28 + 29 + if (sigreturn_msr_mask) 30 + UCONTEXT_MSR(uc) &= sigreturn_msr_mask; 31 + } 32 + 33 + static pid_t fork_child(void) 34 + { 35 + pid_t pid; 36 + 37 + pid = fork(); 38 + if (pid == 0) { 39 + raise(SIGUSR1); 40 + exit(0); 41 + } 42 + 43 + return pid; 44 + } 45 + 46 + static int expect_segv(pid_t pid) 47 + { 48 + int child_ret; 49 + 50 + waitpid(pid, &child_ret, 0); 51 + FAIL_IF(WIFEXITED(child_ret)); 52 + FAIL_IF(!WIFSIGNALED(child_ret)); 53 + FAIL_IF(WTERMSIG(child_ret) != 11); 54 + 55 + return 0; 56 + } 57 + 58 + int test_sigreturn_kernel(void) 59 + { 60 + struct sigaction act; 61 + int child_ret, i; 62 + pid_t pid; 63 + 64 + act.sa_sigaction = sigusr1_handler; 65 + act.sa_flags = SA_SIGINFO; 66 + sigemptyset(&act.sa_mask); 67 + 68 + FAIL_IF(sigaction(SIGUSR1, &act, NULL)); 69 + 70 + for (i = 0; i < 2; i++) { 71 + // Return to kernel 72 + sigreturn_addr = 0xcull << 60; 73 + pid = fork_child(); 74 + expect_segv(pid); 75 + 76 + // Return to kernel virtual 77 + sigreturn_addr = 0xc008ull << 48; 78 + pid = fork_child(); 79 + expect_segv(pid); 80 + 81 + // Return out of range 82 + sigreturn_addr = 0xc010ull << 48; 83 + pid = fork_child(); 84 + expect_segv(pid); 85 + 86 + // Return to no-man's land, just below PAGE_OFFSET 87 + sigreturn_addr = (0xcull << 60) - (64 * 1024); 
88 + pid = fork_child(); 89 + expect_segv(pid); 90 + 91 + // Return to no-man's land, above TASK_SIZE_4PB 92 + sigreturn_addr = 0x1ull << 52; 93 + pid = fork_child(); 94 + expect_segv(pid); 95 + 96 + // Return to 0xd space 97 + sigreturn_addr = 0xdull << 60; 98 + pid = fork_child(); 99 + expect_segv(pid); 100 + 101 + // Return to 0xe space 102 + sigreturn_addr = 0xeull << 60; 103 + pid = fork_child(); 104 + expect_segv(pid); 105 + 106 + // Return to 0xf space 107 + sigreturn_addr = 0xfull << 60; 108 + pid = fork_child(); 109 + expect_segv(pid); 110 + 111 + // Attempt to set PR=0 for 2nd loop (should be blocked by kernel) 112 + sigreturn_msr_mask = ~MSR_PR; 113 + } 114 + 115 + printf("All children killed as expected\n"); 116 + 117 + // Don't change address, just MSR, should return to user as normal 118 + sigreturn_addr = 0; 119 + sigreturn_msr_mask = ~MSR_PR; 120 + pid = fork_child(); 121 + waitpid(pid, &child_ret, 0); 122 + FAIL_IF(!WIFEXITED(child_ret)); 123 + FAIL_IF(WIFSIGNALED(child_ret)); 124 + FAIL_IF(WEXITSTATUS(child_ret) != 0); 125 + 126 + return 0; 127 + } 128 + 129 + int main(void) 130 + { 131 + return test_harness(test_sigreturn_kernel, "sigreturn_kernel"); 132 + }
+43
tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Test sigreturn to an unaligned address, ie. low 2 bits set. 4 + * Nothing bad should happen. 5 + * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y. 6 + */ 7 + 8 + #include <signal.h> 9 + #include <stdio.h> 10 + #include <stdlib.h> 11 + #include <string.h> 12 + #include <ucontext.h> 13 + #include <unistd.h> 14 + 15 + #include "utils.h" 16 + 17 + 18 + static void sigusr1_handler(int signo, siginfo_t *info, void *ptr) 19 + { 20 + ucontext_t *uc = ptr; 21 + 22 + UCONTEXT_NIA(uc) |= 3; 23 + } 24 + 25 + static int test_sigreturn_unaligned(void) 26 + { 27 + struct sigaction action; 28 + 29 + memset(&action, 0, sizeof(action)); 30 + action.sa_sigaction = sigusr1_handler; 31 + action.sa_flags = SA_SIGINFO; 32 + 33 + FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1); 34 + 35 + raise(SIGUSR1); 36 + 37 + return 0; 38 + } 39 + 40 + int main(void) 41 + { 42 + return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned"); 43 + }