Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
"Highlights include:

- Support for STRICT_KERNEL_RWX on 64-bit server CPUs

- Platform support for FSP2 (476fpe) board

- Enable ZONE_DEVICE on 64-bit server CPUs

- Generic & powerpc spin loop primitives to optimise busy waiting

- Convert VDSO update function to use new update_vsyscall() interface

- Optimisations to hypercall/syscall/context-switch paths

- Improvements to the CPU idle code on Power8 and Power9

As well as many other fixes and improvements.

Thanks to: Akshay Adiga, Andrew Donnellan, Andrew Jeffery, Anshuman
Khandual, Anton Blanchard, Balbir Singh, Benjamin Herrenschmidt,
Christophe Leroy, Christophe Lombard, Colin Ian King, Dan Carpenter,
Gautham R. Shenoy, Hari Bathini, Ian Munsie, Ivan Mikhaylov, Javier
Martinez Canillas, Madhavan Srinivasan, Masahiro Yamada, Matt Brown,
Michael Neuling, Michal Suchanek, Murilo Opsfelder Araujo, Naveen N.
Rao, Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pavel Machek,
Russell Currey, Santosh Sivaraj, Stephen Rothwell, Thiago Jung
Bauermann, Yang Li"

* tag 'powerpc-4.13-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (158 commits)
powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs
powerpc/mm/radix: Implement STRICT_RWX/mark_rodata_ro() for Radix
powerpc/mm/hash: Implement mark_rodata_ro() for hash
powerpc/vmlinux.lds: Align __init_begin to 16M
powerpc/lib/code-patching: Use alternate map for patch_instruction()
powerpc/xmon: Add patch_instruction() support for xmon
powerpc/kprobes/optprobes: Use patch_instruction()
powerpc/kprobes: Move kprobes over to patch_instruction()
powerpc/mm/radix: Fix execute permissions for interrupt_vectors
powerpc/pseries: Fix passing of pp0 in updatepp() and updateboltedpp()
powerpc/64s: Blacklist rtas entry/exit from kprobes
powerpc/64s: Blacklist functions invoked on a trap
powerpc/64s: Un-blacklist system_call() from kprobes
powerpc/64s: Move system_call() symbol to just after setting MSR_EE
powerpc/64s: Blacklist system_call() and system_call_common() from kprobes
powerpc/64s: Convert .L__replay_interrupt_return to a local label
powerpc64/elfv1: Only dereference function descriptor for non-text symbols
cxl: Export library to support IBM XSL
powerpc/dts: Use #include "..." to include local DT
powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8
...

+4327 -1081
+2 -2
Documentation/powerpc/firmware-assisted-dump.txt
··· 61 61 boot successfully. For syntax of crashkernel= parameter, 62 62 refer to Documentation/kdump/kdump.txt. If any offset is 63 63 provided in crashkernel= parameter, it will be ignored 64 - as fadump reserves memory at end of RAM for boot memory 65 - dump preservation in case of a crash. 64 + as fadump uses a predefined offset to reserve memory 65 + for boot memory dump preservation in case of a crash. 66 66 67 67 -- After the low memory (boot memory) area has been saved, the 68 68 firmware will reset PCI and other hardware state. It will
+7 -6
MAINTAINERS
··· 3781 3781 F: drivers/net/ethernet/chelsio/cxgb4vf/ 3782 3782 3783 3783 CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER 3784 - M: Ian Munsie <imunsie@au1.ibm.com> 3785 3784 M: Frederic Barrat <fbarrat@linux.vnet.ibm.com> 3785 + M: Andrew Donnellan <andrew.donnellan@au1.ibm.com> 3786 3786 L: linuxppc-dev@lists.ozlabs.org 3787 3787 S: Supported 3788 3788 F: arch/powerpc/platforms/powernv/pci-cxl.c ··· 5352 5352 F: drivers/video/fbdev/fsl-diu-fb.* 5353 5353 5354 5354 FREESCALE DMA DRIVER 5355 - M: Li Yang <leoli@freescale.com> 5355 + M: Li Yang <leoyang.li@nxp.com> 5356 5356 M: Zhang Wei <zw@zh-kernel.org> 5357 5357 L: linuxppc-dev@lists.ozlabs.org 5358 5358 S: Maintained ··· 5417 5417 F: drivers/net/ethernet/freescale/dpaa 5418 5418 5419 5419 FREESCALE SOC DRIVERS 5420 - M: Scott Wood <oss@buserror.net> 5420 + M: Li Yang <leoyang.li@nxp.com> 5421 5421 L: linuxppc-dev@lists.ozlabs.org 5422 5422 L: linux-arm-kernel@lists.infradead.org 5423 5423 S: Maintained 5424 - F: Documentation/devicetree/bindings/powerpc/fsl/ 5424 + F: Documentation/devicetree/bindings/soc/fsl/ 5425 5425 F: drivers/soc/fsl/ 5426 5426 F: include/linux/fsl/ 5427 5427 ··· 5434 5434 F: include/soc/fsl/*ucc*.h 5435 5435 5436 5436 FREESCALE USB PERIPHERAL DRIVERS 5437 - M: Li Yang <leoli@freescale.com> 5437 + M: Li Yang <leoyang.li@nxp.com> 5438 5438 L: linux-usb@vger.kernel.org 5439 5439 L: linuxppc-dev@lists.ozlabs.org 5440 5440 S: Maintained 5441 5441 F: drivers/usb/gadget/udc/fsl* 5442 5442 5443 5443 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER 5444 - M: Li Yang <leoli@freescale.com> 5444 + M: Li Yang <leoyang.li@nxp.com> 5445 5445 L: netdev@vger.kernel.org 5446 5446 L: linuxppc-dev@lists.ozlabs.org 5447 5447 S: Maintained ··· 7784 7784 S: Maintained 7785 7785 F: arch/powerpc/platforms/83xx/ 7786 7786 F: arch/powerpc/platforms/85xx/ 7787 + F: Documentation/devicetree/bindings/powerpc/fsl/ 7787 7788 7788 7789 LINUX FOR POWERPC PA SEMI PWRFICIENT 7789 7790 L: 
linuxppc-dev@lists.ozlabs.org
+16 -9
arch/powerpc/Kconfig
··· 109 109 default y 110 110 depends on SMP && PREEMPT 111 111 112 - config ARCH_HAS_ILOG2_U32 113 - bool 114 - default y 115 - 116 - config ARCH_HAS_ILOG2_U64 117 - bool 118 - default y if 64BIT 119 - 120 112 config GENERIC_HWEIGHT 121 113 bool 122 114 default y ··· 130 138 select ARCH_HAS_SG_CHAIN 131 139 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 132 140 select ARCH_HAS_UBSAN_SANITIZE_ALL 141 + select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64 133 142 select ARCH_HAVE_NMI_SAFE_CMPXCHG 134 143 select ARCH_MIGHT_HAVE_PC_PARPORT 135 144 select ARCH_MIGHT_HAVE_PC_SERIO ··· 156 163 select GENERIC_SMP_IDLE_THREAD 157 164 select GENERIC_STRNCPY_FROM_USER 158 165 select GENERIC_STRNLEN_USER 159 - select GENERIC_TIME_VSYSCALL_OLD 166 + select GENERIC_TIME_VSYSCALL 160 167 select HAVE_ARCH_AUDITSYSCALL 161 168 select HAVE_ARCH_JUMP_LABEL 162 169 select HAVE_ARCH_KGDB ··· 164 171 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT 165 172 select HAVE_ARCH_SECCOMP_FILTER 166 173 select HAVE_ARCH_TRACEHOOK 174 + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION) 175 + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX 167 176 select HAVE_CBPF_JIT if !PPC64 168 177 select HAVE_CONTEXT_TRACKING if PPC64 169 178 select HAVE_DEBUG_KMEMLEAK ··· 203 208 select HAVE_REGS_AND_STACK_ACCESS_API 204 209 select HAVE_SYSCALL_TRACEPOINTS 205 210 select HAVE_VIRT_CPU_ACCOUNTING 211 + select HAVE_IRQ_TIME_ACCOUNTING 206 212 select IRQ_DOMAIN 207 213 select IRQ_FORCED_THREADING 208 214 select MODULES_USE_ELF_RELA ··· 433 437 default n 434 438 ---help--- 435 439 Support user-mode Transactional Memory on POWERPC. 
440 + 441 + config LD_HEAD_STUB_CATCH 442 + bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT 443 + depends on PPC64 444 + default n 445 + help 446 + Very large kernels can cause linker branch stubs to be generated by 447 + code in head_64.S, which moves the head text sections out of their 448 + specified location. This option can work around the problem. 449 + 450 + If unsure, say "N". 436 451 437 452 config DISABLE_MPROFILE_KERNEL 438 453 bool "Disable use of mprofile-kernel for kernel tracing"
+11
arch/powerpc/Makefile
··· 98 98 LDFLAGS_vmlinux-y := -Bstatic 99 99 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie 100 100 LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) 101 + LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) 101 102 102 103 ifeq ($(CONFIG_PPC64),y) 103 104 ifeq ($(call cc-option-yn,-mcmodel=medium),y) ··· 190 189 CHECKFLAGS += -D__LITTLE_ENDIAN__ 191 190 endif 192 191 192 + ifdef CONFIG_PPC32 193 193 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o 194 + else 195 + ifeq ($(call ld-ifversion, -ge, 225000000, y),y) 196 + # Have the linker provide sfpr if possible. 197 + # There is a corresponding test in arch/powerpc/lib/Makefile 198 + KBUILD_LDFLAGS_MODULE += --save-restore-funcs 199 + else 200 + KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o 201 + endif 202 + endif 194 203 195 204 ifeq ($(CONFIG_476FPE_ERR46),y) 196 205 KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
+4 -1
arch/powerpc/boot/Makefile
··· 95 95 $(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ 96 96 $(addprefix $(obj)/,$(libfdtheader)) 97 97 98 - src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \ 98 + src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ 99 99 $(libfdt) libfdt-wrapper.c \ 100 100 ns16550.c serial.c simple_alloc.c div64.S util.S \ 101 101 elf_util.c $(zlib-y) devtree.c stdlib.c \ 102 102 oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ 103 103 uartlite.c mpc52xx-psc.c opal.c 104 104 src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S 105 + ifndef CONFIG_PPC64_BOOT_WRAPPER 106 + src-wlib-y += crtsavres.S 107 + endif 105 108 src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c 106 109 src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c 107 110 src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
+4 -4
arch/powerpc/boot/crtsavres.S
··· 37 37 * the executable file might be covered by the GNU General Public License. 38 38 */ 39 39 40 + #ifdef __powerpc64__ 41 + #error "On PPC64, FPR save/restore functions are provided by the linker." 42 + #endif 43 + 40 44 .file "crtsavres.S" 41 45 .section ".text" 42 - 43 - /* On PowerPC64 Linux, these functions are provided by the linker. */ 44 - #ifndef __powerpc64__ 45 46 46 47 #define _GLOBAL(name) \ 47 48 .type name,@function; \ ··· 231 230 mtlr 0 232 231 mr 1,11 233 232 blr 234 - #endif
+1 -1
arch/powerpc/boot/dts/ac14xx.dts
··· 10 10 */ 11 11 12 12 13 - #include <mpc5121.dtsi> 13 + #include "mpc5121.dtsi" 14 14 15 15 / { 16 16 model = "ac14xx";
+1 -1
arch/powerpc/boot/dts/digsy_mtc.dts
··· 73 73 74 74 i2c@3d00 { 75 75 eeprom@50 { 76 - compatible = "at,24c08"; 76 + compatible = "atmel,24c08"; 77 77 reg = <0x50>; 78 78 }; 79 79
+4 -4
arch/powerpc/boot/dts/fsl/b4qds.dtsi
··· 166 166 reg = <0>; 167 167 168 168 eeprom@50 { 169 - compatible = "at24,24c64"; 169 + compatible = "atmel,24c64"; 170 170 reg = <0x50>; 171 171 }; 172 172 eeprom@51 { 173 - compatible = "at24,24c256"; 173 + compatible = "atmel,24c256"; 174 174 reg = <0x51>; 175 175 }; 176 176 eeprom@53 { 177 - compatible = "at24,24c256"; 177 + compatible = "atmel,24c256"; 178 178 reg = <0x53>; 179 179 }; 180 180 eeprom@57 { 181 - compatible = "at24,24c256"; 181 + compatible = "atmel,24c256"; 182 182 reg = <0x57>; 183 183 }; 184 184 rtc@68 {
+1 -1
arch/powerpc/boot/dts/fsl/c293pcie.dts
··· 153 153 &soc { 154 154 i2c@3000 { 155 155 eeprom@50 { 156 - compatible = "st,24c1024"; 156 + compatible = "st,24c1024", "atmel,24c1024"; 157 157 reg = <0x50>; 158 158 }; 159 159
+1 -1
arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
··· 89 89 &board_soc { 90 90 i2c@3000 { 91 91 eeprom@50 { 92 - compatible = "st,24c256"; 92 + compatible = "st,24c256", "atmel,24c256"; 93 93 reg = <0x50>; 94 94 }; 95 95
+1 -1
arch/powerpc/boot/dts/fsl/p1023rdb.dts
··· 79 79 80 80 i2c@3000 { 81 81 eeprom@53 { 82 - compatible = "at24,24c04"; 82 + compatible = "atmel,24c04"; 83 83 reg = <0x53>; 84 84 }; 85 85
+2 -2
arch/powerpc/boot/dts/fsl/p2041rdb.dts
··· 127 127 reg = <0x48>; 128 128 }; 129 129 eeprom@50 { 130 - compatible = "at24,24c256"; 130 + compatible = "atmel,24c256"; 131 131 reg = <0x50>; 132 132 }; 133 133 rtc@68 { ··· 142 142 143 143 i2c@118100 { 144 144 eeprom@50 { 145 - compatible = "at24,24c256"; 145 + compatible = "atmel,24c256"; 146 146 reg = <0x50>; 147 147 }; 148 148 };
+2 -2
arch/powerpc/boot/dts/fsl/p3041ds.dts
··· 124 124 125 125 i2c@118100 { 126 126 eeprom@51 { 127 - compatible = "at24,24c256"; 127 + compatible = "atmel,24c256"; 128 128 reg = <0x51>; 129 129 }; 130 130 eeprom@52 { 131 - compatible = "at24,24c256"; 131 + compatible = "atmel,24c256"; 132 132 reg = <0x52>; 133 133 }; 134 134 };
+2 -2
arch/powerpc/boot/dts/fsl/p4080ds.dts
··· 125 125 126 126 i2c@118100 { 127 127 eeprom@51 { 128 - compatible = "at24,24c256"; 128 + compatible = "atmel,24c256"; 129 129 reg = <0x51>; 130 130 }; 131 131 eeprom@52 { 132 - compatible = "at24,24c256"; 132 + compatible = "atmel,24c256"; 133 133 reg = <0x52>; 134 134 }; 135 135 rtc@68 {
+2 -2
arch/powerpc/boot/dts/fsl/p5020ds.dts
··· 124 124 125 125 i2c@118100 { 126 126 eeprom@51 { 127 - compatible = "at24,24c256"; 127 + compatible = "atmel,24c256"; 128 128 reg = <0x51>; 129 129 }; 130 130 eeprom@52 { 131 - compatible = "at24,24c256"; 131 + compatible = "atmel,24c256"; 132 132 reg = <0x52>; 133 133 }; 134 134 };
+2 -2
arch/powerpc/boot/dts/fsl/p5040ds.dts
··· 133 133 134 134 i2c@118100 { 135 135 eeprom@51 { 136 - compatible = "at24,24c256"; 136 + compatible = "atmel,24c256"; 137 137 reg = <0x51>; 138 138 }; 139 139 eeprom@52 { 140 - compatible = "at24,24c256"; 140 + compatible = "atmel,24c256"; 141 141 reg = <0x52>; 142 142 }; 143 143 };
+4 -4
arch/powerpc/boot/dts/fsl/t208xqds.dtsi
··· 147 147 reg = <0x0>; 148 148 149 149 eeprom@50 { 150 - compatible = "at24,24c512"; 150 + compatible = "atmel,24c512"; 151 151 reg = <0x50>; 152 152 }; 153 153 154 154 eeprom@51 { 155 - compatible = "at24,24c02"; 155 + compatible = "atmel,24c02"; 156 156 reg = <0x51>; 157 157 }; 158 158 159 159 eeprom@57 { 160 - compatible = "at24,24c02"; 160 + compatible = "atmel,24c02"; 161 161 reg = <0x57>; 162 162 }; 163 163 ··· 174 174 reg = <0x1>; 175 175 176 176 eeprom@55 { 177 - compatible = "at24,24c02"; 177 + compatible = "atmel,24c02"; 178 178 reg = <0x55>; 179 179 }; 180 180 };
+6 -6
arch/powerpc/boot/dts/fsl/t4240qds.dts
··· 377 377 reg = <0>; 378 378 379 379 eeprom@51 { 380 - compatible = "at24,24c256"; 380 + compatible = "atmel,24c256"; 381 381 reg = <0x51>; 382 382 }; 383 383 eeprom@52 { 384 - compatible = "at24,24c256"; 384 + compatible = "atmel,24c256"; 385 385 reg = <0x52>; 386 386 }; 387 387 eeprom@53 { 388 - compatible = "at24,24c256"; 388 + compatible = "atmel,24c256"; 389 389 reg = <0x53>; 390 390 }; 391 391 eeprom@54 { 392 - compatible = "at24,24c256"; 392 + compatible = "atmel,24c256"; 393 393 reg = <0x54>; 394 394 }; 395 395 eeprom@55 { 396 - compatible = "at24,24c256"; 396 + compatible = "atmel,24c256"; 397 397 reg = <0x55>; 398 398 }; 399 399 eeprom@56 { 400 - compatible = "at24,24c256"; 400 + compatible = "atmel,24c256"; 401 401 reg = <0x56>; 402 402 }; 403 403 rtc@68 {
+3 -3
arch/powerpc/boot/dts/fsl/t4240rdb.dts
··· 130 130 reg = <0x2f>; 131 131 }; 132 132 eeprom@52 { 133 - compatible = "at24,24c256"; 133 + compatible = "atmel,24c256"; 134 134 reg = <0x52>; 135 135 }; 136 136 eeprom@54 { 137 - compatible = "at24,24c256"; 137 + compatible = "atmel,24c256"; 138 138 reg = <0x54>; 139 139 }; 140 140 eeprom@56 { 141 - compatible = "at24,24c256"; 141 + compatible = "atmel,24c256"; 142 142 reg = <0x56>; 143 143 }; 144 144 rtc@68 {
+608
arch/powerpc/boot/dts/fsp2.dts
··· 1 + /* 2 + * Device Tree Source for FSP2 3 + * 4 + * Copyright 2010,2012 IBM Corp. 5 + * 6 + * This file is licensed under the terms of the GNU General Public 7 + * License version 2. This program is licensed "as is" without 8 + * any warranty of any kind, whether express or implied. 9 + */ 10 + 11 + 12 + /dts-v1/; 13 + 14 + / { 15 + #address-cells = <2>; 16 + #size-cells = <1>; 17 + model = "ibm,fsp2"; 18 + compatible = "ibm,fsp2"; 19 + dcr-parent = <&{/cpus/cpu@0}>; 20 + 21 + aliases { 22 + ethernet0 = &EMAC0; 23 + ethernet1 = &EMAC1; 24 + serial0 = &UART0; 25 + }; 26 + 27 + cpus { 28 + #address-cells = <1>; 29 + #size-cells = <0>; 30 + 31 + cpu@0 { 32 + device_type = "cpu"; 33 + model = "PowerPC, 476FSP2"; 34 + reg = <0x0>; 35 + clock-frequency = <0>; /* Filled in by cuboot */ 36 + timebase-frequency = <0>; /* Filled in by cuboot */ 37 + i-cache-line-size = <32>; 38 + d-cache-line-size = <32>; 39 + d-cache-size = <32768>; 40 + i-cache-size = <32768>; 41 + dcr-controller; 42 + dcr-access-method = "native"; 43 + }; 44 + }; 45 + 46 + memory { 47 + device_type = "memory"; 48 + reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by 49 + cuboot */ 50 + }; 51 + 52 + clocks { 53 + mmc_clk: mmc_clk { 54 + compatible = "fixed-clock"; 55 + clock-frequency = <50000000>; 56 + clock-output-names = "mmc_clk"; 57 + }; 58 + }; 59 + 60 + UIC0: uic0 { 61 + #address-cells = <0>; 62 + #size-cells = <0>; 63 + #interrupt-cells = <2>; 64 + compatible = "ibm,uic"; 65 + interrupt-controller; 66 + cell-index = <0>; 67 + dcr-reg = <0x2c0 0x8>; 68 + }; 69 + 70 + /* "interrupts" field is <bit level bit level> 71 + first pair is non-critical, second is critical */ 72 + UIC1_0: uic1_0 { 73 + #address-cells = <0>; 74 + #size-cells = <0>; 75 + #interrupt-cells = <2>; 76 + 77 + compatible = "ibm,uic"; 78 + interrupt-controller; 79 + cell-index = <1>; 80 + dcr-reg = <0x2c8 0x8>; 81 + interrupt-parent = <&UIC0>; 82 + interrupts = <21 0x4 4 0x84>; 83 + }; 84 + 85 + /* PSI and DMA */ 86 + 
UIC1_1: uic1_1 { 87 + #address-cells = <0>; 88 + #size-cells = <0>; 89 + #interrupt-cells = <2>; 90 + 91 + compatible = "ibm,uic"; 92 + interrupt-controller; 93 + cell-index = <2>; 94 + dcr-reg = <0x350 0x8>; 95 + interrupt-parent = <&UIC0>; 96 + interrupts = <22 0x4 5 0x84>; 97 + }; 98 + 99 + /* Ethernet and USB */ 100 + UIC1_2: uic1_2 { 101 + #address-cells = <0>; 102 + #size-cells = <0>; 103 + #interrupt-cells = <2>; 104 + 105 + compatible = "ibm,uic"; 106 + interrupt-controller; 107 + cell-index = <3>; 108 + dcr-reg = <0x358 0x8>; 109 + interrupt-parent = <&UIC0>; 110 + interrupts = <23 0x4 6 0x84>; 111 + }; 112 + 113 + /* PLB Errors */ 114 + UIC1_3: uic1_3 { 115 + #address-cells = <0>; 116 + #size-cells = <0>; 117 + #interrupt-cells = <2>; 118 + 119 + compatible = "ibm,uic"; 120 + interrupt-controller; 121 + cell-index = <4>; 122 + dcr-reg = <0x360 0x8>; 123 + interrupt-parent = <&UIC0>; 124 + interrupts = <24 0x4 7 0x84>; 125 + }; 126 + 127 + UIC1_4: uic1_4 { 128 + #address-cells = <0>; 129 + #size-cells = <0>; 130 + #interrupt-cells = <2>; 131 + 132 + compatible = "ibm,uic"; 133 + interrupt-controller; 134 + cell-index = <5>; 135 + dcr-reg = <0x368 0x8>; 136 + interrupt-parent = <&UIC0>; 137 + interrupts = <25 0x4 8 0x84>; 138 + }; 139 + 140 + UIC1_5: uic1_5 { 141 + #address-cells = <0>; 142 + #size-cells = <0>; 143 + #interrupt-cells = <2>; 144 + 145 + compatible = "ibm,uic"; 146 + interrupt-controller; 147 + cell-index = <6>; 148 + dcr-reg = <0x370 0x8>; 149 + interrupt-parent = <&UIC0>; 150 + interrupts = <26 0x4 9 0x84>; 151 + }; 152 + 153 + /* 2nd level UICs for FSI */ 154 + UIC2_0: uic2_0 { 155 + #address-cells = <0>; 156 + #size-cells = <0>; 157 + #interrupt-cells = <2>; 158 + 159 + compatible = "ibm,uic"; 160 + interrupt-controller; 161 + cell-index = <7>; 162 + dcr-reg = <0x2d0 0x8>; 163 + interrupt-parent = <&UIC1_0>; 164 + interrupts = <16 0x4 0 0x84>; 165 + }; 166 + 167 + UIC2_1: uic2_1 { 168 + #address-cells = <0>; 169 + #size-cells = <0>; 170 + 
#interrupt-cells = <2>; 171 + 172 + compatible = "ibm,uic"; 173 + interrupt-controller; 174 + cell-index = <8>; 175 + dcr-reg = <0x2d8 0x8>; 176 + interrupt-parent = <&UIC1_0>; 177 + interrupts = <17 0x4 1 0x84>; 178 + }; 179 + 180 + UIC2_2: uic2_2 { 181 + #address-cells = <0>; 182 + #size-cells = <0>; 183 + #interrupt-cells = <2>; 184 + 185 + compatible = "ibm,uic"; 186 + interrupt-controller; 187 + cell-index = <9>; 188 + dcr-reg = <0x2e0 0x8>; 189 + interrupt-parent = <&UIC1_0>; 190 + interrupts = <18 0x4 2 0x84>; 191 + }; 192 + 193 + UIC2_3: uic2_3 { 194 + #address-cells = <0>; 195 + #size-cells = <0>; 196 + #interrupt-cells = <2>; 197 + 198 + compatible = "ibm,uic"; 199 + interrupt-controller; 200 + cell-index = <10>; 201 + dcr-reg = <0x2e8 0x8>; 202 + interrupt-parent = <&UIC1_0>; 203 + interrupts = <19 0x4 3 0x84>; 204 + }; 205 + 206 + UIC2_4: uic2_4 { 207 + #address-cells = <0>; 208 + #size-cells = <0>; 209 + #interrupt-cells = <2>; 210 + 211 + compatible = "ibm,uic"; 212 + interrupt-controller; 213 + cell-index = <11>; 214 + dcr-reg = <0x2f0 0x8>; 215 + interrupt-parent = <&UIC1_0>; 216 + interrupts = <20 0x4 4 0x84>; 217 + }; 218 + 219 + UIC2_5: uic2_5 { 220 + #address-cells = <0>; 221 + #size-cells = <0>; 222 + #interrupt-cells = <2>; 223 + 224 + compatible = "ibm,uic"; 225 + interrupt-controller; 226 + cell-index = <12>; 227 + dcr-reg = <0x2f8 0x8>; 228 + interrupt-parent = <&UIC1_0>; 229 + interrupts = <21 0x4 5 0x84>; 230 + }; 231 + 232 + UIC2_6: uic2_6 { 233 + #address-cells = <0>; 234 + #size-cells = <0>; 235 + #interrupt-cells = <2>; 236 + 237 + compatible = "ibm,uic"; 238 + interrupt-controller; 239 + cell-index = <13>; 240 + dcr-reg = <0x300 0x8>; 241 + interrupt-parent = <&UIC1_0>; 242 + interrupts = <22 0x4 6 0x84>; 243 + }; 244 + 245 + UIC2_7: uic2_7 { 246 + #address-cells = <0>; 247 + #size-cells = <0>; 248 + #interrupt-cells = <2>; 249 + 250 + compatible = "ibm,uic"; 251 + interrupt-controller; 252 + cell-index = <14>; 253 + dcr-reg = <0x308 
0x8>; 254 + interrupt-parent = <&UIC1_0>; 255 + interrupts = <23 0x4 7 0x84>; 256 + }; 257 + 258 + UIC2_8: uic2_8 { 259 + #address-cells = <0>; 260 + #size-cells = <0>; 261 + #interrupt-cells = <2>; 262 + 263 + compatible = "ibm,uic"; 264 + interrupt-controller; 265 + cell-index = <15>; 266 + dcr-reg = <0x310 0x8>; 267 + interrupt-parent = <&UIC1_0>; 268 + interrupts = <24 0x4 8 0x84>; 269 + }; 270 + 271 + UIC2_9: uic2_9 { 272 + #address-cells = <0>; 273 + #size-cells = <0>; 274 + #interrupt-cells = <2>; 275 + 276 + compatible = "ibm,uic"; 277 + interrupt-controller; 278 + cell-index = <16>; 279 + dcr-reg = <0x318 0x8>; 280 + interrupt-parent = <&UIC1_0>; 281 + interrupts = <25 0x4 9 0x84>; 282 + }; 283 + 284 + UIC2_10: uic2_10 { 285 + #address-cells = <0>; 286 + #size-cells = <0>; 287 + #interrupt-cells = <2>; 288 + 289 + compatible = "ibm,uic"; 290 + interrupt-controller; 291 + cell-index = <17>; 292 + dcr-reg = <0x320 0x8>; 293 + interrupt-parent = <&UIC1_0>; 294 + interrupts = <26 0x4 10 0x84>; 295 + }; 296 + 297 + UIC2_11: uic2_11 { 298 + #address-cells = <0>; 299 + #size-cells = <0>; 300 + #interrupt-cells = <2>; 301 + 302 + compatible = "ibm,uic"; 303 + interrupt-controller; 304 + cell-index = <18>; 305 + dcr-reg = <0x328 0x8>; 306 + interrupt-parent = <&UIC1_0>; 307 + interrupts = <27 0x4 11 0x84>; 308 + }; 309 + 310 + UIC2_12: uic2_12 { 311 + #address-cells = <0>; 312 + #size-cells = <0>; 313 + #interrupt-cells = <2>; 314 + 315 + compatible = "ibm,uic"; 316 + interrupt-controller; 317 + cell-index = <19>; 318 + dcr-reg = <0x330 0x8>; 319 + interrupt-parent = <&UIC1_0>; 320 + interrupts = <28 0x4 12 0x84>; 321 + }; 322 + 323 + UIC2_13: uic2_13 { 324 + #address-cells = <0>; 325 + #size-cells = <0>; 326 + #interrupt-cells = <2>; 327 + 328 + compatible = "ibm,uic"; 329 + interrupt-controller; 330 + cell-index = <20>; 331 + dcr-reg = <0x338 0x8>; 332 + interrupt-parent = <&UIC1_0>; 333 + interrupts = <29 0x4 13 0x84>; 334 + }; 335 + 336 + UIC2_14: uic2_14 { 337 
+ #address-cells = <0>; 338 + #size-cells = <0>; 339 + #interrupt-cells = <2>; 340 + 341 + compatible = "ibm,uic"; 342 + interrupt-controller; 343 + cell-index = <21>; 344 + dcr-reg = <0x340 0x8>; 345 + interrupt-parent = <&UIC1_0>; 346 + interrupts = <30 0x4 14 0x84>; 347 + }; 348 + 349 + UIC2_15: uic2_15 { 350 + #address-cells = <0>; 351 + #size-cells = <0>; 352 + #interrupt-cells = <2>; 353 + 354 + compatible = "ibm,uic"; 355 + interrupt-controller; 356 + cell-index = <22>; 357 + dcr-reg = <0x348 0x8>; 358 + interrupt-parent = <&UIC1_0>; 359 + interrupts = <31 0x4 15 0x84>; 360 + }; 361 + 362 + mmc0: sdhci@020c0000 { 363 + compatible = "st,sdhci-stih407", "st,sdhci"; 364 + status = "disabled"; 365 + reg = <0x020c0000 0x20000>; 366 + reg-names = "mmc"; 367 + interrupt-parent = <&UIC1_3>; 368 + interrupts = <21 0x4 22 0x4>; 369 + interrupt-names = "mmcirq"; 370 + pinctrl-names = "default"; 371 + pinctrl-0 = <>; 372 + clock-names = "mmc"; 373 + clocks = <&mmc_clk>; 374 + }; 375 + 376 + plb6 { 377 + compatible = "ibm,plb6"; 378 + #address-cells = <2>; 379 + #size-cells = <1>; 380 + ranges; 381 + 382 + MCW0: memory-controller-wrapper { 383 + compatible = "ibm,cw-476fsp2"; 384 + dcr-reg = <0x11111800 0x40>; 385 + }; 386 + 387 + MCIF0: memory-controller { 388 + compatible = "ibm,sdram-476fsp2", "ibm,sdram-4xx-ddr3"; 389 + dcr-reg = <0x11120000 0x10000>; 390 + mcer-device = <&MCW0>; 391 + interrupt-parent = <&UIC0>; 392 + interrupts = <10 0x84 /* ECC UE */ 393 + 11 0x84>; /* ECC CE */ 394 + }; 395 + }; 396 + 397 + plb4 { 398 + compatible = "ibm,plb4"; 399 + #address-cells = <1>; 400 + #size-cells = <1>; 401 + ranges = <0x00000000 0x00000010 0x00000000 0x80000000 402 + 0x80000000 0x00000010 0x80000000 0x80000000>; 403 + clock-frequency = <333333334>; 404 + 405 + plb6-system-hung-irq { 406 + compatible = "ibm,bus-error-irq"; 407 + #interrupt-cells = <2>; 408 + interrupt-parent = <&UIC0>; 409 + interrupts = <0 0x84>; 410 + }; 411 + 412 + l2-error-irq { 413 + compatible = 
"ibm,bus-error-irq"; 414 + #interrupt-cells = <2>; 415 + interrupt-parent = <&UIC0>; 416 + interrupts = <20 0x84>; 417 + }; 418 + 419 + plb6-plb4-irq { 420 + compatible = "ibm,bus-error-irq"; 421 + #interrupt-cells = <2>; 422 + interrupt-parent = <&UIC0>; 423 + interrupts = <1 0x84>; 424 + }; 425 + 426 + plb4-ahb-irq { 427 + compatible = "ibm,bus-error-irq"; 428 + #interrupt-cells = <2>; 429 + interrupt-parent = <&UIC1_3>; 430 + interrupts = <20 0x84>; 431 + }; 432 + 433 + opbd-error-irq { 434 + compatible = "ibm,opbd-error-irq"; 435 + #interrupt-cells = <2>; 436 + interrupt-parent = <&UIC1_4>; 437 + interrupts = <5 0x84>; 438 + }; 439 + 440 + cmu-error-irq { 441 + compatible = "ibm,cmu-error-irq"; 442 + #interrupt-cells = <2>; 443 + interrupt-parent = <&UIC0>; 444 + interrupts = <28 0x84>; 445 + }; 446 + 447 + conf-error-irq { 448 + compatible = "ibm,conf-error-irq"; 449 + #interrupt-cells = <2>; 450 + interrupt-parent = <&UIC1_4>; 451 + interrupts = <11 0x84>; 452 + }; 453 + 454 + mc-ue-irq { 455 + compatible = "ibm,mc-ue-irq"; 456 + #interrupt-cells = <2>; 457 + interrupt-parent = <&UIC0>; 458 + interrupts = <10 0x84>; 459 + }; 460 + 461 + reset-warning-irq { 462 + compatible = "ibm,reset-warning-irq"; 463 + #interrupt-cells = <2>; 464 + interrupt-parent = <&UIC0>; 465 + interrupts = <17 0x84>; 466 + }; 467 + 468 + MAL0: mcmal0 { 469 + #interrupt-cells = <1>; 470 + #address-cells = <0>; 471 + #size-cells = <0>; 472 + compatible = "ibm,mcmal"; 473 + dcr-reg = <0x80 0x80>; 474 + num-tx-chans = <1>; 475 + num-rx-chans = <1>; 476 + interrupt-parent = <&MAL0>; 477 + interrupts = <0 1 2 3 4>; 478 + /* index interrupt-parent interrupt# type */ 479 + interrupt-map = </*TXEOB*/ 0 &UIC1_2 4 0x4 480 + /*RXEOB*/ 1 &UIC1_2 3 0x4 481 + /*SERR*/ 2 &UIC1_2 7 0x4 482 + /*TXDE*/ 3 &UIC1_2 6 0x4 483 + /*RXDE*/ 4 &UIC1_2 5 0x4>; 484 + }; 485 + 486 + MAL1: mcmal1 { 487 + #interrupt-cells = <1>; 488 + #address-cells = <0>; 489 + #size-cells = <0>; 490 + compatible = "ibm,mcmal"; 491 
+ dcr-reg = <0x100 0x80>; 492 + num-tx-chans = <1>; 493 + num-rx-chans = <1>; 494 + interrupt-parent = <&MAL1>; 495 + interrupts = <0 1 2 3 4>; 496 + /* index interrupt-parent interrupt# type */ 497 + interrupt-map = </*TXEOB*/ 0 &UIC1_2 12 0x4 498 + /*RXEOB*/ 1 &UIC1_2 11 0x4 499 + /*SERR*/ 2 &UIC1_2 15 0x4 500 + /*TXDE*/ 3 &UIC1_2 14 0x4 501 + /*RXDE*/ 4 &UIC1_2 13 0x4>; 502 + }; 503 + 504 + opb { 505 + compatible = "ibm,opb"; 506 + #address-cells = <1>; 507 + #size-cells = <1>; 508 + ranges; // pass-thru to parent bus 509 + clock-frequency = <83333334>; 510 + 511 + EMAC0: ethernet@b0000000 { 512 + linux,network-index = <0>; 513 + device_type = "network"; 514 + compatible = "ibm,emac4sync"; 515 + has-inverted-stacr-oc; 516 + interrupt-parent = <&UIC1_2>; 517 + interrupts = <1 0x4 0 0x4>; 518 + reg = <0xb0000000 0x100>; 519 + local-mac-address = [000000000000]; /* Filled in by 520 + cuboot */ 521 + mal-device = <&MAL0>; 522 + mal-tx-channel = <0>; 523 + mal-rx-channel = <0>; 524 + cell-index = <0>; 525 + max-frame-size = <1500>; 526 + rx-fifo-size = <4096>; 527 + tx-fifo-size = <4096>; 528 + rx-fifo-size-gige = <16384>; 529 + tx-fifo-size-gige = <8192>; 530 + phy-address = <1>; 531 + phy-mode = "rgmii"; 532 + phy-map = <00000003>; 533 + rgmii-device = <&RGMII>; 534 + rgmii-channel = <0>; 535 + }; 536 + 537 + EMAC1: ethernet@b0000100 { 538 + linux,network-index = <1>; 539 + device_type = "network"; 540 + compatible = "ibm,emac4sync"; 541 + has-inverted-stacr-oc; 542 + interrupt-parent = <&UIC1_2>; 543 + interrupts = <9 0x4 8 0x4>; 544 + reg = <0xb0000100 0x100>; 545 + local-mac-address = [000000000000]; /* Filled in by 546 + cuboot */ 547 + mal-device = <&MAL1>; 548 + mal-tx-channel = <0>; 549 + mal-rx-channel = <0>; 550 + cell-index = <1>; 551 + max-frame-size = <1500>; 552 + rx-fifo-size = <4096>; 553 + tx-fifo-size = <4096>; 554 + rx-fifo-size-gige = <16384>; 555 + tx-fifo-size-gige = <8192>; 556 + phy-address = <2>; 557 + phy-mode = "rgmii"; 558 + phy-map = 
<00000003>; 559 + rgmii-device = <&RGMII>; 560 + rgmii-channel = <1>; 561 + }; 562 + 563 + RGMII: rgmii@b0000600 { 564 + compatible = "ibm,rgmii"; 565 + has-mdio; 566 + reg = <0xb0000600 0x8>; 567 + }; 568 + 569 + UART0: serial@b0020000 { 570 + device_type = "serial"; 571 + compatible = "ns16550"; 572 + reg = <0xb0020000 0x8>; 573 + virtual-reg = <0xb0020000>; 574 + clock-frequency = <20833333>; 575 + current-speed = <115200>; 576 + interrupt-parent = <&UIC0>; 577 + interrupts = <31 0x4>; 578 + }; 579 + }; 580 + 581 + OHCI1: ohci@02040000 { 582 + compatible = "ohci-le"; 583 + reg = <0x02040000 0xa0>; 584 + interrupt-parent = <&UIC1_3>; 585 + interrupts = <28 0x8 29 0x8>; 586 + }; 587 + 588 + OHCI2: ohci@02080000 { 589 + compatible = "ohci-le"; 590 + reg = <0x02080000 0xa0>; 591 + interrupt-parent = <&UIC1_3>; 592 + interrupts = <30 0x8 31 0x8>; 593 + }; 594 + 595 + EHCI: ehci@02000000 { 596 + compatible = "usb-ehci"; 597 + reg = <0x02000000 0xa4>; 598 + interrupt-parent = <&UIC1_3>; 599 + interrupts = <23 0x4>; 600 + }; 601 + 602 + }; 603 + 604 + chosen { 605 + linux,stdout-path = "/plb/opb/serial@b0020000"; 606 + bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug"; 607 + }; 608 + };
+2 -2
arch/powerpc/boot/dts/mpc5121ads.dts
··· 9 9 * option) any later version. 10 10 */ 11 11 12 - #include <mpc5121.dtsi> 12 + #include "mpc5121.dtsi" 13 13 14 14 / { 15 15 model = "mpc5121ads"; ··· 94 94 }; 95 95 96 96 eeprom@50 { 97 - compatible = "at,24c32"; 97 + compatible = "atmel,24c32"; 98 98 reg = <0x50>; 99 99 }; 100 100
+1 -1
arch/powerpc/boot/dts/mpc8308_p1m.dts
··· 123 123 interrupt-parent = <&ipic>; 124 124 dfsrr; 125 125 fram@50 { 126 - compatible = "ramtron,24c64"; 126 + compatible = "ramtron,24c64", "atmel,24c64"; 127 127 reg = <0x50>; 128 128 }; 129 129 };
+2 -2
arch/powerpc/boot/dts/mpc8349emitx.dts
··· 92 92 dfsrr; 93 93 94 94 eeprom: at24@50 { 95 - compatible = "st,24c256"; 95 + compatible = "st,24c256", "atmel,24c256"; 96 96 reg = <0x50>; 97 97 }; 98 98 ··· 130 130 }; 131 131 132 132 spd: at24@51 { 133 - compatible = "at24,spd"; 133 + compatible = "atmel,spd"; 134 134 reg = <0x51>; 135 135 }; 136 136
+1 -1
arch/powerpc/boot/dts/mpc8377_rdb.dts
··· 150 150 }; 151 151 152 152 at24@50 { 153 - compatible = "at24,24c256"; 153 + compatible = "atmel,24c256"; 154 154 reg = <0x50>; 155 155 }; 156 156
+1 -1
arch/powerpc/boot/dts/mpc8377_wlan.dts
··· 135 135 dfsrr; 136 136 137 137 at24@50 { 138 - compatible = "at24,24c256"; 138 + compatible = "atmel,24c256"; 139 139 reg = <0x50>; 140 140 }; 141 141
+1 -1
arch/powerpc/boot/dts/mpc8378_rdb.dts
··· 150 150 }; 151 151 152 152 at24@50 { 153 - compatible = "at24,24c256"; 153 + compatible = "atmel,24c256"; 154 154 reg = <0x50>; 155 155 }; 156 156
+1 -1
arch/powerpc/boot/dts/mpc8379_rdb.dts
··· 148 148 }; 149 149 150 150 at24@50 { 151 - compatible = "at24,24c256"; 151 + compatible = "atmel,24c256"; 152 152 reg = <0x50>; 153 153 }; 154 154
+1 -1
arch/powerpc/boot/dts/pcm030.dts
··· 71 71 reg = <0x51>; 72 72 }; 73 73 eeprom@52 { 74 - compatible = "catalyst,24c32"; 74 + compatible = "catalyst,24c32", "atmel,24c32"; 75 75 reg = <0x52>; 76 76 pagesize = <32>; 77 77 };
+1 -1
arch/powerpc/boot/dts/pcm032.dts
··· 75 75 reg = <0x51>; 76 76 }; 77 77 eeprom@52 { 78 - compatible = "catalyst,24c32"; 78 + compatible = "catalyst,24c32", "atmel,24c32"; 79 79 reg = <0x52>; 80 80 pagesize = <32>; 81 81 };
+1 -1
arch/powerpc/boot/dts/pdm360ng.dts
··· 13 13 * option) any later version. 14 14 */ 15 15 16 - #include <mpc5121.dtsi> 16 + #include "mpc5121.dtsi" 17 17 18 18 / { 19 19 model = "pdm360ng";
+1 -1
arch/powerpc/boot/dts/sequoia.dts
··· 229 229 }; 230 230 partition@84000 { 231 231 label = "user"; 232 - reg = <0x00000000 0x01f7c000>; 232 + reg = <0x00084000 0x01f7c000>; 233 233 }; 234 234 }; 235 235 };
+1 -1
arch/powerpc/boot/dts/warp.dts
··· 238 238 239 239 /* This will create 52 and 53 */ 240 240 at24@52 { 241 - compatible = "at,24c04"; 241 + compatible = "atmel,24c04"; 242 242 reg = <0x52>; 243 243 }; 244 244 };
+7 -5
arch/powerpc/boot/ppc_asm.h
··· 67 67 #define MSR_LE 0x0000000000000001 68 68 69 69 #define FIXUP_ENDIAN \ 70 - tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \ 71 - b $+36; /* Skip trampoline if endian is good */ \ 72 - .long 0x05009f42; /* bcl 20,31,$+4 */ \ 73 - .long 0xa602487d; /* mflr r10 */ \ 74 - .long 0x1c004a39; /* addi r10,r10,28 */ \ 70 + tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ 71 + b $+44; /* Skip trampoline if endian is good */ \ 75 72 .long 0xa600607d; /* mfmsr r11 */ \ 76 73 .long 0x01006b69; /* xori r11,r11,1 */ \ 74 + .long 0x00004039; /* li r10,0 */ \ 75 + .long 0x6401417d; /* mtmsrd r10,1 */ \ 76 + .long 0x05009f42; /* bcl 20,31,$+4 */ \ 77 + .long 0xa602487d; /* mflr r10 */ \ 78 + .long 0x14004a39; /* addi r10,r10,20 */ \ 77 79 .long 0xa6035a7d; /* mtsrr0 r10 */ \ 78 80 .long 0xa6037b7d; /* mtsrr1 r11 */ \ 79 81 .long 0x2400004c /* rfid */
+126
arch/powerpc/configs/44x/fsp2_defconfig
··· 1 + CONFIG_44x=y 2 + # CONFIG_SWAP is not set 3 + CONFIG_SYSVIPC=y 4 + # CONFIG_CROSS_MEMORY_ATTACH is not set 5 + # CONFIG_FHANDLE is not set 6 + CONFIG_NO_HZ=y 7 + CONFIG_HIGH_RES_TIMERS=y 8 + CONFIG_IKCONFIG=y 9 + CONFIG_IKCONFIG_PROC=y 10 + CONFIG_LOG_BUF_SHIFT=16 11 + CONFIG_BLK_DEV_INITRD=y 12 + # CONFIG_RD_LZMA is not set 13 + # CONFIG_RD_XZ is not set 14 + # CONFIG_RD_LZO is not set 15 + # CONFIG_RD_LZ4 is not set 16 + CONFIG_KALLSYMS_ALL=y 17 + CONFIG_BPF_SYSCALL=y 18 + CONFIG_EMBEDDED=y 19 + CONFIG_PROFILING=y 20 + CONFIG_OPROFILE=y 21 + CONFIG_MODULES=y 22 + CONFIG_MODULE_UNLOAD=y 23 + # CONFIG_BLK_DEV_BSG is not set 24 + CONFIG_PPC_47x=y 25 + # CONFIG_EBONY is not set 26 + CONFIG_FSP2=y 27 + CONFIG_476FPE_ERR46=y 28 + CONFIG_SWIOTLB=y 29 + CONFIG_KEXEC=y 30 + CONFIG_CRASH_DUMP=y 31 + CONFIG_CMDLINE_BOOL=y 32 + CONFIG_CMDLINE="ip=on rw" 33 + # CONFIG_SUSPEND is not set 34 + # CONFIG_PCI is not set 35 + CONFIG_NET=y 36 + CONFIG_PACKET=y 37 + CONFIG_UNIX=y 38 + CONFIG_INET=y 39 + CONFIG_IP_PNP=y 40 + CONFIG_IP_PNP_DHCP=y 41 + CONFIG_IP_PNP_BOOTP=y 42 + # CONFIG_INET_XFRM_MODE_TRANSPORT is not set 43 + # CONFIG_INET_XFRM_MODE_TUNNEL is not set 44 + # CONFIG_INET_XFRM_MODE_BEET is not set 45 + # CONFIG_IPV6 is not set 46 + CONFIG_VLAN_8021Q=m 47 + CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 48 + CONFIG_DEVTMPFS=y 49 + CONFIG_DEVTMPFS_MOUNT=y 50 + CONFIG_CONNECTOR=y 51 + CONFIG_MTD=y 52 + CONFIG_MTD_BLOCK=y 53 + CONFIG_MTD_JEDECPROBE=y 54 + CONFIG_MTD_CFI_AMDSTD=y 55 + CONFIG_MTD_PHYSMAP_OF=y 56 + CONFIG_BLK_DEV_RAM=y 57 + CONFIG_BLK_DEV_RAM_SIZE=35000 58 + # CONFIG_SCSI_PROC_FS is not set 59 + CONFIG_BLK_DEV_SD=y 60 + # CONFIG_SCSI_LOWLEVEL is not set 61 + CONFIG_ATA=y 62 + # CONFIG_SATA_PMP is not set 63 + # CONFIG_ATA_SFF is not set 64 + CONFIG_NETDEVICES=y 65 + CONFIG_BONDING=m 66 + CONFIG_IBM_EMAC=m 67 + # CONFIG_INPUT is not set 68 + # CONFIG_SERIO is not set 69 + # CONFIG_VT is not set 70 + # CONFIG_LEGACY_PTYS is not set 71 + # CONFIG_DEVMEM is not 
set 72 + CONFIG_SERIAL_8250=y 73 + CONFIG_SERIAL_8250_CONSOLE=y 74 + CONFIG_SERIAL_8250_NR_UARTS=32 75 + CONFIG_SERIAL_8250_RUNTIME_UARTS=32 76 + CONFIG_SERIAL_8250_EXTENDED=y 77 + CONFIG_SERIAL_8250_SHARE_IRQ=y 78 + CONFIG_SERIAL_OF_PLATFORM=y 79 + # CONFIG_HW_RANDOM is not set 80 + CONFIG_I2C=y 81 + CONFIG_I2C_IBM_IIC=y 82 + CONFIG_PTP_1588_CLOCK=y 83 + # CONFIG_HWMON is not set 84 + CONFIG_THERMAL=y 85 + CONFIG_WATCHDOG=y 86 + CONFIG_BOOKE_WDT=y 87 + CONFIG_USB=y 88 + CONFIG_USB_EHCI_HCD=y 89 + CONFIG_USB_OHCI_HCD=y 90 + CONFIG_MMC=y 91 + CONFIG_MMC_DEBUG=y 92 + CONFIG_MMC_SDHCI=y 93 + CONFIG_MMC_SDHCI_PLTFM=y 94 + CONFIG_MMC_SDHCI_OF_ARASAN=y 95 + CONFIG_RTC_CLASS=y 96 + CONFIG_RTC_DRV_M41T80=y 97 + CONFIG_EXT2_FS=y 98 + CONFIG_EXT4_FS=y 99 + CONFIG_EXT4_FS_POSIX_ACL=y 100 + CONFIG_EXT4_FS_SECURITY=y 101 + CONFIG_PROC_KCORE=y 102 + CONFIG_TMPFS=y 103 + CONFIG_JFFS2_FS=y 104 + CONFIG_JFFS2_FS_WBUF_VERIFY=y 105 + CONFIG_JFFS2_SUMMARY=y 106 + CONFIG_JFFS2_FS_XATTR=y 107 + CONFIG_CRAMFS=y 108 + CONFIG_NFS_FS=y 109 + CONFIG_NFS_V3_ACL=y 110 + CONFIG_NFS_V4=y 111 + CONFIG_ROOT_NFS=y 112 + CONFIG_NLS_DEFAULT="n" 113 + CONFIG_XZ_DEC=y 114 + CONFIG_PRINTK_TIME=y 115 + CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3 116 + CONFIG_DYNAMIC_DEBUG=y 117 + CONFIG_DEBUG_INFO=y 118 + CONFIG_DEBUG_FS=y 119 + CONFIG_MAGIC_SYSRQ=y 120 + CONFIG_DETECT_HUNG_TASK=y 121 + CONFIG_CRYPTO_CBC=y 122 + CONFIG_CRYPTO_ECB=y 123 + CONFIG_CRYPTO_PCBC=y 124 + CONFIG_CRYPTO_MD5=y 125 + CONFIG_CRYPTO_DES=y 126 + # CONFIG_CRYPTO_HW is not set
+5
arch/powerpc/include/asm/barrier.h
··· 74 74 ___p1; \ 75 75 }) 76 76 77 + /* 78 + * This must resolve to hwsync on SMP for the context switch path. 79 + * See _switch, and core scheduler context switch memory ordering 80 + * comments. 81 + */ 77 82 #define smp_mb__before_spinlock() smp_mb() 78 83 79 84 #include <asm-generic/barrier.h>
+7 -80
arch/powerpc/include/asm/bitops.h
··· 206 206 * Return the zero-based bit position (LE, not IBM bit numbering) of 207 207 * the most significant 1-bit in a double word. 208 208 */ 209 - static __inline__ __attribute__((const)) 210 - int __ilog2(unsigned long x) 211 - { 212 - int lz; 209 + #define __ilog2(x) ilog2(x) 213 210 214 - asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x)); 215 - return BITS_PER_LONG - 1 - lz; 216 - } 211 + #include <asm-generic/bitops/ffz.h> 217 212 218 - static inline __attribute__((const)) 219 - int __ilog2_u32(u32 n) 220 - { 221 - int bit; 222 - asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n)); 223 - return 31 - bit; 224 - } 213 + #include <asm-generic/bitops/builtin-__ffs.h> 225 214 226 - #ifdef __powerpc64__ 227 - static inline __attribute__((const)) 228 - int __ilog2_u64(u64 n) 229 - { 230 - int bit; 231 - asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n)); 232 - return 63 - bit; 233 - } 234 - #endif 235 - 236 - /* 237 - * Determines the bit position of the least significant 0 bit in the 238 - * specified double word. The returned bit position will be 239 - * zero-based, starting from the right side (63/31 - 0). 240 - */ 241 - static __inline__ unsigned long ffz(unsigned long x) 242 - { 243 - /* no zero exists anywhere in the 8 byte area. */ 244 - if ((x = ~x) == 0) 245 - return BITS_PER_LONG; 246 - 247 - /* 248 - * Calculate the bit position of the least significant '1' bit in x 249 - * (since x has been changed this will actually be the least significant 250 - * '0' bit in * the original x). Note: (x & -x) gives us a mask that 251 - * is the least significant * (RIGHT-most) 1-bit of the value in x. 252 - */ 253 - return __ilog2(x & -x); 254 - } 255 - 256 - static __inline__ unsigned long __ffs(unsigned long x) 257 - { 258 - return __ilog2(x & -x); 259 - } 260 - 261 - /* 262 - * ffs: find first bit set. This is defined the same way as 263 - * the libc and compiler builtin ffs routines, therefore 264 - * differs in spirit from the above ffz (man ffs). 
265 - */ 266 - static __inline__ int ffs(int x) 267 - { 268 - unsigned long i = (unsigned long)x; 269 - return __ilog2(i & -i) + 1; 270 - } 215 + #include <asm-generic/bitops/builtin-ffs.h> 271 216 272 217 /* 273 218 * fls: find last (most-significant) bit set. ··· 220 275 */ 221 276 static __inline__ int fls(unsigned int x) 222 277 { 223 - int lz; 224 - 225 - asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x)); 226 - return 32 - lz; 278 + return 32 - __builtin_clz(x); 227 279 } 228 280 229 - static __inline__ unsigned long __fls(unsigned long x) 230 - { 231 - return __ilog2(x); 232 - } 281 + #include <asm-generic/bitops/builtin-__fls.h> 233 282 234 - /* 235 - * 64-bit can do this using one cntlzd (count leading zeroes doubleword) 236 - * instruction; for 32-bit we use the generic version, which does two 237 - * 32-bit fls calls. 238 - */ 239 - #ifdef __powerpc64__ 240 283 static __inline__ int fls64(__u64 x) 241 284 { 242 - int lz; 243 - 244 - asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x)); 245 - return 64 - lz; 285 + return 64 - __builtin_clzll(x); 246 286 } 247 - #else 248 - #include <asm-generic/bitops/fls64.h> 249 - #endif /* __powerpc64__ */ 250 287 251 288 #ifdef CONFIG_PPC64 252 289 unsigned int __arch_hweight8(unsigned int w);
+2 -1
arch/powerpc/include/asm/book3s/32/pgalloc.h
··· 31 31 32 32 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 33 33 { 34 - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); 34 + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 35 + pgtable_gfp_flags(mm, GFP_KERNEL)); 35 36 } 36 37 37 38 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+2
arch/powerpc/include/asm/book3s/32/pgtable.h
··· 297 297 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, 298 298 pmd_t **pmdp); 299 299 300 + int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); 301 + 300 302 /* Generic accessors to PTE bits */ 301 303 static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} 302 304 static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
+3
arch/powerpc/include/asm/book3s/64/hash.h
··· 89 89 { 90 90 return (pgd_val(pgd) == 0); 91 91 } 92 + #ifdef CONFIG_STRICT_KERNEL_RWX 93 + extern void hash__mark_rodata_ro(void); 94 + #endif 92 95 93 96 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, 94 97 pte_t *ptep, unsigned long pte, int huge);
+10 -6
arch/powerpc/include/asm/book3s/64/pgalloc.h
··· 53 53 static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) 54 54 { 55 55 #ifdef CONFIG_PPC_64K_PAGES 56 - return (pgd_t *)__get_free_page(PGALLOC_GFP); 56 + return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP)); 57 57 #else 58 58 struct page *page; 59 - page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4); 59 + page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT), 60 + 4); 60 61 if (!page) 61 62 return NULL; 62 63 return (pgd_t *) page_address(page); ··· 77 76 { 78 77 if (radix_enabled()) 79 78 return radix__pgd_alloc(mm); 80 - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); 79 + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 80 + pgtable_gfp_flags(mm, GFP_KERNEL)); 81 81 } 82 82 83 83 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) ··· 95 93 96 94 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 97 95 { 98 - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); 96 + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), 97 + pgtable_gfp_flags(mm, GFP_KERNEL)); 99 98 } 100 99 101 100 static inline void pud_free(struct mm_struct *mm, pud_t *pud) ··· 122 119 123 120 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 124 121 { 125 - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); 122 + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), 123 + pgtable_gfp_flags(mm, GFP_KERNEL)); 126 124 } 127 125 128 126 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) ··· 172 168 struct page *page; 173 169 pte_t *pte; 174 170 175 - pte = pte_alloc_one_kernel(mm, address); 171 + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); 176 172 if (!pte) 177 173 return NULL; 178 174 page = virt_to_page(pte);
+45
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 5 5 6 6 #ifndef __ASSEMBLY__ 7 7 #include <linux/mmdebug.h> 8 + #include <linux/bug.h> 8 9 #endif 9 10 10 11 /* ··· 80 79 81 80 #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ 82 81 #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ 82 + #define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */ 83 + #define __HAVE_ARCH_PTE_DEVMAP 84 + 83 85 /* 84 86 * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE 85 87 * Instead of fixing all of them, add an alternate define which ··· 601 597 static inline pte_t pte_mkhuge(pte_t pte) 602 598 { 603 599 return pte; 600 + } 601 + 602 + static inline pte_t pte_mkdevmap(pte_t pte) 603 + { 604 + return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP); 605 + } 606 + 607 + static inline int pte_devmap(pte_t pte) 608 + { 609 + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP)); 604 610 } 605 611 606 612 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) ··· 1160 1146 return true; 1161 1147 } 1162 1148 1149 + 1150 + static inline pmd_t pmd_mkdevmap(pmd_t pmd) 1151 + { 1152 + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); 1153 + } 1154 + 1155 + static inline int pmd_devmap(pmd_t pmd) 1156 + { 1157 + return pte_devmap(pmd_pte(pmd)); 1158 + } 1159 + 1160 + static inline int pud_devmap(pud_t pud) 1161 + { 1162 + return 0; 1163 + } 1164 + 1165 + static inline int pgd_devmap(pgd_t pgd) 1166 + { 1167 + return 0; 1168 + } 1163 1169 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1170 + 1171 + static inline const int pud_pfn(pud_t pud) 1172 + { 1173 + /* 1174 + * Currently all calls to pud_pfn() are gated around a pud_devmap() 1175 + * check so this should never be used. If it grows another user we 1176 + * want to know about it. 1177 + */ 1178 + BUILD_BUG(); 1179 + return 0; 1180 + } 1164 1181 #endif /* __ASSEMBLY__ */ 1165 1182 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
+5 -1
arch/powerpc/include/asm/book3s/64/radix.h
··· 116 116 #define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE) 117 117 #define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE) 118 118 119 + #ifdef CONFIG_STRICT_KERNEL_RWX 120 + extern void radix__mark_rodata_ro(void); 121 + #endif 122 + 119 123 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr, 120 124 unsigned long set) 121 125 { ··· 256 252 257 253 static inline int radix__pmd_trans_huge(pmd_t pmd) 258 254 { 259 - return !!(pmd_val(pmd) & _PAGE_PTE); 255 + return (pmd_val(pmd) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE; 260 256 } 261 257 262 258 static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
+9 -1
arch/powerpc/include/asm/code-patching.h
··· 83 83 * On PPC64 ABIv1 the function pointer actually points to the 84 84 * function's descriptor. The first entry in the descriptor is the 85 85 * address of the function text. 86 + * 87 + * However, we may also receive pointer to an assembly symbol. To 88 + * detect that, we first check if the function pointer we receive 89 + * already points to kernel/module text and we only dereference it 90 + * if it doesn't. 86 91 */ 87 - return ((func_descr_t *)func)->entry; 92 + if (kernel_text_address((unsigned long)func)) 93 + return (unsigned long)func; 94 + else 95 + return ((func_descr_t *)func)->entry; 88 96 #else 89 97 return (unsigned long)func; 90 98 #endif
+13
arch/powerpc/include/asm/dbell.h
··· 56 56 : : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300)); 57 57 } 58 58 59 + static inline void _ppc_msgclr(u32 msg) 60 + { 61 + __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0) 62 + : : "i" (CPU_FTR_HVMODE), "r" (msg)); 63 + } 64 + 65 + static inline void ppc_msgclr(enum ppc_dbell type) 66 + { 67 + u32 msg = PPC_DBELL_TYPE(type); 68 + 69 + _ppc_msgclr(msg); 70 + } 71 + 59 72 #else /* CONFIG_PPC_BOOK3S */ 60 73 61 74 #define PPC_DBELL_MSGTYPE PPC_DBELL
+12 -4
arch/powerpc/include/asm/delay.h
··· 2 2 #define _ASM_POWERPC_DELAY_H 3 3 #ifdef __KERNEL__ 4 4 5 + #include <linux/processor.h> 5 6 #include <asm/time.h> 6 7 7 8 /* ··· 59 58 typeof(condition) __ret; \ 60 59 unsigned long __loops = tb_ticks_per_usec * timeout; \ 61 60 unsigned long __start = get_tbl(); \ 62 - while (!(__ret = (condition)) && (tb_ticks_since(__start) <= __loops)) \ 63 - if (delay) \ 61 + \ 62 + if (delay) { \ 63 + while (!(__ret = (condition)) && \ 64 + (tb_ticks_since(__start) <= __loops)) \ 64 65 udelay(delay); \ 65 - else \ 66 - cpu_relax(); \ 66 + } else { \ 67 + spin_begin(); \ 68 + while (!(__ret = (condition)) && \ 69 + (tb_ticks_since(__start) <= __loops)) \ 70 + spin_cpu_relax(); \ 71 + spin_end(); \ 72 + } \ 67 73 if (!__ret) \ 68 74 __ret = (condition); \ 69 75 __ret; \
+40 -9
arch/powerpc/include/asm/exception-64s.h
··· 36 36 */ 37 37 #include <asm/head-64.h> 38 38 39 + /* PACA save area offsets (exgen, exmc, etc) */ 39 40 #define EX_R9 0 40 41 #define EX_R10 8 41 42 #define EX_R11 16 42 43 #define EX_R12 24 43 44 #define EX_R13 32 44 - #define EX_SRR0 40 45 - #define EX_DAR 48 46 - #define EX_DSISR 56 47 - #define EX_CCR 60 48 - #define EX_R3 64 49 - #define EX_LR 72 50 - #define EX_CFAR 80 51 - #define EX_PPR 88 /* SMT thread status register (priority) */ 52 - #define EX_CTR 96 45 + #define EX_DAR 40 46 + #define EX_DSISR 48 47 + #define EX_CCR 52 48 + #define EX_CFAR 56 49 + #define EX_PPR 64 50 + #if defined(CONFIG_RELOCATABLE) 51 + #define EX_CTR 72 52 + #define EX_SIZE 10 /* size in u64 units */ 53 + #else 54 + #define EX_SIZE 9 /* size in u64 units */ 55 + #endif 56 + 57 + /* 58 + * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR 59 + * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole 60 + * in the save area so it's not necessary to overlap them. Could be used 61 + * for future savings though if another 4 byte register was to be saved. 62 + */ 63 + #define EX_LR EX_DAR 64 + 65 + /* 66 + * EX_R3 is only used by the bad_stack handler. bad_stack reloads and 67 + * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap 68 + * with EX_DAR. 69 + */ 70 + #define EX_R3 EX_DAR 53 71 54 72 #ifdef CONFIG_RELOCATABLE 55 73 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ ··· 253 235 #else 254 236 #define kvmppc_interrupt kvmppc_interrupt_pr 255 237 #endif 238 + 239 + /* 240 + * Branch to label using its 0xC000 address. This results in instruction 241 + * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned 242 + * on using mtmsr rather than rfid. 243 + * 244 + * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than 245 + * load KBASE for a slight optimisation. 
246 + */ 247 + #define BRANCH_TO_C000(reg, label) \ 248 + __LOAD_HANDLER(reg, label); \ 249 + mtctr reg; \ 250 + bctr 256 251 257 252 #ifdef CONFIG_RELOCATABLE 258 253 #define BRANCH_TO_COMMON(reg, label) \
+4
arch/powerpc/include/asm/fadump.h
··· 43 43 #define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \ 44 44 + (0x1UL << 26)) 45 45 46 + /* The upper limit percentage for user specified boot memory size (25%) */ 47 + #define MAX_BOOT_MEM_RATIO 4 48 + 46 49 #define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt) 47 50 48 51 /* Firmware provided dump sections */ ··· 203 200 unsigned long long size; 204 201 }; 205 202 203 + extern int is_fadump_boot_memory_area(u64 addr, ulong size); 206 204 extern int early_init_dt_scan_fw_dump(unsigned long node, 207 205 const char *uname, int depth, void *data); 208 206 extern int fadump_reserve_mem(void);
+23 -2
arch/powerpc/include/asm/head-64.h
··· 3 3 4 4 #include <asm/cache.h> 5 5 6 + #ifdef __ASSEMBLY__ 6 7 /* 7 8 * We can't do CPP stringification and concatination directly into the section 8 9 * name for some reason, so these macros can do it for us. ··· 50 49 * CLOSE_FIXED_SECTION() or elsewhere, there may be something 51 50 * unexpected being added there. Remove the '. = x_len' line, rebuild, and 52 51 * check what is pushing the section down. 53 - * - If the build dies in linking, check arch/powerpc/kernel/vmlinux.lds.S 54 - * for instructions. 52 + * - If the build dies in linking, check arch/powerpc/tools/head_check.sh 53 + * comments. 55 54 * - If the kernel crashes or hangs in very early boot, it could be linker 56 55 * stubs at the start of the main text. 57 56 */ ··· 64 63 . = 0x0; \ 65 64 start_##sname: 66 65 66 + /* 67 + * .linker_stub_catch section is used to catch linker stubs from being 68 + * inserted in our .text section, above the start_text label (which breaks 69 + * the ABS_ADDR calculation). See kernel/vmlinux.lds.S and tools/head_check.sh 70 + * for more details. We would prefer to just keep a cacheline (0x80), but 71 + * 0x100 seems to be how the linker aligns branch stub groups. 72 + */ 73 + #ifdef CONFIG_LD_HEAD_STUB_CATCH 74 + #define OPEN_TEXT_SECTION(start) \ 75 + .section ".linker_stub_catch","ax",@progbits; \ 76 + linker_stub_catch: \ 77 + . = 0x4; \ 78 + text_start = (start) + 0x100; \ 79 + .section ".text","ax",@progbits; \ 80 + .balign 0x100; \ 81 + start_text: 82 + #else 67 83 #define OPEN_TEXT_SECTION(start) \ 68 84 text_start = (start); \ 69 85 .section ".text","ax",@progbits; \ 70 86 . = 0x0; \ 71 87 start_text: 88 + #endif 72 89 73 90 #define ZERO_FIXED_SECTION(sname, start, end) \ 74 91 sname##_start = (start); \ ··· 415 396 #define EXC_COMMON_HV(name, realvec, hdlr) \ 416 397 EXC_COMMON_BEGIN(name); \ 417 398 STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \ 399 + 400 + #endif /* __ASSEMBLY__ */ 418 401 419 402 #endif /* _ASM_POWERPC_HEAD_64_H */
+4
arch/powerpc/include/asm/hw_irq.h
··· 129 129 } 130 130 131 131 extern bool prep_irq_for_idle(void); 132 + extern bool prep_irq_for_idle_irqsoff(void); 133 + extern void irq_set_pending_from_srr1(unsigned long srr1); 134 + 135 + #define fini_irq_for_idle_irqsoff() trace_hardirqs_off(); 132 136 133 137 extern void force_external_irq_replay(void); 134 138
+1
arch/powerpc/include/asm/machdep.h
··· 226 226 extern void e500_idle(void); 227 227 extern void power4_idle(void); 228 228 extern void power7_idle(void); 229 + extern void power9_idle(void); 229 230 extern void ppc6xx_idle(void); 230 231 extern void book3e_idle(void); 231 232
+8 -7
arch/powerpc/include/asm/mce.h
··· 90 90 enum MCE_RaErrorType { 91 91 MCE_RA_ERROR_INDETERMINATE = 0, 92 92 MCE_RA_ERROR_IFETCH = 1, 93 - MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2, 94 - MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3, 95 - MCE_RA_ERROR_LOAD = 4, 96 - MCE_RA_ERROR_STORE = 5, 97 - MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6, 98 - MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7, 99 - MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8, 93 + MCE_RA_ERROR_IFETCH_FOREIGN = 2, 94 + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3, 95 + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4, 96 + MCE_RA_ERROR_LOAD = 5, 97 + MCE_RA_ERROR_STORE = 6, 98 + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7, 99 + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8, 100 + MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9, 100 101 }; 101 102 102 103 enum MCE_LinkErrorType {
+2 -1
arch/powerpc/include/asm/nohash/32/pgalloc.h
··· 31 31 32 32 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 33 33 { 34 - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); 34 + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 35 + pgtable_gfp_flags(mm, GFP_KERNEL)); 35 36 } 36 37 37 38 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+2
arch/powerpc/include/asm/nohash/32/pgtable.h
··· 340 340 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, 341 341 pmd_t **pmdp); 342 342 343 + int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); 344 + 343 345 #endif /* !__ASSEMBLY__ */ 344 346 345 347 #endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
+7 -4
arch/powerpc/include/asm/nohash/64/pgalloc.h
··· 43 43 44 44 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 45 45 { 46 - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); 46 + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 47 + pgtable_gfp_flags(mm, GFP_KERNEL)); 47 48 } 48 49 49 50 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) ··· 58 57 59 58 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 60 59 { 61 - return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL); 60 + return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), 61 + pgtable_gfp_flags(mm, GFP_KERNEL)); 62 62 } 63 63 64 64 static inline void pud_free(struct mm_struct *mm, pud_t *pud) ··· 98 96 struct page *page; 99 97 pte_t *pte; 100 98 101 - pte = pte_alloc_one_kernel(mm, address); 99 + pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); 102 100 if (!pte) 103 101 return NULL; 104 102 page = virt_to_page(pte); ··· 191 189 192 190 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 193 191 { 194 - return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL); 192 + return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), 193 + pgtable_gfp_flags(mm, GFP_KERNEL)); 195 194 } 196 195 197 196 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+74 -2
arch/powerpc/include/asm/opal-api.h
··· 667 667 668 668 enum { 669 669 OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1, 670 - OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2 670 + OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2, 671 + OPAL_PHB_ERROR_DATA_TYPE_PHB4 = 3 671 672 }; 672 673 673 674 enum { 674 675 OPAL_P7IOC_NUM_PEST_REGS = 128, 675 - OPAL_PHB3_NUM_PEST_REGS = 256 676 + OPAL_PHB3_NUM_PEST_REGS = 256, 677 + OPAL_PHB4_NUM_PEST_REGS = 512 676 678 }; 677 679 678 680 struct OpalIoPhbErrorCommon { ··· 804 802 __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; 805 803 }; 806 804 805 + struct OpalIoPhb4ErrorData { 806 + struct OpalIoPhbErrorCommon common; 807 + 808 + __be32 brdgCtl; 809 + 810 + /* PHB4 cfg regs */ 811 + __be32 deviceStatus; 812 + __be32 slotStatus; 813 + __be32 linkStatus; 814 + __be32 devCmdStatus; 815 + __be32 devSecStatus; 816 + 817 + /* cfg AER regs */ 818 + __be32 rootErrorStatus; 819 + __be32 uncorrErrorStatus; 820 + __be32 corrErrorStatus; 821 + __be32 tlpHdr1; 822 + __be32 tlpHdr2; 823 + __be32 tlpHdr3; 824 + __be32 tlpHdr4; 825 + __be32 sourceId; 826 + 827 + /* PHB4 ETU Error Regs */ 828 + __be64 nFir; /* 000 */ 829 + __be64 nFirMask; /* 003 */ 830 + __be64 nFirWOF; /* 008 */ 831 + __be64 phbPlssr; /* 120 */ 832 + __be64 phbCsr; /* 110 */ 833 + __be64 lemFir; /* C00 */ 834 + __be64 lemErrorMask; /* C18 */ 835 + __be64 lemWOF; /* C40 */ 836 + __be64 phbErrorStatus; /* C80 */ 837 + __be64 phbFirstErrorStatus; /* C88 */ 838 + __be64 phbErrorLog0; /* CC0 */ 839 + __be64 phbErrorLog1; /* CC8 */ 840 + __be64 phbTxeErrorStatus; /* D00 */ 841 + __be64 phbTxeFirstErrorStatus; /* D08 */ 842 + __be64 phbTxeErrorLog0; /* D40 */ 843 + __be64 phbTxeErrorLog1; /* D48 */ 844 + __be64 phbRxeArbErrorStatus; /* D80 */ 845 + __be64 phbRxeArbFirstErrorStatus; /* D88 */ 846 + __be64 phbRxeArbErrorLog0; /* DC0 */ 847 + __be64 phbRxeArbErrorLog1; /* DC8 */ 848 + __be64 phbRxeMrgErrorStatus; /* E00 */ 849 + __be64 phbRxeMrgFirstErrorStatus; /* E08 */ 850 + __be64 phbRxeMrgErrorLog0; /* E40 */ 851 + __be64 phbRxeMrgErrorLog1; /* E48 */ 852 + __be64 
phbRxeTceErrorStatus; /* E80 */ 853 + __be64 phbRxeTceFirstErrorStatus; /* E88 */ 854 + __be64 phbRxeTceErrorLog0; /* EC0 */ 855 + __be64 phbRxeTceErrorLog1; /* EC8 */ 856 + 857 + /* PHB4 REGB Error Regs */ 858 + __be64 phbPblErrorStatus; /* 1900 */ 859 + __be64 phbPblFirstErrorStatus; /* 1908 */ 860 + __be64 phbPblErrorLog0; /* 1940 */ 861 + __be64 phbPblErrorLog1; /* 1948 */ 862 + __be64 phbPcieDlpErrorLog1; /* 1AA0 */ 863 + __be64 phbPcieDlpErrorLog2; /* 1AA8 */ 864 + __be64 phbPcieDlpErrorStatus; /* 1AB0 */ 865 + __be64 phbRegbErrorStatus; /* 1C00 */ 866 + __be64 phbRegbFirstErrorStatus; /* 1C08 */ 867 + __be64 phbRegbErrorLog0; /* 1C40 */ 868 + __be64 phbRegbErrorLog1; /* 1C48 */ 869 + 870 + __be64 pestA[OPAL_PHB4_NUM_PEST_REGS]; 871 + __be64 pestB[OPAL_PHB4_NUM_PEST_REGS]; 872 + }; 873 + 807 874 enum { 808 875 OPAL_REINIT_CPUS_HILE_BE = (1 << 0), 809 876 OPAL_REINIT_CPUS_HILE_LE = (1 << 1), ··· 948 877 OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, 949 878 OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, 950 879 OPAL_PHB_CAPI_MODE_DMA = 4, 880 + OPAL_PHB_CAPI_MODE_DMA_TVT1 = 5, 951 881 }; 952 882 953 883 /* OPAL I2C request */
+10 -4
arch/powerpc/include/asm/paca.h
··· 21 21 #include <asm/lppaca.h> 22 22 #include <asm/mmu.h> 23 23 #include <asm/page.h> 24 + #ifdef CONFIG_PPC_BOOK3E 24 25 #include <asm/exception-64e.h> 26 + #else 27 + #include <asm/exception-64s.h> 28 + #endif 25 29 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 26 30 #include <asm/kvm_book3s_asm.h> 27 31 #endif ··· 102 98 * Now, starting in cacheline 2, the exception save areas 103 99 */ 104 100 /* used for most interrupts/exceptions */ 105 - u64 exgen[13] __attribute__((aligned(0x80))); 106 - u64 exslb[13]; /* used for SLB/segment table misses 101 + u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); 102 + u64 exslb[EX_SIZE]; /* used for SLB/segment table misses 107 103 * on the linear mapping */ 108 104 /* SLB related definitions */ 109 105 u16 vmalloc_sllp; ··· 181 177 * to the sibling threads' paca. 182 178 */ 183 179 struct paca_struct **thread_sibling_pacas; 180 + /* The PSSCR value that the kernel requested before going to stop */ 181 + u64 requested_psscr; 184 182 #endif 185 183 186 184 #ifdef CONFIG_PPC_STD_MMU_64 187 185 /* Non-maskable exceptions that are not performance critical */ 188 - u64 exnmi[13]; /* used for system reset (nmi) */ 189 - u64 exmc[13]; /* used for machine checks */ 186 + u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */ 187 + u64 exmc[EX_SIZE]; /* used for machine checks */ 190 188 #endif 191 189 #ifdef CONFIG_PPC_BOOK3S_64 192 190 /* Exclusive stacks for system reset and machine check exception. */
+14
arch/powerpc/include/asm/pgalloc.h
··· 3 3 4 4 #include <linux/mm.h> 5 5 6 + #ifndef MODULE 7 + static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) 8 + { 9 + if (unlikely(mm == &init_mm)) 10 + return gfp; 11 + return gfp | __GFP_ACCOUNT; 12 + } 13 + #else /* !MODULE */ 14 + static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) 15 + { 16 + return gfp | __GFP_ACCOUNT; 17 + } 18 + #endif /* MODULE */ 19 + 6 20 #ifdef CONFIG_PPC_BOOK3S 7 21 #include <asm/book3s/pgalloc.h> 8 22 #else
+7 -4
arch/powerpc/include/asm/ppc-opcode.h
··· 191 191 /* sorted alphabetically */ 192 192 #define PPC_INST_BHRBE 0x7c00025c 193 193 #define PPC_INST_CLRBHRB 0x7c00035c 194 - #define PPC_INST_COPY 0x7c00060c 195 - #define PPC_INST_COPY_FIRST 0x7c20060c 194 + #define PPC_INST_COPY 0x7c20060c 196 195 #define PPC_INST_CP_ABORT 0x7c00068c 197 196 #define PPC_INST_DCBA 0x7c0005ec 198 197 #define PPC_INST_DCBA_MASK 0xfc0007fe ··· 222 223 #define PPC_INST_MSGCLR 0x7c0001dc 223 224 #define PPC_INST_MSGSYNC 0x7c0006ec 224 225 #define PPC_INST_MSGSNDP 0x7c00011c 226 + #define PPC_INST_MSGCLRP 0x7c00015c 225 227 #define PPC_INST_MTTMR 0x7c0003dc 226 228 #define PPC_INST_NOP 0x60000000 227 - #define PPC_INST_PASTE 0x7c00070c 228 - #define PPC_INST_PASTE_LAST 0x7c20070d 229 + #define PPC_INST_PASTE 0x7c20070d 229 230 #define PPC_INST_POPCNTB 0x7c0000f4 230 231 #define PPC_INST_POPCNTB_MASK 0xfc0007fe 231 232 #define PPC_INST_POPCNTD 0x7c0003f4 ··· 393 394 394 395 /* Deal with instructions that older assemblers aren't aware of */ 395 396 #define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) 397 + #define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ 398 + ___PPC_RA(a) | ___PPC_RB(b)) 396 399 #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ 397 400 __PPC_RA(a) | __PPC_RB(b)) 398 401 #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ ··· 411 410 #define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ 412 411 ___PPC_RB(b)) 413 412 #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ 413 + ___PPC_RB(b)) 414 + #define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \ 414 415 ___PPC_RB(b)) 415 416 #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ 416 417 __PPC_RA(a) | __PPC_RS(s))
+7 -4
arch/powerpc/include/asm/ppc_asm.h
··· 770 770 #else 771 771 #define FIXUP_ENDIAN \ 772 772 tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \ 773 - b $+36; /* Skip trampoline if endian is good */ \ 774 - .long 0x05009f42; /* bcl 20,31,$+4 */ \ 775 - .long 0xa602487d; /* mflr r10 */ \ 776 - .long 0x1c004a39; /* addi r10,r10,28 */ \ 773 + b $+44; /* Skip trampoline if endian is good */ \ 777 774 .long 0xa600607d; /* mfmsr r11 */ \ 778 775 .long 0x01006b69; /* xori r11,r11,1 */ \ 776 + .long 0x00004039; /* li r10,0 */ \ 777 + .long 0x6401417d; /* mtmsrd r10,1 */ \ 778 + .long 0x05009f42; /* bcl 20,31,$+4 */ \ 779 + .long 0xa602487d; /* mflr r10 */ \ 780 + .long 0x14004a39; /* addi r10,r10,20 */ \ 779 781 .long 0xa6035a7d; /* mtsrr0 r10 */ \ 780 782 .long 0xa6037b7d; /* mtsrr1 r11 */ \ 781 783 .long 0x2400004c /* rfid */ 784 + 782 785 #endif /* !CONFIG_PPC_BOOK3E */ 783 786 784 787 #endif /* __ASSEMBLY__ */
+25 -5
arch/powerpc/include/asm/processor.h
··· 421 421 422 422 #ifdef CONFIG_PPC64 423 423 #define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0) 424 + 425 + #define spin_begin() HMT_low() 426 + 427 + #define spin_cpu_relax() barrier() 428 + 429 + #define spin_cpu_yield() spin_cpu_relax() 430 + 431 + #define spin_end() HMT_medium() 432 + 433 + #define spin_until_cond(cond) \ 434 + do { \ 435 + if (unlikely(!(cond))) { \ 436 + spin_begin(); \ 437 + do { \ 438 + spin_cpu_relax(); \ 439 + } while (!(cond)); \ 440 + spin_end(); \ 441 + } \ 442 + } while (0) 443 + 424 444 #else 425 445 #define cpu_relax() barrier() 426 446 #endif ··· 494 474 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; 495 475 496 476 extern int powersave_nap; /* set if nap mode can be used in idle loop */ 497 - extern unsigned long power7_nap(int check_irq); 498 - extern unsigned long power7_sleep(void); 499 - extern unsigned long power7_winkle(void); 500 - extern unsigned long power9_idle_stop(unsigned long stop_psscr_val, 501 - unsigned long stop_psscr_mask); 477 + extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ 478 + extern void power7_idle_type(unsigned long type); 479 + extern unsigned long power9_idle_stop(unsigned long psscr_val); 480 + extern void power9_idle_type(unsigned long stop_psscr_val, 481 + unsigned long stop_psscr_mask); 502 482 503 483 extern void flush_instruction_cache(void); 504 484 extern void hard_reset_now(void);
+33
arch/powerpc/include/asm/trace.h
··· 168 168 __entry->addr, __entry->access, __entry->trap) 169 169 ); 170 170 171 + 172 + TRACE_EVENT(tlbie, 173 + 174 + TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb, 175 + unsigned long rs, unsigned long ric, unsigned long prs, 176 + unsigned long r), 177 + TP_ARGS(lpid, local, rb, rs, ric, prs, r), 178 + TP_STRUCT__entry( 179 + __field(unsigned long, lpid) 180 + __field(unsigned long, local) 181 + __field(unsigned long, rb) 182 + __field(unsigned long, rs) 183 + __field(unsigned long, ric) 184 + __field(unsigned long, prs) 185 + __field(unsigned long, r) 186 + ), 187 + 188 + TP_fast_assign( 189 + __entry->lpid = lpid; 190 + __entry->local = local; 191 + __entry->rb = rb; 192 + __entry->rs = rs; 193 + __entry->ric = ric; 194 + __entry->prs = prs; 195 + __entry->r = r; 196 + ), 197 + 198 + TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, " 199 + "prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local, 200 + __entry->rb, __entry->rs, __entry->ric, __entry->prs, 201 + __entry->r) 202 + ); 203 + 171 204 #endif /* _TRACE_POWERPC_H */ 172 205 173 206 #undef TRACE_INCLUDE_PATH
+6
arch/powerpc/include/uapi/asm/Kbuild
··· 1 1 # UAPI Header export list 2 2 include include/uapi/asm-generic/Kbuild.asm 3 + 4 + generic-y += param.h 5 + generic-y += poll.h 6 + generic-y += resource.h 7 + generic-y += sockios.h 8 + generic-y += statfs.h
-1
arch/powerpc/include/uapi/asm/param.h
··· 1 - #include <asm-generic/param.h>
-1
arch/powerpc/include/uapi/asm/poll.h
··· 1 - #include <asm-generic/poll.h>
-1
arch/powerpc/include/uapi/asm/resource.h
··· 1 - #include <asm-generic/resource.h>
-20
arch/powerpc/include/uapi/asm/sockios.h
··· 1 - #ifndef _ASM_POWERPC_SOCKIOS_H 2 - #define _ASM_POWERPC_SOCKIOS_H 3 - 4 - /* 5 - * This program is free software; you can redistribute it and/or 6 - * modify it under the terms of the GNU General Public License 7 - * as published by the Free Software Foundation; either version 8 - * 2 of the License, or (at your option) any later version. 9 - */ 10 - 11 - /* Socket-level I/O control calls. */ 12 - #define FIOSETOWN 0x8901 13 - #define SIOCSPGRP 0x8902 14 - #define FIOGETOWN 0x8903 15 - #define SIOCGPGRP 0x8904 16 - #define SIOCATMARK 0x8905 17 - #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ 18 - #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ 19 - 20 - #endif /* _ASM_POWERPC_SOCKIOS_H */
-6
arch/powerpc/include/uapi/asm/statfs.h
··· 1 - #ifndef _ASM_POWERPC_STATFS_H 2 - #define _ASM_POWERPC_STATFS_H 3 - 4 - #include <asm-generic/statfs.h> 5 - 6 - #endif
-2
arch/powerpc/kernel/Makefile
··· 25 25 CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 26 26 CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 27 27 CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 28 - # timers used by tracing 29 - CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) 30 28 endif 31 29 32 30 obj-y := cputable.o ptrace.o syscalls.o \
+6 -4
arch/powerpc/kernel/asm-offsets.c
··· 100 100 OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); 101 101 #endif 102 102 OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); 103 - OFFSET(THREAD_FPSTATE, thread_struct, fp_state); 103 + OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); 104 104 OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); 105 105 OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); 106 106 OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); 107 107 #ifdef CONFIG_ALTIVEC 108 - OFFSET(THREAD_VRSTATE, thread_struct, vr_state); 108 + OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr); 109 109 OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); 110 110 OFFSET(THREAD_VRSAVE, thread_struct, vrsave); 111 111 OFFSET(THREAD_USED_VR, thread_struct, used_vr); ··· 145 145 OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr); 146 146 OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr); 147 147 OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs); 148 - OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state); 148 + OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); 149 149 OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); 150 - OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state); 150 + OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr); 151 151 /* Local pt_regs on stack for Transactional Memory funcs. */ 152 152 DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + 153 153 sizeof(struct pt_regs) + 16); ··· 745 745 OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); 746 746 OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); 747 747 OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); 748 + OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); 748 749 #endif 749 750 750 751 DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); 752 + DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); 751 753 752 754 #ifdef CONFIG_PPC_8xx 753 755 DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
+125 -68
arch/powerpc/kernel/entry_64.S
··· 57 57 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 58 58 BEGIN_FTR_SECTION 59 59 extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ 60 - bne tabort_syscall 60 + bne .Ltabort_syscall 61 61 END_FTR_SECTION_IFSET(CPU_FTR_TM) 62 62 #endif 63 63 andi. r10,r12,MSR_PR ··· 143 143 mtmsrd r11,1 144 144 #endif /* CONFIG_PPC_BOOK3E */ 145 145 146 + system_call: /* label this so stack traces look sane */ 146 147 /* We do need to set SOFTE in the stack frame or the return 147 148 * from interrupt will be painful 148 149 */ ··· 153 152 CURRENT_THREAD_INFO(r11, r1) 154 153 ld r10,TI_FLAGS(r11) 155 154 andi. r11,r10,_TIF_SYSCALL_DOTRACE 156 - bne syscall_dotrace /* does not return */ 155 + bne .Lsyscall_dotrace /* does not return */ 157 156 cmpldi 0,r0,NR_syscalls 158 - bge- syscall_enosys 157 + bge- .Lsyscall_enosys 159 158 160 - system_call: /* label this so stack traces look sane */ 159 + .Lsyscall: 161 160 /* 162 161 * Need to vector to 32 Bit or default sys_call_table here, 163 162 * based on caller's run-mode / personality. ··· 186 185 #ifdef CONFIG_PPC_BOOK3S 187 186 /* No MSR:RI on BookE */ 188 187 andi. r10,r8,MSR_RI 189 - beq- unrecov_restore 188 + beq- .Lunrecov_restore 190 189 #endif 190 + 191 + /* 192 + * This is a few instructions into the actual syscall exit path (which actually 193 + * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the 194 + * number of visible symbols for profiling purposes. 195 + * 196 + * We can probe from system_call until this point as MSR_RI is set. But once it 197 + * is cleared below, we won't be able to take a trap. 198 + * 199 + * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL(). 200 + */ 201 + system_call_exit: 191 202 /* 192 203 * Disable interrupts so current_thread_info()->flags can't change, 193 204 * and so that we don't get interrupted after loading SRR0/1. ··· 221 208 ld r9,TI_FLAGS(r12) 222 209 li r11,-MAX_ERRNO 223 210 andi. 
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) 224 - bne- syscall_exit_work 211 + bne- .Lsyscall_exit_work 225 212 226 - andi. r0,r8,MSR_FP 227 - beq 2f 213 + /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */ 214 + li r7,MSR_FP 228 215 #ifdef CONFIG_ALTIVEC 229 - andis. r0,r8,MSR_VEC@h 230 - bne 3f 216 + oris r7,r7,MSR_VEC@h 231 217 #endif 232 - 2: addi r3,r1,STACK_FRAME_OVERHEAD 233 - #ifdef CONFIG_PPC_BOOK3S 234 - li r10,MSR_RI 235 - mtmsrd r10,1 /* Restore RI */ 236 - #endif 237 - bl restore_math 238 - #ifdef CONFIG_PPC_BOOK3S 239 - li r11,0 240 - mtmsrd r11,1 241 - #endif 242 - ld r8,_MSR(r1) 243 - ld r3,RESULT(r1) 244 - li r11,-MAX_ERRNO 218 + and r0,r8,r7 219 + cmpd r0,r7 220 + bne .Lsyscall_restore_math 221 + .Lsyscall_restore_math_cont: 245 222 246 - 3: cmpld r3,r11 223 + cmpld r3,r11 247 224 ld r5,_CCR(r1) 248 - bge- syscall_error 225 + bge- .Lsyscall_error 249 226 .Lsyscall_error_cont: 250 227 ld r7,_NIP(r1) 251 228 BEGIN_FTR_SECTION ··· 261 258 RFI 262 259 b . /* prevent speculative execution */ 263 260 264 - syscall_error: 261 + .Lsyscall_error: 265 262 oris r5,r5,0x1000 /* Set SO bit in CR */ 266 263 neg r3,r3 267 264 std r5,_CCR(r1) 268 265 b .Lsyscall_error_cont 269 - 266 + 267 + .Lsyscall_restore_math: 268 + /* 269 + * Some initial tests from restore_math to avoid the heavyweight 270 + * C code entry and MSR manipulations. 271 + */ 272 + LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK) 273 + and. 
r0,r0,r8 274 + bne 1f 275 + 276 + ld r7,PACACURRENT(r13) 277 + lbz r0,THREAD+THREAD_LOAD_FP(r7) 278 + #ifdef CONFIG_ALTIVEC 279 + lbz r6,THREAD+THREAD_LOAD_VEC(r7) 280 + add r0,r0,r6 281 + #endif 282 + cmpdi r0,0 283 + beq .Lsyscall_restore_math_cont 284 + 285 + 1: addi r3,r1,STACK_FRAME_OVERHEAD 286 + #ifdef CONFIG_PPC_BOOK3S 287 + li r10,MSR_RI 288 + mtmsrd r10,1 /* Restore RI */ 289 + #endif 290 + bl restore_math 291 + #ifdef CONFIG_PPC_BOOK3S 292 + li r11,0 293 + mtmsrd r11,1 294 + #endif 295 + /* Restore volatiles, reload MSR from updated one */ 296 + ld r8,_MSR(r1) 297 + ld r3,RESULT(r1) 298 + li r11,-MAX_ERRNO 299 + b .Lsyscall_restore_math_cont 300 + 270 301 /* Traced system call support */ 271 - syscall_dotrace: 302 + .Lsyscall_dotrace: 272 303 bl save_nvgprs 273 304 addi r3,r1,STACK_FRAME_OVERHEAD 274 305 bl do_syscall_trace_enter ··· 323 286 ld r7,GPR7(r1) 324 287 ld r8,GPR8(r1) 325 288 326 - /* Repopulate r9 and r10 for the system_call path */ 289 + /* Repopulate r9 and r10 for the syscall path */ 327 290 addi r9,r1,STACK_FRAME_OVERHEAD 328 291 CURRENT_THREAD_INFO(r10, r1) 329 292 ld r10,TI_FLAGS(r10) 330 293 331 294 cmpldi r0,NR_syscalls 332 - blt+ system_call 295 + blt+ .Lsyscall 333 296 334 297 /* Return code is already in r3 thanks to do_syscall_trace_enter() */ 335 298 b .Lsyscall_exit 336 299 337 300 338 - syscall_enosys: 301 + .Lsyscall_enosys: 339 302 li r3,-ENOSYS 340 303 b .Lsyscall_exit 341 304 342 - syscall_exit_work: 305 + .Lsyscall_exit_work: 343 306 #ifdef CONFIG_PPC_BOOK3S 344 307 li r10,MSR_RI 345 308 mtmsrd r10,1 /* Restore RI */ ··· 399 362 b ret_from_except 400 363 401 364 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 402 - tabort_syscall: 365 + .Ltabort_syscall: 403 366 /* Firstly we need to enable TM in the kernel */ 404 367 mfmsr r10 405 368 li r9, 1 ··· 425 388 rfid 426 389 b . 
/* prevent speculative execution */ 427 390 #endif 391 + _ASM_NOKPROBE_SYMBOL(system_call_common); 392 + _ASM_NOKPROBE_SYMBOL(system_call_exit); 428 393 429 394 /* Save non-volatile GPRs, if not already saved. */ 430 395 _GLOBAL(save_nvgprs) ··· 437 398 clrrdi r0,r11,1 438 399 std r0,_TRAP(r1) 439 400 blr 401 + _ASM_NOKPROBE_SYMBOL(save_nvgprs); 440 402 441 403 442 404 /* ··· 528 488 std r23,_CCR(r1) 529 489 std r1,KSP(r3) /* Set old stack pointer */ 530 490 531 - #ifdef CONFIG_SMP 532 - /* We need a sync somewhere here to make sure that if the 533 - * previous task gets rescheduled on another CPU, it sees all 534 - * stores it has performed on this one. 491 + /* 492 + * On SMP kernels, care must be taken because a task may be 493 + * scheduled off CPUx and on to CPUy. Memory ordering must be 494 + * considered. 495 + * 496 + * Cacheable stores on CPUx will be visible when the task is 497 + * scheduled on CPUy by virtue of the core scheduler barriers 498 + * (see "Notes on Program-Order guarantees on SMP systems." in 499 + * kernel/sched/core.c). 500 + * 501 + * Uncacheable stores in the case of involuntary preemption must 502 + * be taken care of. The smp_mb__before_spin_lock() in __schedule() 503 + * is implemented as hwsync on powerpc, which orders MMIO too. So 504 + * long as there is an hwsync in the context switch path, it will 505 + * be executed on the source CPU after the task has performed 506 + * all MMIO ops on that CPU, and on the destination CPU before the 507 + * task performs any MMIO ops there. 535 508 */ 536 - sync 537 - #endif /* CONFIG_SMP */ 538 509 539 510 /* 540 - * If we optimise away the clear of the reservation in system 541 - * calls because we know the CPU tracks the address of the 542 - * reservation, then we need to clear it here to cover the 543 - * case that the kernel context switch path has no larx 544 - * instructions. 
511 + * The kernel context switch path must contain a spin_lock, 512 + * which contains larx/stcx, which will clear any reservation 513 + * of the task being switched. 545 514 */ 546 - BEGIN_FTR_SECTION 547 - ldarx r6,0,r1 548 - END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) 549 - 550 - BEGIN_FTR_SECTION 551 - /* 552 - * A cp_abort (copy paste abort) here ensures that when context switching, a 553 - * copy from one process can't leak into the paste of another. 554 - */ 555 - PPC_CP_ABORT 556 - END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 557 - 558 515 #ifdef CONFIG_PPC_BOOK3S 559 516 /* Cancel all explict user streams as they will have no use after context 560 517 * switch and will stop the HW from creating streams itself ··· 620 583 top of the kernel stack. */ 621 584 addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE 622 585 586 + /* 587 + * PMU interrupts in radix may come in here. They will use r1, not 588 + * PACAKSAVE, so this stack switch will not cause a problem. They 589 + * will store to the process stack, which may then be migrated to 590 + * another CPU. However the rq lock release on this CPU paired with 591 + * the rq lock acquire on the new CPU before the stack becomes 592 + * active on the new CPU, will order those stores. 593 + */ 623 594 mr r1,r8 /* start using new stack pointer */ 624 595 std r7,PACAKSAVE(r13) 625 596 ··· 808 763 ld r5,SOFTE(r1) 809 764 lbz r6,PACASOFTIRQEN(r13) 810 765 cmpwi cr0,r5,0 811 - beq restore_irq_off 766 + beq .Lrestore_irq_off 812 767 813 768 /* We are enabling, were we already enabled ? 
Yes, just return */ 814 769 cmpwi cr0,r6,1 815 - beq cr0,do_restore 770 + beq cr0,.Ldo_restore 816 771 817 772 /* 818 773 * We are about to soft-enable interrupts (we are hard disabled ··· 821 776 */ 822 777 lbz r0,PACAIRQHAPPENED(r13) 823 778 cmpwi cr0,r0,0 824 - bne- restore_check_irq_replay 779 + bne- .Lrestore_check_irq_replay 825 780 826 781 /* 827 782 * Get here when nothing happened while soft-disabled, just 828 783 * soft-enable and move-on. We will hard-enable as a side 829 784 * effect of rfi 830 785 */ 831 - restore_no_replay: 786 + .Lrestore_no_replay: 832 787 TRACE_ENABLE_INTS 833 788 li r0,1 834 789 stb r0,PACASOFTIRQEN(r13); ··· 836 791 /* 837 792 * Final return path. BookE is handled in a different file 838 793 */ 839 - do_restore: 794 + .Ldo_restore: 840 795 #ifdef CONFIG_PPC_BOOK3E 841 796 b exception_return_book3e 842 797 #else ··· 870 825 REST_8GPRS(5, r1) 871 826 872 827 andi. r0,r3,MSR_RI 873 - beq- unrecov_restore 828 + beq- .Lunrecov_restore 874 829 875 830 /* Load PPR from thread struct before we clear MSR:RI */ 876 831 BEGIN_FTR_SECTION ··· 928 883 * make sure that in this case, we also clear PACA_IRQ_HARD_DIS 929 884 * or that bit can get out of sync and bad things will happen 930 885 */ 931 - restore_irq_off: 886 + .Lrestore_irq_off: 932 887 ld r3,_MSR(r1) 933 888 lbz r7,PACAIRQHAPPENED(r13) 934 889 andi. r0,r3,MSR_EE ··· 938 893 1: li r0,0 939 894 stb r0,PACASOFTIRQEN(r13); 940 895 TRACE_DISABLE_INTS 941 - b do_restore 896 + b .Ldo_restore 942 897 943 898 /* 944 899 * Something did happen, check if a re-emit is needed 945 900 * (this also clears paca->irq_happened) 946 901 */ 947 - restore_check_irq_replay: 902 + .Lrestore_check_irq_replay: 948 903 /* XXX: We could implement a fast path here where we check 949 904 * for irq_happened being just 0x01, in which case we can 950 905 * clear it and return. 
That means that we would potentially ··· 954 909 */ 955 910 bl __check_irq_replay 956 911 cmpwi cr0,r3,0 957 - beq restore_no_replay 912 + beq .Lrestore_no_replay 958 913 959 914 /* 960 915 * We need to re-emit an interrupt. We do so by re-using our ··· 1003 958 #endif /* CONFIG_PPC_DOORBELL */ 1004 959 1: b ret_from_except /* What else to do here ? */ 1005 960 1006 - unrecov_restore: 961 + .Lunrecov_restore: 1007 962 addi r3,r1,STACK_FRAME_OVERHEAD 1008 963 bl unrecoverable_exception 1009 - b unrecov_restore 964 + b .Lunrecov_restore 965 + 966 + _ASM_NOKPROBE_SYMBOL(ret_from_except); 967 + _ASM_NOKPROBE_SYMBOL(ret_from_except_lite); 968 + _ASM_NOKPROBE_SYMBOL(resume_kernel); 969 + _ASM_NOKPROBE_SYMBOL(fast_exc_return_irq); 970 + _ASM_NOKPROBE_SYMBOL(restore); 971 + _ASM_NOKPROBE_SYMBOL(fast_exception_return); 972 + 1010 973 1011 974 #ifdef CONFIG_PPC_RTAS 1012 975 /* ··· 1091 1038 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) 1092 1039 ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE 1093 1040 andc r6,r0,r9 1041 + 1042 + __enter_rtas: 1094 1043 sync /* disable interrupts so SRR0/1 */ 1095 1044 mtmsrd r0 /* don't get trashed */ 1096 1045 ··· 1129 1074 mtspr SPRN_SRR1,r4 1130 1075 rfid 1131 1076 b . /* prevent speculative execution */ 1077 + _ASM_NOKPROBE_SYMBOL(__enter_rtas) 1078 + _ASM_NOKPROBE_SYMBOL(rtas_return_loc) 1132 1079 1133 1080 .align 3 1134 1081 1: .llong rtas_restore_regs
+176 -108
arch/powerpc/kernel/exceptions-64s.S
··· 99 99 #ifdef CONFIG_PPC_P7_NAP 100 100 /* 101 101 * If running native on arch 2.06 or later, check if we are waking up 102 - * from nap/sleep/winkle, and branch to idle handler. 102 + * from nap/sleep/winkle, and branch to idle handler. This tests SRR1 103 + * bits 46:47. A non-0 value indicates that we are coming from a power 104 + * saving state. The idle wakeup handler initially runs in real mode, 105 + * but we branch to the 0xc000... address so we can turn on relocation 106 + * with mtmsr. 103 107 */ 104 108 #define IDLETEST(n) \ 105 109 BEGIN_FTR_SECTION ; \ ··· 111 107 rlwinm. r10,r10,47-31,30,31 ; \ 112 108 beq- 1f ; \ 113 109 cmpwi cr3,r10,2 ; \ 114 - BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ 110 + BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 115 111 1: \ 116 112 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) 117 113 #else ··· 132 128 133 129 #ifdef CONFIG_PPC_P7_NAP 134 130 EXC_COMMON_BEGIN(system_reset_idle_common) 131 + mfspr r12,SPRN_SRR1 135 132 b pnv_powersave_wakeup 136 133 #endif 137 134 ··· 512 507 SET_SCRATCH0(r13) 513 508 EXCEPTION_PROLOG_0(PACA_EXSLB) 514 509 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) 515 - std r3,PACA_EXSLB+EX_R3(r13) 510 + mr r12,r3 /* save r3 */ 516 511 mfspr r3,SPRN_DAR 517 - mfspr r12,SPRN_SRR1 512 + mfspr r11,SPRN_SRR1 518 513 crset 4*cr6+eq 519 - #ifndef CONFIG_RELOCATABLE 520 - b slb_miss_realmode 521 - #else 522 - /* 523 - * We can't just use a direct branch to slb_miss_realmode 524 - * because the distance from here to there depends on where 525 - * the kernel ends up being put. 
526 - */ 527 - mfctr r11 528 - LOAD_HANDLER(r10, slb_miss_realmode) 529 - mtctr r10 530 - bctr 531 - #endif 514 + BRANCH_TO_COMMON(r10, slb_miss_common) 532 515 EXC_REAL_END(data_access_slb, 0x380, 0x80) 533 516 534 517 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) 535 518 SET_SCRATCH0(r13) 536 519 EXCEPTION_PROLOG_0(PACA_EXSLB) 537 520 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) 538 - std r3,PACA_EXSLB+EX_R3(r13) 521 + mr r12,r3 /* save r3 */ 539 522 mfspr r3,SPRN_DAR 540 - mfspr r12,SPRN_SRR1 523 + mfspr r11,SPRN_SRR1 541 524 crset 4*cr6+eq 542 - #ifndef CONFIG_RELOCATABLE 543 - b slb_miss_realmode 544 - #else 545 - /* 546 - * We can't just use a direct branch to slb_miss_realmode 547 - * because the distance from here to there depends on where 548 - * the kernel ends up being put. 549 - */ 550 - mfctr r11 551 - LOAD_HANDLER(r10, slb_miss_realmode) 552 - mtctr r10 553 - bctr 554 - #endif 525 + BRANCH_TO_COMMON(r10, slb_miss_common) 555 526 EXC_VIRT_END(data_access_slb, 0x4380, 0x80) 556 527 TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) 557 528 ··· 556 575 SET_SCRATCH0(r13) 557 576 EXCEPTION_PROLOG_0(PACA_EXSLB) 558 577 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) 559 - std r3,PACA_EXSLB+EX_R3(r13) 578 + mr r12,r3 /* save r3 */ 560 579 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 561 - mfspr r12,SPRN_SRR1 580 + mfspr r11,SPRN_SRR1 562 581 crclr 4*cr6+eq 563 - #ifndef CONFIG_RELOCATABLE 564 - b slb_miss_realmode 565 - #else 566 - mfctr r11 567 - LOAD_HANDLER(r10, slb_miss_realmode) 568 - mtctr r10 569 - bctr 570 - #endif 582 + BRANCH_TO_COMMON(r10, slb_miss_common) 571 583 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) 572 584 573 585 EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) 574 586 SET_SCRATCH0(r13) 575 587 EXCEPTION_PROLOG_0(PACA_EXSLB) 576 588 EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) 577 - std r3,PACA_EXSLB+EX_R3(r13) 589 + mr r12,r3 /* save r3 */ 578 590 mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ 579 - mfspr r12,SPRN_SRR1 591 + mfspr 
r11,SPRN_SRR1 580 592 crclr 4*cr6+eq 581 - #ifndef CONFIG_RELOCATABLE 582 - b slb_miss_realmode 583 - #else 584 - mfctr r11 585 - LOAD_HANDLER(r10, slb_miss_realmode) 586 - mtctr r10 587 - bctr 588 - #endif 593 + BRANCH_TO_COMMON(r10, slb_miss_common) 589 594 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) 590 595 TRAMP_KVM(PACA_EXSLB, 0x480) 591 596 592 597 593 - /* This handler is used by both 0x380 and 0x480 slb miss interrupts */ 594 - EXC_COMMON_BEGIN(slb_miss_realmode) 598 + /* 599 + * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as 600 + * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled. 601 + */ 602 + EXC_COMMON_BEGIN(slb_miss_common) 595 603 /* 596 604 * r13 points to the PACA, r9 contains the saved CR, 597 - * r12 contain the saved SRR1, SRR0 is still ready for return 605 + * r12 contains the saved r3, 606 + * r11 contain the saved SRR1, SRR0 is still ready for return 598 607 * r3 has the faulting address 599 608 * r9 - r13 are saved in paca->exslb. 600 - * r3 is saved in paca->slb_r3 601 609 * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss 602 610 * We assume we aren't going to take any exceptions during this 603 611 * procedure. 604 612 */ 605 613 mflr r10 606 - #ifdef CONFIG_RELOCATABLE 607 - mtctr r11 608 - #endif 609 - 610 614 stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ 611 615 std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ 612 - std r3,PACA_EXSLB+EX_DAR(r13) 616 + 617 + /* 618 + * Test MSR_RI before calling slb_allocate_realmode, because the 619 + * MSR in r11 gets clobbered. However we still want to allocate 620 + * SLB in case MSR_RI=0, to minimise the risk of getting stuck in 621 + * recursive SLB faults. So use cr5 for this, which is preserved. 622 + */ 623 + andi. 
r11,r11,MSR_RI /* check for unrecoverable exception */ 624 + cmpdi cr5,r11,MSR_RI 613 625 614 626 crset 4*cr0+eq 615 627 #ifdef CONFIG_PPC_STD_MMU_64 616 628 BEGIN_MMU_FTR_SECTION 617 - bl slb_allocate_realmode 629 + bl slb_allocate 618 630 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) 619 631 #endif 620 632 621 633 ld r10,PACA_EXSLB+EX_LR(r13) 622 - ld r3,PACA_EXSLB+EX_R3(r13) 623 634 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ 624 635 mtlr r10 625 636 626 - beq 8f /* if bad address, make full stack frame */ 637 + beq- 8f /* if bad address, make full stack frame */ 627 638 628 - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ 629 - beq- 2f 639 + bne- cr5,2f /* if unrecoverable exception, oops */ 630 640 631 641 /* All done -- return from exception. */ 632 642 633 643 .machine push 634 644 .machine "power4" 635 645 mtcrf 0x80,r9 646 + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 636 647 mtcrf 0x02,r9 /* I/D indication is in cr6 */ 637 648 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 638 649 .machine pop 639 650 651 + RESTORE_CTR(r9, PACA_EXSLB) 640 652 RESTORE_PPR_PACA(PACA_EXSLB, r9) 653 + mr r3,r12 641 654 ld r9,PACA_EXSLB+EX_R9(r13) 642 655 ld r10,PACA_EXSLB+EX_R10(r13) 643 656 ld r11,PACA_EXSLB+EX_R11(r13) ··· 640 665 rfid 641 666 b . /* prevent speculative execution */ 642 667 643 - 2: mfspr r11,SPRN_SRR0 668 + 2: std r3,PACA_EXSLB+EX_DAR(r13) 669 + mr r3,r12 670 + mfspr r11,SPRN_SRR0 671 + mfspr r12,SPRN_SRR1 644 672 LOAD_HANDLER(r10,unrecov_slb) 645 673 mtspr SPRN_SRR0,r10 646 674 ld r10,PACAKMSR(r13) ··· 651 673 rfid 652 674 b . 
653 675 654 - 8: mfspr r11,SPRN_SRR0 676 + 8: std r3,PACA_EXSLB+EX_DAR(r13) 677 + mr r3,r12 678 + mfspr r11,SPRN_SRR0 679 + mfspr r12,SPRN_SRR1 655 680 LOAD_HANDLER(r10,bad_addr_slb) 656 681 mtspr SPRN_SRR0,r10 657 682 ld r10,PACAKMSR(r13) ··· 802 821 TRAMP_KVM(PACA_EXGEN, 0xb00) 803 822 EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) 804 823 824 + /* 825 + * system call / hypercall (0xc00, 0x4c00) 826 + * 827 + * The system call exception is invoked with "sc 0" and does not alter HV bit. 828 + * There is support for kernel code to invoke system calls but there are no 829 + * in-tree users. 830 + * 831 + * The hypercall is invoked with "sc 1" and sets HV=1. 832 + * 833 + * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to 834 + * 0x4c00 virtual mode. 835 + * 836 + * Call convention: 837 + * 838 + * syscall register convention is in Documentation/powerpc/syscall64-abi.txt 839 + * 840 + * For hypercalls, the register convention is as follows: 841 + * r0 volatile 842 + * r1-2 nonvolatile 843 + * r3 volatile parameter and return value for status 844 + * r4-r10 volatile input and output value 845 + * r11 volatile hypercall number and output value 846 + * r12 volatile 847 + * r13-r31 nonvolatile 848 + * LR nonvolatile 849 + * CTR volatile 850 + * XER volatile 851 + * CR0-1 CR5-7 volatile 852 + * CR2-4 nonvolatile 853 + * Other registers nonvolatile 854 + * 855 + * The intersection of volatile registers that don't contain possible 856 + * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs 857 + * upon entry without saving. 858 + */ 805 859 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 806 - /* 807 - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems 808 - * that support it) before changing to HMT_MEDIUM. That allows the KVM 809 - * code to save that value into the guest state (it is the guest's PPR 810 - * value). Otherwise just change to HMT_MEDIUM as userspace has 811 - * already saved the PPR. 
812 - */ 860 + /* 861 + * There is a little bit of juggling to get syscall and hcall 862 + * working well. Save r10 in ctr to be restored in case it is a 863 + * hcall. 864 + * 865 + * Userspace syscalls have already saved the PPR, hcalls must save 866 + * it before setting HMT_MEDIUM. 867 + */ 813 868 #define SYSCALL_KVMTEST \ 814 - SET_SCRATCH0(r13); \ 869 + mr r12,r13; \ 815 870 GET_PACA(r13); \ 816 - std r9,PACA_EXGEN+EX_R9(r13); \ 817 - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ 871 + mtctr r10; \ 872 + KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ 818 873 HMT_MEDIUM; \ 819 - std r10,PACA_EXGEN+EX_R10(r13); \ 820 - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \ 821 - mfcr r9; \ 822 - KVMTEST_PR(0xc00); \ 823 - GET_SCRATCH0(r13) 874 + mr r9,r12; \ 824 875 825 876 #else 826 877 #define SYSCALL_KVMTEST \ 827 - HMT_MEDIUM 878 + HMT_MEDIUM; \ 879 + mr r9,r13; \ 880 + GET_PACA(r13); 828 881 #endif 829 882 830 883 #define LOAD_SYSCALL_HANDLER(reg) \ 831 884 __LOAD_HANDLER(reg, system_call_common) 832 885 833 - /* Syscall routine is used twice, in reloc-off and reloc-on paths */ 834 - #define SYSCALL_PSERIES_1 \ 886 + #define SYSCALL_FASTENDIAN_TEST \ 835 887 BEGIN_FTR_SECTION \ 836 888 cmpdi r0,0x1ebe ; \ 837 889 beq- 1f ; \ 838 890 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ 839 - mr r9,r13 ; \ 840 - GET_PACA(r13) ; \ 841 - mfspr r11,SPRN_SRR0 ; \ 842 - 0: 843 891 844 - #define SYSCALL_PSERIES_2_RFID \ 892 + /* 893 + * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9, 894 + * and HMT_MEDIUM. 895 + */ 896 + #define SYSCALL_REAL \ 897 + mfspr r11,SPRN_SRR0 ; \ 845 898 mfspr r12,SPRN_SRR1 ; \ 846 899 LOAD_SYSCALL_HANDLER(r10) ; \ 847 900 mtspr SPRN_SRR0,r10 ; \ ··· 884 869 rfid ; \ 885 870 b . 
; /* prevent speculative execution */ 886 871 887 - #define SYSCALL_PSERIES_3 \ 872 + #define SYSCALL_FASTENDIAN \ 888 873 /* Fast LE/BE switch system call */ \ 889 874 1: mfspr r12,SPRN_SRR1 ; \ 890 875 xori r12,r12,MSR_LE ; \ 891 876 mtspr SPRN_SRR1,r12 ; \ 877 + mr r13,r9 ; \ 892 878 rfid ; /* return to userspace */ \ 893 879 b . ; /* prevent speculative execution */ 894 880 ··· 898 882 * We can't branch directly so we do it via the CTR which 899 883 * is volatile across system calls. 900 884 */ 901 - #define SYSCALL_PSERIES_2_DIRECT \ 902 - LOAD_SYSCALL_HANDLER(r12) ; \ 903 - mtctr r12 ; \ 885 + #define SYSCALL_VIRT \ 886 + LOAD_SYSCALL_HANDLER(r10) ; \ 887 + mtctr r10 ; \ 888 + mfspr r11,SPRN_SRR0 ; \ 904 889 mfspr r12,SPRN_SRR1 ; \ 905 890 li r10,MSR_RI ; \ 906 891 mtmsrd r10,1 ; \ 907 892 bctr ; 908 893 #else 909 894 /* We can branch directly */ 910 - #define SYSCALL_PSERIES_2_DIRECT \ 895 + #define SYSCALL_VIRT \ 896 + mfspr r11,SPRN_SRR0 ; \ 911 897 mfspr r12,SPRN_SRR1 ; \ 912 898 li r10,MSR_RI ; \ 913 899 mtmsrd r10,1 ; /* Set RI (EE=0) */ \ ··· 917 899 #endif 918 900 919 901 EXC_REAL_BEGIN(system_call, 0xc00, 0x100) 920 - SYSCALL_KVMTEST 921 - SYSCALL_PSERIES_1 922 - SYSCALL_PSERIES_2_RFID 923 - SYSCALL_PSERIES_3 902 + SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */ 903 + SYSCALL_FASTENDIAN_TEST 904 + SYSCALL_REAL 905 + SYSCALL_FASTENDIAN 924 906 EXC_REAL_END(system_call, 0xc00, 0x100) 925 907 926 908 EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) 927 - SYSCALL_KVMTEST 928 - SYSCALL_PSERIES_1 929 - SYSCALL_PSERIES_2_DIRECT 930 - SYSCALL_PSERIES_3 909 + SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */ 910 + SYSCALL_FASTENDIAN_TEST 911 + SYSCALL_VIRT 912 + SYSCALL_FASTENDIAN 931 913 EXC_VIRT_END(system_call, 0x4c00, 0x100) 932 914 933 - TRAMP_KVM(PACA_EXGEN, 0xc00) 915 + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER 916 + /* 917 + * This is a hcall, so register convention is as above, with these 918 + * differences: 919 + * r13 = PACA 920 + 
* r12 = orig r13 921 + * ctr = orig r10 922 + */ 923 + TRAMP_KVM_BEGIN(do_kvm_0xc00) 924 + /* 925 + * Save the PPR (on systems that support it) before changing to 926 + * HMT_MEDIUM. That allows the KVM code to save that value into the 927 + * guest state (it is the guest's PPR value). 928 + */ 929 + OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR) 930 + HMT_MEDIUM 931 + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR) 932 + mfctr r10 933 + SET_SCRATCH0(r12) 934 + std r9,PACA_EXGEN+EX_R9(r13) 935 + mfcr r9 936 + std r10,PACA_EXGEN+EX_R10(r13) 937 + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) 938 + #endif 934 939 935 940 936 941 EXC_REAL(single_step, 0xd00, 0x100) ··· 1594 1553 1: addi r3,r1,STACK_FRAME_OVERHEAD 1595 1554 bl kernel_bad_stack 1596 1555 b 1b 1556 + _ASM_NOKPROBE_SYMBOL(bad_stack); 1557 + 1558 + /* 1559 + * When doorbell is triggered from system reset wakeup, the message is 1560 + * not cleared, so it would fire again when EE is enabled. 1561 + * 1562 + * When coming from local_irq_enable, there may be the same problem if 1563 + * we were hard disabled. 1564 + * 1565 + * Execute msgclr to clear pending exceptions before handling it. 1566 + */ 1567 + h_doorbell_common_msgclr: 1568 + LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) 1569 + PPC_MSGCLR(3) 1570 + b h_doorbell_common 1571 + 1572 + doorbell_super_common_msgclr: 1573 + LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) 1574 + PPC_MSGCLRP(3) 1575 + b doorbell_super_common 1597 1576 1598 1577 /* 1599 1578 * Called from arch_local_irq_enable when an interrupt needs ··· 1624 1563 * Note: While MSR:EE is off, we need to make sure that _MSR 1625 1564 * in the generated frame has EE set to 1 or the exception 1626 1565 * handler will not properly re-enable them. 1566 + * 1567 + * Note that we don't specify LR as the NIP (return address) for 1568 + * the interrupt because that would unbalance the return branch 1569 + * predictor. 
1627 1570 */ 1628 1571 _GLOBAL(__replay_interrupt) 1629 1572 /* We are going to jump to the exception common code which ··· 1635 1570 * we don't give a damn about, so we don't bother storing them. 1636 1571 */ 1637 1572 mfmsr r12 1638 - mflr r11 1573 + LOAD_REG_ADDR(r11, 1f) 1639 1574 mfcr r9 1640 1575 ori r12,r12,MSR_EE 1641 1576 cmpwi r3,0x900 ··· 1644 1579 beq hardware_interrupt_common 1645 1580 BEGIN_FTR_SECTION 1646 1581 cmpwi r3,0xe80 1647 - beq h_doorbell_common 1582 + beq h_doorbell_common_msgclr 1648 1583 cmpwi r3,0xea0 1649 1584 beq h_virt_irq_common 1650 1585 cmpwi r3,0xe60 1651 1586 beq hmi_exception_common 1652 1587 FTR_SECTION_ELSE 1653 1588 cmpwi r3,0xa00 1654 - beq doorbell_super_common 1589 + beq doorbell_super_common_msgclr 1655 1590 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 1591 + 1: 1656 1592 blr 1593 + 1594 + _ASM_NOKPROBE_SYMBOL(__replay_interrupt)
+172 -24
arch/powerpc/kernel/fadump.c
··· 113 113 return 1; 114 114 } 115 115 116 + /* 117 + * If fadump is registered, check if the memory provided 118 + * falls within boot memory area. 119 + */ 120 + int is_fadump_boot_memory_area(u64 addr, ulong size) 121 + { 122 + if (!fw_dump.dump_registered) 123 + return 0; 124 + 125 + return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; 126 + } 127 + 116 128 int is_fadump_active(void) 117 129 { 118 130 return fw_dump.dump_active; 131 + } 132 + 133 + /* 134 + * Returns 1, if there are no holes in boot memory area, 135 + * 0 otherwise. 136 + */ 137 + static int is_boot_memory_area_contiguous(void) 138 + { 139 + struct memblock_region *reg; 140 + unsigned long tstart, tend; 141 + unsigned long start_pfn = PHYS_PFN(RMA_START); 142 + unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size); 143 + unsigned int ret = 0; 144 + 145 + for_each_memblock(memory, reg) { 146 + tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); 147 + tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); 148 + if (tstart < tend) { 149 + /* Memory hole from start_pfn to tstart */ 150 + if (tstart > start_pfn) 151 + break; 152 + 153 + if (tend == end_pfn) { 154 + ret = 1; 155 + break; 156 + } 157 + 158 + start_pfn = tend + 1; 159 + } 160 + } 161 + 162 + return ret; 119 163 } 120 164 121 165 /* Print firmware assisted dump configurations for debugging purpose. */ ··· 256 212 int ret; 257 213 unsigned long long base, size; 258 214 215 + if (fw_dump.reserve_bootvar) 216 + pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n"); 217 + 259 218 /* 260 219 * Check if the size is specified through crashkernel= cmdline 261 - * option. If yes, then use that but ignore base as fadump 262 - * reserves memory at end of RAM. 220 + * option. If yes, then use that but ignore base as fadump reserves 221 + * memory at a predefined offset. 
263 222 */ 264 223 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 265 224 &size, &base); 266 225 if (ret == 0 && size > 0) { 226 + unsigned long max_size; 227 + 228 + if (fw_dump.reserve_bootvar) 229 + pr_info("Using 'crashkernel=' parameter for memory reservation.\n"); 230 + 267 231 fw_dump.reserve_bootvar = (unsigned long)size; 232 + 233 + /* 234 + * Adjust if the boot memory size specified is above 235 + * the upper limit. 236 + */ 237 + max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO; 238 + if (fw_dump.reserve_bootvar > max_size) { 239 + fw_dump.reserve_bootvar = max_size; 240 + pr_info("Adjusted boot memory size to %luMB\n", 241 + (fw_dump.reserve_bootvar >> 20)); 242 + } 243 + 244 + return fw_dump.reserve_bootvar; 245 + } else if (fw_dump.reserve_bootvar) { 246 + /* 247 + * 'fadump_reserve_mem=' is being used to reserve memory 248 + * for firmware-assisted dump. 249 + */ 268 250 return fw_dump.reserve_bootvar; 269 251 } 270 252 271 253 /* divide by 20 to get 5% of value */ 272 - size = memblock_end_of_DRAM() / 20; 254 + size = memblock_phys_mem_size() / 20; 273 255 274 256 /* round it down in multiples of 256 */ 275 257 size = size & ~0x0FFFFFFFUL; ··· 447 377 } 448 378 early_param("fadump", early_fadump_param); 449 379 450 - static void register_fw_dump(struct fadump_mem_struct *fdm) 380 + /* 381 + * Look for fadump_reserve_mem= cmdline option 382 + * TODO: Remove references to 'fadump_reserve_mem=' parameter, 383 + * the sooner 'crashkernel=' parameter is accustomed to. 
384 + */ 385 + static int __init early_fadump_reserve_mem(char *p) 451 386 { 452 - int rc; 387 + if (p) 388 + fw_dump.reserve_bootvar = memparse(p, &p); 389 + return 0; 390 + } 391 + early_param("fadump_reserve_mem", early_fadump_reserve_mem); 392 + 393 + static int register_fw_dump(struct fadump_mem_struct *fdm) 394 + { 395 + int rc, err; 453 396 unsigned int wait_time; 454 397 455 398 pr_debug("Registering for firmware-assisted kernel dump...\n"); ··· 479 396 480 397 } while (wait_time); 481 398 399 + err = -EIO; 482 400 switch (rc) { 401 + default: 402 + pr_err("Failed to register. Unknown Error(%d).\n", rc); 403 + break; 483 404 case -1: 484 405 printk(KERN_ERR "Failed to register firmware-assisted kernel" 485 406 " dump. Hardware Error(%d).\n", rc); 486 407 break; 487 408 case -3: 409 + if (!is_boot_memory_area_contiguous()) 410 + pr_err("Can't have holes in boot memory area while " 411 + "registering fadump\n"); 412 + 488 413 printk(KERN_ERR "Failed to register firmware-assisted kernel" 489 414 " dump. Parameter Error(%d).\n", rc); 415 + err = -EINVAL; 490 416 break; 491 417 case -9: 492 418 printk(KERN_ERR "firmware-assisted kernel dump is already " 493 419 " registered."); 494 420 fw_dump.dump_registered = 1; 421 + err = -EEXIST; 495 422 break; 496 423 case 0: 497 424 printk(KERN_INFO "firmware-assisted kernel dump registration" 498 425 " is successful\n"); 499 426 fw_dump.dump_registered = 1; 427 + err = 0; 500 428 break; 501 429 } 430 + return err; 502 431 } 503 432 504 433 void crash_fadump(struct pt_regs *regs, const char *str) ··· 926 831 for_each_memblock(memory, reg) { 927 832 start = (unsigned long long)reg->base; 928 833 end = start + (unsigned long long)reg->size; 929 - if (start == RMA_START && end >= fw_dump.boot_memory_size) 930 - start = fw_dump.boot_memory_size; 834 + 835 + /* 836 + * skip the first memory chunk that is already added (RMA_START 837 + * through boot_memory_size). 
This logic needs a relook if and 838 + * when RMA_START changes to a non-zero value. 839 + */ 840 + BUILD_BUG_ON(RMA_START != 0); 841 + if (start < fw_dump.boot_memory_size) { 842 + if (end > fw_dump.boot_memory_size) 843 + start = fw_dump.boot_memory_size; 844 + else 845 + continue; 846 + } 931 847 932 848 /* add this range excluding the reserved dump area. */ 933 849 fadump_exclude_reserved_area(start, end); ··· 1062 956 return addr; 1063 957 } 1064 958 1065 - static void register_fadump(void) 959 + static int register_fadump(void) 1066 960 { 1067 961 unsigned long addr; 1068 962 void *vaddr; ··· 1072 966 * assisted dump. 1073 967 */ 1074 968 if (!fw_dump.reserve_dump_area_size) 1075 - return; 969 + return -ENODEV; 1076 970 1077 971 fadump_setup_crash_memory_ranges(); 1078 972 ··· 1085 979 fadump_create_elfcore_headers(vaddr); 1086 980 1087 981 /* register the future kernel dump with firmware. */ 1088 - register_fw_dump(&fdm); 982 + return register_fw_dump(&fdm); 1089 983 } 1090 984 1091 985 static int fadump_unregister_dump(struct fadump_mem_struct *fdm) ··· 1152 1046 } 1153 1047 } 1154 1048 1049 + static void fadump_free_reserved_memory(unsigned long start_pfn, 1050 + unsigned long end_pfn) 1051 + { 1052 + unsigned long pfn; 1053 + unsigned long time_limit = jiffies + HZ; 1054 + 1055 + pr_info("freeing reserved memory (0x%llx - 0x%llx)\n", 1056 + PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 1057 + 1058 + for (pfn = start_pfn; pfn < end_pfn; pfn++) { 1059 + free_reserved_page(pfn_to_page(pfn)); 1060 + 1061 + if (time_after(jiffies, time_limit)) { 1062 + cond_resched(); 1063 + time_limit = jiffies + HZ; 1064 + } 1065 + } 1066 + } 1067 + 1068 + /* 1069 + * Skip memory holes and free memory that was actually reserved. 
1070 + */ 1071 + static void fadump_release_reserved_area(unsigned long start, unsigned long end) 1072 + { 1073 + struct memblock_region *reg; 1074 + unsigned long tstart, tend; 1075 + unsigned long start_pfn = PHYS_PFN(start); 1076 + unsigned long end_pfn = PHYS_PFN(end); 1077 + 1078 + for_each_memblock(memory, reg) { 1079 + tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); 1080 + tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); 1081 + if (tstart < tend) { 1082 + fadump_free_reserved_memory(tstart, tend); 1083 + 1084 + if (tend == end_pfn) 1085 + break; 1086 + 1087 + start_pfn = tend + 1; 1088 + } 1089 + } 1090 + } 1091 + 1155 1092 /* 1156 1093 * Release the memory that was reserved in early boot to preserve the memory 1157 1094 * contents. The released memory will be available for general use. 1158 1095 */ 1159 1096 static void fadump_release_memory(unsigned long begin, unsigned long end) 1160 1097 { 1161 - unsigned long addr; 1162 1098 unsigned long ra_start, ra_end; 1163 1099 1164 1100 ra_start = fw_dump.reserve_dump_area_start; 1165 1101 ra_end = ra_start + fw_dump.reserve_dump_area_size; 1166 1102 1167 - for (addr = begin; addr < end; addr += PAGE_SIZE) { 1168 - /* 1169 - * exclude the dump reserve area. Will reuse it for next 1170 - * fadump registration. 1171 - */ 1172 - if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start)) 1173 - continue; 1174 - 1175 - free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); 1176 - } 1103 + /* 1104 + * exclude the dump reserve area. Will reuse it for next 1105 + * fadump registration. 
1106 + */ 1107 + if (begin < ra_end && end > ra_start) { 1108 + if (begin < ra_start) 1109 + fadump_release_reserved_area(begin, ra_start); 1110 + if (end > ra_end) 1111 + fadump_release_reserved_area(ra_end, end); 1112 + } else 1113 + fadump_release_reserved_area(begin, end); 1177 1114 } 1178 1115 1179 1116 static void fadump_invalidate_release_mem(void) ··· 1310 1161 switch (buf[0]) { 1311 1162 case '0': 1312 1163 if (fw_dump.dump_registered == 0) { 1313 - ret = -EINVAL; 1314 1164 goto unlock_out; 1315 1165 } 1316 1166 /* Un-register Firmware-assisted dump */ ··· 1317 1169 break; 1318 1170 case '1': 1319 1171 if (fw_dump.dump_registered == 1) { 1320 - ret = -EINVAL; 1172 + ret = -EEXIST; 1321 1173 goto unlock_out; 1322 1174 } 1323 1175 /* Register Firmware-assisted dump */ 1324 - register_fadump(); 1176 + ret = register_fadump(); 1325 1177 break; 1326 1178 default: 1327 1179 ret = -EINVAL;
+86 -102
arch/powerpc/kernel/idle_book3s.S
··· 31 31 * registers for winkle support. 32 32 */ 33 33 #define _SDR1 GPR3 34 + #define _PTCR GPR3 34 35 #define _RPR GPR4 35 36 #define _SPURR GPR5 36 37 #define _PURR GPR6 ··· 40 39 #define _AMOR GPR9 41 40 #define _WORT GPR10 42 41 #define _WORC GPR11 43 - #define _PTCR GPR12 42 + #define _LPCR GPR12 44 43 45 44 #define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 46 45 ··· 56 55 * here since any thread in the core might wake up first 57 56 */ 58 57 BEGIN_FTR_SECTION 59 - mfspr r3,SPRN_PTCR 60 - std r3,_PTCR(r1) 61 58 /* 62 59 * Note - SDR1 is dropped in Power ISA v3. Hence not restoring 63 60 * SDR1 here 64 61 */ 62 + mfspr r3,SPRN_PTCR 63 + std r3,_PTCR(r1) 64 + mfspr r3,SPRN_LPCR 65 + std r3,_LPCR(r1) 65 66 FTR_SECTION_ELSE 66 67 mfspr r3,SPRN_SDR1 67 68 std r3,_SDR1(r1) ··· 109 106 /* 110 107 * Pass requested state in r3: 111 108 * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 112 - * - Requested STOP state in POWER9 109 + * - Requested PSSCR value in POWER9 113 110 * 114 - * To check IRQ_HAPPENED in r4 115 - * 0 - don't check 116 - * 1 - check 117 - * 118 - * Address to 'rfid' to in r5 111 + * Address of idle handler to branch to in realmode in r4 119 112 */ 120 113 pnv_powersave_common: 121 114 /* Use r3 to pass state nap/sleep/winkle */ ··· 121 122 * need to save PC, some CR bits and the NV GPRs, 122 123 * but for now an interrupt frame will do. 123 124 */ 125 + mtctr r4 126 + 124 127 mflr r0 125 128 std r0,16(r1) 126 129 stdu r1,-INT_FRAME_SIZE(r1) 127 130 std r0,_LINK(r1) 128 131 std r0,_NIP(r1) 129 - 130 - /* Hard disable interrupts */ 131 - mfmsr r9 132 - rldicl r9,r9,48,1 133 - rotldi r9,r9,16 134 - mtmsrd r9,1 /* hard-disable interrupts */ 135 - 136 - /* Check if something happened while soft-disabled */ 137 - lbz r0,PACAIRQHAPPENED(r13) 138 - andi. 
r0,r0,~PACA_IRQ_HARD_DIS@l 139 - beq 1f 140 - cmpwi cr0,r4,0 141 - beq 1f 142 - addi r1,r1,INT_FRAME_SIZE 143 - ld r0,16(r1) 144 - li r3,0 /* Return 0 (no nap) */ 145 - mtlr r0 146 - blr 147 - 148 - 1: /* We mark irqs hard disabled as this is the state we'll 149 - * be in when returning and we need to tell arch_local_irq_restore() 150 - * about it 151 - */ 152 - li r0,PACA_IRQ_HARD_DIS 153 - stb r0,PACAIRQHAPPENED(r13) 154 132 155 133 /* We haven't lost state ... yet */ 156 134 li r0,0 ··· 136 160 /* Continue saving state */ 137 161 SAVE_GPR(2, r1) 138 162 SAVE_NVGPRS(r1) 139 - mfcr r4 140 - std r4,_CCR(r1) 141 - std r9,_MSR(r1) 163 + mfcr r5 164 + std r5,_CCR(r1) 142 165 std r1,PACAR1(r13) 143 166 144 167 /* ··· 147 172 * the MMU context to the guest. 148 173 */ 149 174 LOAD_REG_IMMEDIATE(r7, MSR_IDLE) 150 - li r6, MSR_RI 151 - andc r6, r9, r6 152 - mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ 153 - mtspr SPRN_SRR0, r5 154 - mtspr SPRN_SRR1, r7 155 - rfid 175 + mtmsrd r7,0 176 + bctr 156 177 157 178 .globl pnv_enter_arch207_idle_mode 158 179 pnv_enter_arch207_idle_mode: ··· 256 285 bne .Lhandle_esl_ec_set 257 286 IDLE_STATE_ENTER_SEQ(PPC_STOP) 258 287 li r3,0 /* Since we didn't lose state, return 0 */ 288 + 289 + /* 290 + * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so 291 + * it can determine if the wakeup reason is an HMI in 292 + * CHECK_HMI_INTERRUPT. 293 + * 294 + * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup 295 + * reason, so there is no point setting r12 to SRR1. 296 + * 297 + * Further, we clear r12 here, so that we don't accidentally enter the 298 + * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI. 299 + */ 300 + li r12, 0 259 301 b pnv_wakeup_noloss 260 302 261 303 .Lhandle_esl_ec_set: ··· 303 319 304 320 IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 305 321 306 - _GLOBAL(power7_idle) 322 + /* 323 + * Entered with MSR[EE]=0 and no soft-masked interrupts pending. 
324 + * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE). 325 + */ 326 + _GLOBAL(power7_idle_insn) 307 327 /* Now check if user or arch enabled NAP mode */ 308 - LOAD_REG_ADDRBASE(r3,powersave_nap) 309 - lwz r4,ADDROFF(powersave_nap)(r3) 310 - cmpwi 0,r4,0 311 - beqlr 312 - li r3, 1 313 - /* fall through */ 314 - 315 - _GLOBAL(power7_nap) 316 - mr r4,r3 317 - li r3,PNV_THREAD_NAP 318 - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) 328 + LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode) 319 329 b pnv_powersave_common 320 - /* No return */ 321 - 322 - _GLOBAL(power7_sleep) 323 - li r3,PNV_THREAD_SLEEP 324 - li r4,1 325 - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) 326 - b pnv_powersave_common 327 - /* No return */ 328 - 329 - _GLOBAL(power7_winkle) 330 - li r3,PNV_THREAD_WINKLE 331 - li r4,1 332 - LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) 333 - b pnv_powersave_common 334 - /* No return */ 335 330 336 331 #define CHECK_HMI_INTERRUPT \ 337 - mfspr r0,SPRN_SRR1; \ 338 332 BEGIN_FTR_SECTION_NESTED(66); \ 339 - rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ 333 + rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \ 340 334 FTR_SECTION_ELSE_NESTED(66); \ 341 - rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ 335 + rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \ 342 336 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ 343 337 cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ 344 - bne 20f; \ 338 + bne+ 20f; \ 345 339 /* Invoke opal call to handle hmi */ \ 346 340 ld r2,PACATOC(r13); \ 347 341 ld r1,PACAR1(r13); \ ··· 331 369 20: nop; 332 370 333 371 /* 334 - * r3 - The PSSCR value corresponding to the stop state. 335 - * r4 - The PSSCR mask corrresonding to the stop state. 372 + * Entered with MSR[EE]=0 and no soft-masked interrupts pending. 373 + * r3 contains desired PSSCR register value. 
336 374 */ 337 375 _GLOBAL(power9_idle_stop) 338 - mfspr r5,SPRN_PSSCR 339 - andc r5,r5,r4 340 - or r3,r3,r5 376 + std r3, PACA_REQ_PSSCR(r13) 341 377 mtspr SPRN_PSSCR,r3 342 - LOAD_REG_ADDR(r5,power_enter_stop) 343 - li r4,1 378 + LOAD_REG_ADDR(r4,power_enter_stop) 344 379 b pnv_powersave_common 345 380 /* No return */ 346 381 ··· 395 436 396 437 /* 397 438 * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake 398 - * reason into SRR1, which allows reuse of the system reset wakeup 439 + * reason into r12, which allows reuse of the system reset wakeup 399 440 * code without being mistaken for another type of wakeup. 400 441 */ 401 - oris r3,r3,SRR1_WAKEMCE_RESVD@h 402 - mtspr SPRN_SRR1,r3 442 + oris r12,r3,SRR1_WAKEMCE_RESVD@h 403 443 404 444 b pnv_powersave_wakeup 405 445 406 446 /* 407 447 * Called from reset vector for powersave wakeups. 408 448 * cr3 - set to gt if waking up with partial/complete hypervisor state loss 449 + * r12 - SRR1 409 450 */ 410 451 .global pnv_powersave_wakeup 411 452 pnv_powersave_wakeup: ··· 423 464 li r0,PNV_THREAD_RUNNING 424 465 stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ 425 466 467 + mr r3,r12 468 + 426 469 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 427 470 li r0,KVM_HWTHREAD_IN_KERNEL 428 471 stb r0,HSTATE_HWTHREAD_STATE(r13) ··· 438 477 #endif 439 478 440 479 /* Return SRR1 from power7_nap() */ 441 - mfspr r3,SPRN_SRR1 442 480 blt cr3,pnv_wakeup_noloss 443 481 b pnv_wakeup_loss 444 482 ··· 449 489 */ 450 490 pnv_restore_hyp_resource_arch300: 451 491 /* 492 + * Workaround for POWER9, if we lost resources, the ERAT 493 + * might have been mixed up and needs flushing. 494 + */ 495 + blt cr3,1f 496 + PPC_INVALIDATE_ERAT 497 + 1: 498 + /* 452 499 * POWER ISA 3. 
Use PSSCR to determine if we 453 500 * are waking up from deep idle state 454 501 */ 455 502 LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) 456 503 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) 457 504 458 - mfspr r5,SPRN_PSSCR 505 + BEGIN_FTR_SECTION_NESTED(71) 506 + /* 507 + * Assume that we are waking up from the state 508 + * same as the Requested Level (RL) in the PSSCR 509 + * which are Bits 60-63 510 + */ 511 + ld r5,PACA_REQ_PSSCR(r13) 512 + rldicl r5,r5,0,60 513 + FTR_SECTION_ELSE_NESTED(71) 459 514 /* 460 515 * 0-3 bits correspond to Power-Saving Level Status 461 516 * which indicates the idle state we are waking up from 462 517 */ 518 + mfspr r5, SPRN_PSSCR 463 519 rldicl r5,r5,4,60 520 + ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) 464 521 cmpd cr4,r5,r4 465 522 bge cr4,pnv_wakeup_tb_loss /* returns to caller */ 466 523 ··· 544 567 * is required to return back to reset vector after hypervisor state 545 568 * restore is complete. 546 569 */ 570 + mr r19,r12 547 571 mr r18,r4 548 572 mflr r17 549 - mfspr r16,SPRN_SRR1 550 573 BEGIN_FTR_SECTION 551 574 CHECK_HMI_INTERRUPT 552 575 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ··· 708 731 * Use cr3 which indicates that we are waking up with atleast partial 709 732 * hypervisor state loss to determine if TIMEBASE RESYNC is needed. 710 733 */ 711 - ble cr3,clear_lock 734 + ble cr3,.Ltb_resynced 712 735 /* Time base re-sync */ 713 736 bl opal_resync_timebase; 714 737 /* 715 - * If waking up from sleep, per core state is not lost, skip to 716 - * clear_lock. 738 + * If waking up from sleep (POWER8), per core state 739 + * is not lost, skip to clear_lock. 
717 740 */ 741 + .Ltb_resynced: 718 742 blt cr4,clear_lock 719 743 720 744 /* ··· 790 812 mtctr r12 791 813 bctrl 792 814 815 + BEGIN_FTR_SECTION 816 + ld r4,_LPCR(r1) 817 + mtspr SPRN_LPCR,r4 818 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 793 819 hypervisor_state_restored: 794 820 795 - mtspr SPRN_SRR1,r16 821 + mr r12,r19 796 822 mtlr r17 797 823 blr /* return to pnv_powersave_wakeup */ 798 824 ··· 809 827 /* 810 828 * R3 here contains the value that will be returned to the caller 811 829 * of power7_nap. 830 + * R12 contains SRR1 for CHECK_HMI_INTERRUPT. 812 831 */ 813 832 .global pnv_wakeup_loss 814 833 pnv_wakeup_loss: ··· 819 836 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 820 837 REST_NVGPRS(r1) 821 838 REST_GPR(2, r1) 839 + ld r4,PACAKMSR(r13) 840 + ld r5,_LINK(r1) 822 841 ld r6,_CCR(r1) 823 - ld r4,_MSR(r1) 824 - ld r5,_NIP(r1) 825 842 addi r1,r1,INT_FRAME_SIZE 843 + mtlr r5 826 844 mtcr r6 827 - mtspr SPRN_SRR1,r4 828 - mtspr SPRN_SRR0,r5 829 - rfid 845 + mtmsrd r4 846 + blr 830 847 831 848 /* 832 849 * R3 here contains the value that will be returned to the caller 833 850 * of power7_nap. 851 + * R12 contains SRR1 for CHECK_HMI_INTERRUPT. 834 852 */ 835 853 pnv_wakeup_noloss: 836 854 lbz r0,PACA_NAPSTATELOST(r13) 837 855 cmpwi r0,0 838 856 bne pnv_wakeup_loss 857 + ld r1,PACAR1(r13) 839 858 BEGIN_FTR_SECTION 840 859 CHECK_HMI_INTERRUPT 841 860 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) 842 - ld r1,PACAR1(r13) 843 - ld r6,_CCR(r1) 844 - ld r4,_MSR(r1) 861 + ld r4,PACAKMSR(r13) 845 862 ld r5,_NIP(r1) 863 + ld r6,_CCR(r1) 846 864 addi r1,r1,INT_FRAME_SIZE 865 + mtlr r5 847 866 mtcr r6 848 - mtspr SPRN_SRR1,r4 849 - mtspr SPRN_SRR0,r5 850 - rfid 867 + mtmsrd r4 868 + blr
+61 -1
arch/powerpc/kernel/irq.c
··· 322 322 * First we need to hard disable to ensure no interrupt 323 323 * occurs before we effectively enter the low power state 324 324 */ 325 - hard_irq_disable(); 325 + __hard_irq_disable(); 326 + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 326 327 327 328 /* 328 329 * If anything happened while we were soft-disabled, ··· 347 346 /* Tell the caller to enter the low power state */ 348 347 return true; 349 348 } 349 + 350 + #ifdef CONFIG_PPC_BOOK3S 351 + /* 352 + * This is for idle sequences that return with IRQs off, but the 353 + * idle state itself wakes on interrupt. Tell the irq tracer that 354 + * IRQs are enabled for the duration of idle so it does not get long 355 + * off times. Must be paired with fini_irq_for_idle_irqsoff. 356 + */ 357 + bool prep_irq_for_idle_irqsoff(void) 358 + { 359 + WARN_ON(!irqs_disabled()); 360 + 361 + /* 362 + * First we need to hard disable to ensure no interrupt 363 + * occurs before we effectively enter the low power state 364 + */ 365 + __hard_irq_disable(); 366 + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; 367 + 368 + /* 369 + * If anything happened while we were soft-disabled, 370 + * we return now and do not enter the low power state. 371 + */ 372 + if (lazy_irq_pending()) 373 + return false; 374 + 375 + /* Tell lockdep we are about to re-enable */ 376 + trace_hardirqs_on(); 377 + 378 + return true; 379 + } 380 + 381 + /* 382 + * Take the SRR1 wakeup reason, index into this table to find the 383 + * appropriate irq_happened bit. 
384 + */ 385 + static const u8 srr1_to_lazyirq[0x10] = { 386 + 0, 0, 0, 387 + PACA_IRQ_DBELL, 388 + 0, 389 + PACA_IRQ_DBELL, 390 + PACA_IRQ_DEC, 391 + 0, 392 + PACA_IRQ_EE, 393 + PACA_IRQ_EE, 394 + PACA_IRQ_HMI, 395 + 0, 0, 0, 0, 0 }; 396 + 397 + void irq_set_pending_from_srr1(unsigned long srr1) 398 + { 399 + unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; 400 + 401 + /* 402 + * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, 403 + * so this can be called unconditionally with srr1 wake reason. 404 + */ 405 + local_paca->irq_happened |= srr1_to_lazyirq[idx]; 406 + } 407 + #endif /* CONFIG_PPC_BOOK3S */ 350 408 351 409 /* 352 410 * Force a replay of the external interrupt handler on this CPU.
+2 -6
arch/powerpc/kernel/kprobes.c
··· 164 164 165 165 void arch_arm_kprobe(struct kprobe *p) 166 166 { 167 - *p->addr = BREAKPOINT_INSTRUCTION; 168 - flush_icache_range((unsigned long) p->addr, 169 - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); 167 + patch_instruction(p->addr, BREAKPOINT_INSTRUCTION); 170 168 } 171 169 NOKPROBE_SYMBOL(arch_arm_kprobe); 172 170 173 171 void arch_disarm_kprobe(struct kprobe *p) 174 172 { 175 - *p->addr = p->opcode; 176 - flush_icache_range((unsigned long) p->addr, 177 - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); 173 + patch_instruction(p->addr, p->opcode); 178 174 } 179 175 NOKPROBE_SYMBOL(arch_disarm_kprobe); 180 176
+1
arch/powerpc/kernel/mce.c
··· 268 268 static const char *mc_ra_types[] = { 269 269 "Indeterminate", 270 270 "Instruction fetch (bad)", 271 + "Instruction fetch (foreign)", 271 272 "Page table walk ifetch (bad)", 272 273 "Page table walk ifetch (foreign)", 273 274 "Load (bad)",
+3
arch/powerpc/kernel/mce_power.c
··· 236 236 { 0x00000000081c0000, 0x0000000000180000, true, 237 237 MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, 238 238 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 239 + { 0x00000000081c0000, 0x00000000001c0000, true, 240 + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, 241 + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, 239 242 { 0x00000000081c0000, 0x0000000008000000, true, 240 243 MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, 241 244 MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+2 -4
arch/powerpc/kernel/misc_32.S
··· 244 244 */ 245 245 _GLOBAL(real_readb) 246 246 mfmsr r7 247 - ori r0,r7,MSR_DR 248 - xori r0,r0,MSR_DR 247 + rlwinm r0,r7,0,~MSR_DR 249 248 sync 250 249 mtmsr r0 251 250 sync ··· 261 262 */ 262 263 _GLOBAL(real_writeb) 263 264 mfmsr r7 264 - ori r0,r7,MSR_DR 265 - xori r0,r0,MSR_DR 265 + rlwinm r0,r7,0,~MSR_DR 266 266 sync 267 267 mtmsr r0 268 268 sync
+32 -21
arch/powerpc/kernel/optprobes.c
··· 158 158 void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) 159 159 { 160 160 /* addis r4,0,(insn)@h */ 161 - *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) | 162 - ((val >> 16) & 0xffff); 161 + patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) | 162 + ((val >> 16) & 0xffff)); 163 + addr++; 163 164 164 165 /* ori r4,r4,(insn)@l */ 165 - *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) | 166 - (val & 0xffff); 166 + patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) | 167 + ___PPC_RS(4) | (val & 0xffff)); 167 168 } 168 169 169 170 /* ··· 174 173 void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) 175 174 { 176 175 /* lis r3,(op)@highest */ 177 - *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) | 178 - ((val >> 48) & 0xffff); 176 + patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) | 177 + ((val >> 48) & 0xffff)); 178 + addr++; 179 179 180 180 /* ori r3,r3,(op)@higher */ 181 - *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | 182 - ((val >> 32) & 0xffff); 181 + patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | 182 + ___PPC_RS(3) | ((val >> 32) & 0xffff)); 183 + addr++; 183 184 184 185 /* rldicr r3,r3,32,31 */ 185 - *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) | 186 - __PPC_SH64(32) | __PPC_ME64(31); 186 + patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) | 187 + ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)); 188 + addr++; 187 189 188 190 /* oris r3,r3,(op)@h */ 189 - *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) | 190 - ((val >> 16) & 0xffff); 191 + patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) | 192 + ___PPC_RS(3) | ((val >> 16) & 0xffff)); 193 + addr++; 191 194 192 195 /* ori r3,r3,(op)@l */ 193 - *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | 194 - (val & 0xffff); 196 + patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) | 197 + ___PPC_RS(3) | (val & 0xffff)); 195 198 } 196 199 197 200 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) ··· 203 198 
kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; 204 199 kprobe_opcode_t *op_callback_addr, *emulate_step_addr; 205 200 long b_offset; 206 - unsigned long nip; 201 + unsigned long nip, size; 202 + int rc, i; 207 203 208 204 kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; 209 205 ··· 237 231 goto error; 238 232 239 233 /* Setup template */ 240 - memcpy(buff, optprobe_template_entry, 241 - TMPL_END_IDX * sizeof(kprobe_opcode_t)); 234 + /* We can optimize this via patch_instruction_window later */ 235 + size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); 236 + pr_devel("Copying template to %p, size %lu\n", buff, size); 237 + for (i = 0; i < size; i++) { 238 + rc = patch_instruction(buff + i, *(optprobe_template_entry + i)); 239 + if (rc < 0) 240 + goto error; 241 + } 242 242 243 243 /* 244 244 * Fixup the template with instructions to: ··· 273 261 if (!branch_op_callback || !branch_emulate_step) 274 262 goto error; 275 263 276 - buff[TMPL_CALL_HDLR_IDX] = branch_op_callback; 277 - buff[TMPL_EMULATE_IDX] = branch_emulate_step; 264 + patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); 265 + patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); 278 266 279 267 /* 280 268 * 3. load instruction to be emulated into relevant register, and ··· 284 272 /* 285 273 * 4. branch back from trampoline 286 274 */ 287 - buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX, 288 - (unsigned long)nip, 0); 275 + patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0); 289 276 290 277 flush_icache_range((unsigned long)buff, 291 278 (unsigned long)(&buff[TMPL_END_IDX]));
+38 -7
arch/powerpc/kernel/process.c
··· 511 511 { 512 512 unsigned long msr; 513 513 514 + /* 515 + * Syscall exit makes a similar initial check before branching 516 + * to restore_math. Keep them in synch. 517 + */ 514 518 if (!msr_tm_active(regs->msr) && 515 519 !current->thread.load_fp && !loadvec(current->thread)) 516 520 return; ··· 1137 1133 #endif 1138 1134 } 1139 1135 1136 + #ifdef CONFIG_PPC_BOOK3S_64 1137 + #define CP_SIZE 128 1138 + static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE))); 1139 + #endif 1140 + 1140 1141 struct task_struct *__switch_to(struct task_struct *prev, 1141 1142 struct task_struct *new) 1142 1143 { ··· 1204 1195 1205 1196 __switch_to_tm(prev, new); 1206 1197 1207 - /* 1208 - * We can't take a PMU exception inside _switch() since there is a 1209 - * window where the kernel stack SLB and the kernel stack are out 1210 - * of sync. Hard disable here. 1211 - */ 1212 - hard_irq_disable(); 1198 + if (!radix_enabled()) { 1199 + /* 1200 + * We can't take a PMU exception inside _switch() since there 1201 + * is a window where the kernel stack SLB and the kernel stack 1202 + * are out of sync. Hard disable here. 1203 + */ 1204 + hard_irq_disable(); 1205 + } 1213 1206 1214 1207 /* 1215 1208 * Call restore_sprs() before calling _switch(). If we move it after ··· 1231 1220 batch->active = 1; 1232 1221 } 1233 1222 1234 - if (current_thread_info()->task->thread.regs) 1223 + if (current_thread_info()->task->thread.regs) { 1235 1224 restore_math(current_thread_info()->task->thread.regs); 1225 + 1226 + /* 1227 + * The copy-paste buffer can only store into foreign real 1228 + * addresses, so unprivileged processes can not see the 1229 + * data or use it in any way unless they have foreign real 1230 + * mappings. We don't have a VAS driver that allocates those 1231 + * yet, so no cpabort is required. 
1232 + */ 1233 + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { 1234 + /* 1235 + * DD1 allows paste into normal system memory, so we 1236 + * do an unpaired copy here to clear the buffer and 1237 + * prevent a covert channel being set up. 1238 + * 1239 + * cpabort is not used because it is quite expensive. 1240 + */ 1241 + asm volatile(PPC_COPY(%0, %1) 1242 + : : "r"(dummy_copy_buffer), "r"(0)); 1243 + } 1244 + } 1236 1245 #endif /* CONFIG_PPC_STD_MMU_64 */ 1237 1246 1238 1247 return last;
+4
arch/powerpc/kernel/setup-common.c
··· 335 335 maj = ((pvr >> 8) & 0xFF) - 1; 336 336 min = pvr & 0xFF; 337 337 break; 338 + case 0x004e: /* POWER9 bits 12-15 give chip type */ 339 + maj = (pvr >> 8) & 0x0F; 340 + min = pvr & 0xFF; 341 + break; 338 342 default: 339 343 maj = (pvr >> 8) & 0xFF; 340 344 min = pvr & 0xFF;
+4 -3
arch/powerpc/kernel/smp.c
··· 33 33 #include <linux/notifier.h> 34 34 #include <linux/topology.h> 35 35 #include <linux/profile.h> 36 + #include <linux/processor.h> 36 37 37 38 #include <asm/ptrace.h> 38 39 #include <linux/atomic.h> ··· 113 112 #ifdef CONFIG_PPC64 114 113 int smp_generic_kick_cpu(int nr) 115 114 { 116 - BUG_ON(nr < 0 || nr >= NR_CPUS); 115 + if (nr < 0 || nr >= nr_cpu_ids) 116 + return -EINVAL; 117 117 118 118 /* 119 119 * The processor is currently spinning, waiting for the ··· 768 766 smp_ops->give_timebase(); 769 767 770 768 /* Wait until cpu puts itself in the online & active maps */ 771 - while (!cpu_online(cpu)) 772 - cpu_relax(); 769 + spin_until_cond(cpu_online(cpu)); 773 770 774 771 return 0; 775 772 }
+71 -25
arch/powerpc/kernel/time.c
··· 59 59 #include <linux/suspend.h> 60 60 #include <linux/rtc.h> 61 61 #include <linux/sched/cputime.h> 62 + #include <linux/processor.h> 62 63 #include <asm/trace.h> 63 64 64 65 #include <asm/io.h> 65 - #include <asm/processor.h> 66 66 #include <asm/nvram.h> 67 67 #include <asm/cache.h> 68 68 #include <asm/machdep.h> ··· 442 442 unsigned long start; 443 443 int diff; 444 444 445 + spin_begin(); 445 446 if (__USE_RTC()) { 446 447 start = get_rtcl(); 447 448 do { ··· 450 449 diff = get_rtcl() - start; 451 450 if (diff < 0) 452 451 diff += 1000000000; 452 + spin_cpu_relax(); 453 453 } while (diff < loops); 454 454 } else { 455 455 start = get_tbl(); 456 456 while (get_tbl() - start < loops) 457 - HMT_low(); 458 - HMT_medium(); 457 + spin_cpu_relax(); 459 458 } 459 + spin_end(); 460 460 } 461 461 EXPORT_SYMBOL(__delay); 462 462 ··· 677 675 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b 678 676 * are 64-bit unsigned numbers. 679 677 */ 680 - unsigned long long sched_clock(void) 678 + notrace unsigned long long sched_clock(void) 681 679 { 682 680 if (__USE_RTC()) 683 681 return get_rtc(); ··· 741 739 static void start_cpu_decrementer(void) 742 740 { 743 741 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) 742 + unsigned int tcr; 743 + 744 744 /* Clear any pending timer interrupts */ 745 745 mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); 746 746 747 - /* Enable decrementer interrupt */ 748 - mtspr(SPRN_TCR, TCR_DIE); 749 - #endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */ 747 + tcr = mfspr(SPRN_TCR); 748 + /* 749 + * The watchdog may have already been enabled by u-boot. So leave 750 + * TRC[WP] (Watchdog Period) alone. 
751 + */ 752 + tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */ 753 + tcr |= TCR_DIE; /* Enable decrementer */ 754 + mtspr(SPRN_TCR, tcr); 755 + #endif 750 756 } 751 757 752 758 void __init generic_calibrate_decr(void) ··· 833 823 } 834 824 835 825 /* clocksource code */ 836 - static u64 rtc_read(struct clocksource *cs) 826 + static notrace u64 rtc_read(struct clocksource *cs) 837 827 { 838 828 return (u64)get_rtc(); 839 829 } 840 830 841 - static u64 timebase_read(struct clocksource *cs) 831 + static notrace u64 timebase_read(struct clocksource *cs) 842 832 { 843 833 return (u64)get_tb(); 844 834 } 845 835 846 - void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm, 847 - struct clocksource *clock, u32 mult, u64 cycle_last) 836 + 837 + void update_vsyscall(struct timekeeper *tk) 848 838 { 839 + struct timespec xt; 840 + struct clocksource *clock = tk->tkr_mono.clock; 841 + u32 mult = tk->tkr_mono.mult; 842 + u32 shift = tk->tkr_mono.shift; 843 + u64 cycle_last = tk->tkr_mono.cycle_last; 849 844 u64 new_tb_to_xs, new_stamp_xsec; 850 - u32 frac_sec; 845 + u64 frac_sec; 851 846 852 847 if (clock != &clocksource_timebase) 853 848 return; 849 + 850 + xt.tv_sec = tk->xtime_sec; 851 + xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); 854 852 855 853 /* Make userspace gettimeofday spin until we're done. */ 856 854 ++vdso_data->tb_update_count; 857 855 smp_mb(); 858 856 859 - /* 19342813113834067 ~= 2^(20+64) / 1e9 */ 860 - new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); 861 - new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; 862 - do_div(new_stamp_xsec, 1000000000); 863 - new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; 857 + /* 858 + * This computes ((2^20 / 1e9) * mult) >> shift as a 859 + * 0.64 fixed-point fraction. 
860 + * The computation in the else clause below won't overflow 861 + * (as long as the timebase frequency is >= 1.049 MHz) 862 + * but loses precision because we lose the low bits of the constant 863 + * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9. 864 + * For a shift of 24 the error is about 0.5e-9, or about 0.5ns 865 + * over a second. (Shift values are usually 22, 23 or 24.) 866 + * For high frequency clocks such as the 512MHz timebase clock 867 + * on POWER[6789], the mult value is small (e.g. 32768000) 868 + * and so we can shift the constant by 16 initially 869 + * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the 870 + * remaining shifts after the multiplication, which gives a 871 + * more accurate result (e.g. with mult = 32768000, shift = 24, 872 + * the error is only about 1.2e-12, or 0.7ns over 10 minutes). 873 + */ 874 + if (mult <= 62500000 && clock->shift >= 16) 875 + new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16); 876 + else 877 + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); 864 878 865 - BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); 866 - /* this is tv_nsec / 1e9 as a 0.32 fraction */ 867 - frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; 879 + /* 880 + * Compute the fractional second in units of 2^-32 seconds. 881 + * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift 882 + * in nanoseconds, so multiplying that by 2^32 / 1e9 gives 883 + * it in units of 2^-32 seconds. 884 + * We assume shift <= 32 because clocks_calc_mult_shift() 885 + * generates shift values in the range 0 - 32. 886 + */ 887 + frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift); 888 + do_div(frac_sec, NSEC_PER_SEC); 889 + 890 + /* 891 + * Work out new stamp_xsec value for any legacy users of systemcfg. 892 + * stamp_xsec is in units of 2^-20 seconds. 
893 + */ 894 + new_stamp_xsec = frac_sec >> 12; 895 + new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC; 868 896 869 897 /* 870 898 * tb_update_count is used to allow the userspace gettimeofday code ··· 912 864 * the two values of tb_update_count match and are even then the 913 865 * tb_to_xs and stamp_xsec values are consistent. If not, then it 914 866 * loops back and reads them again until this criteria is met. 915 - * We expect the caller to have done the first increment of 916 - * vdso_data->tb_update_count already. 917 867 */ 918 868 vdso_data->tb_orig_stamp = cycle_last; 919 869 vdso_data->stamp_xsec = new_stamp_xsec; 920 870 vdso_data->tb_to_xs = new_tb_to_xs; 921 - vdso_data->wtom_clock_sec = wtm->tv_sec; 922 - vdso_data->wtom_clock_nsec = wtm->tv_nsec; 923 - vdso_data->stamp_xtime = *wall_time; 871 + vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; 872 + vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; 873 + vdso_data->stamp_xtime = xt; 924 874 vdso_data->stamp_sec_fraction = frac_sec; 925 875 smp_wmb(); 926 876 ++(vdso_data->tb_update_count);
+2 -2
arch/powerpc/kernel/tm.S
··· 313 313 blr 314 314 315 315 316 - /* void tm_recheckpoint(struct thread_struct *thread, 317 - * unsigned long orig_msr) 316 + /* void __tm_recheckpoint(struct thread_struct *thread, 317 + * unsigned long orig_msr) 318 318 * - Restore the checkpointed register state saved by tm_reclaim 319 319 * when we switch_to a process. 320 320 *
+3
arch/powerpc/kernel/traps.c
··· 237 237 err = 0; 238 238 oops_end(flags, regs, err); 239 239 } 240 + NOKPROBE_SYMBOL(die); 240 241 241 242 void user_single_step_siginfo(struct task_struct *tsk, 242 243 struct pt_regs *regs, siginfo_t *info) ··· 1969 1968 regs->trap, regs->nip); 1970 1969 die("Unrecoverable exception", regs, SIGABRT); 1971 1970 } 1971 + NOKPROBE_SYMBOL(unrecoverable_exception); 1972 1972 1973 1973 #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) 1974 1974 /* ··· 2000 1998 regs->gpr[1], regs->nip); 2001 1999 die("Bad kernel stack pointer", regs, SIGABRT); 2002 2000 } 2001 + NOKPROBE_SYMBOL(kernel_bad_stack); 2003 2002 2004 2003 void __init trap_init(void) 2005 2004 {
+35 -26
arch/powerpc/kernel/vmlinux.lds.S
··· 8 8 #include <asm/cache.h> 9 9 #include <asm/thread_info.h> 10 10 11 + #ifdef CONFIG_STRICT_KERNEL_RWX 12 + #define STRICT_ALIGN_SIZE (1 << 24) 13 + #else 14 + #define STRICT_ALIGN_SIZE PAGE_SIZE 15 + #endif 16 + 11 17 ENTRY(_stext) 12 18 13 19 PHDRS { ··· 64 58 #ifdef CONFIG_PPC64 65 59 KEEP(*(.head.text.first_256B)); 66 60 #ifdef CONFIG_PPC_BOOK3E 67 - # define END_FIXED 0x100 68 61 #else 69 62 KEEP(*(.head.text.real_vectors)); 70 63 *(.head.text.real_trampolines); ··· 71 66 *(.head.text.virt_trampolines); 72 67 # if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) 73 68 KEEP(*(.head.data.fwnmi_page)); 74 - # define END_FIXED 0x8000 75 - # else 76 - # define END_FIXED 0x7000 77 69 # endif 78 70 #endif 79 - ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error"); 80 71 #else /* !CONFIG_PPC64 */ 81 72 HEAD_TEXT 82 73 #endif ··· 80 79 81 80 __head_end = .; 82 81 83 - /* 84 - * If the build dies here, it's likely code in head_64.S is referencing 85 - * labels it can't reach, and the linker inserting stubs without the 86 - * assembler's knowledge. To debug, remove the above assert and 87 - * rebuild. Look for branch stubs in the fixed section region. 88 - * 89 - * Linker stub generation could be allowed in "trampoline" 90 - * sections if absolutely necessary, but this would require 91 - * some rework of the fixed sections. Before resorting to this, 92 - * consider references that have sufficient addressing range, 93 - * (e.g., hand coded trampolines) so the linker does not have 94 - * to add stubs. 95 - * 96 - * Linker stubs at the top of the main text section are currently not 97 - * detected, and will result in a crash at boot due to offsets being 98 - * wrong. 99 - */ 100 82 #ifdef CONFIG_PPC64 101 83 /* 102 84 * BLOCK(0) overrides the default output section alignment because ··· 87 103 * section placement to work. 
88 104 */ 89 105 .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) { 106 + #ifdef CONFIG_LD_HEAD_STUB_CATCH 107 + *(.linker_stub_catch); 108 + . = . ; 109 + #endif 110 + 90 111 #else 91 112 .text : AT(ADDR(.text) - LOAD_OFFSET) { 92 113 ALIGN_FUNCTION(); 93 114 #endif 94 115 /* careful! __ftr_alt_* sections need to be close to .text */ 95 - *(.text .fixup __ftr_alt_* .ref.text) 116 + *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); 96 117 SCHED_TEXT 97 118 CPUIDLE_TEXT 98 119 LOCK_TEXT 99 120 KPROBES_TEXT 100 121 IRQENTRY_TEXT 101 122 SOFTIRQENTRY_TEXT 123 + /* 124 + * -Os builds call FP save/restore functions. The powerpc64 125 + * linker generates those on demand in the .sfpr section. 126 + * .sfpr gets placed at the beginning of a group of input 127 + * sections, which can break start-of-text offset if it is 128 + * included with the main text sections, so put it by itself. 129 + */ 130 + *(.sfpr); 102 131 MEM_KEEP(init.text) 103 132 MEM_KEEP(exit.text) 104 133 ··· 129 132 PROVIDE32 (etext = .); 130 133 131 134 /* Read-only data */ 132 - RODATA 135 + RO_DATA(PAGE_SIZE) 133 136 134 137 EXCEPTION_TABLE(0) 135 138 ··· 146 149 /* 147 150 * Init sections discarded at runtime 148 151 */ 149 - . = ALIGN(PAGE_SIZE); 152 + . = ALIGN(STRICT_ALIGN_SIZE); 150 153 __init_begin = .; 151 154 INIT_TEXT_SECTION(PAGE_SIZE) :kernel 152 155 ··· 264 267 .data : AT(ADDR(.data) - LOAD_OFFSET) { 265 268 DATA_DATA 266 269 *(.sdata) 270 + *(.sdata2) 267 271 *(.got.plt) *(.got) 272 + *(.plt) 268 273 } 269 274 #else 270 275 .data : AT(ADDR(.data) - LOAD_OFFSET) { ··· 329 330 _end = . ; 330 331 PROVIDE32 (end = .); 331 332 332 - /* Sections to be discarded. */ 333 + STABS_DEBUG 334 + 335 + DWARF_DEBUG 336 + 333 337 DISCARDS 338 + /DISCARD/ : { 339 + *(*.EMB.apuinfo) 340 + *(.glink .iplt .plt .rela* .comment) 341 + *(.gnu.version*) 342 + *(.gnu.attributes) 343 + *(.eh_frame) 344 + } 334 345 }
+9 -2
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 15 15 #include <linux/log2.h> 16 16 17 17 #include <asm/tlbflush.h> 18 + #include <asm/trace.h> 18 19 #include <asm/kvm_ppc.h> 19 20 #include <asm/kvm_book3s.h> 20 21 #include <asm/book3s/64/mmu-hash.h> ··· 444 443 cpu_relax(); 445 444 if (need_sync) 446 445 asm volatile("ptesync" : : : "memory"); 447 - for (i = 0; i < npages; ++i) 446 + for (i = 0; i < npages; ++i) { 448 447 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 449 448 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 449 + trace_tlbie(kvm->arch.lpid, 0, rbvalues[i], 450 + kvm->arch.lpid, 0, 0, 0); 451 + } 450 452 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 451 453 kvm->arch.tlbie_lock = 0; 452 454 } else { 453 455 if (need_sync) 454 456 asm volatile("ptesync" : : : "memory"); 455 - for (i = 0; i < npages; ++i) 457 + for (i = 0; i < npages; ++i) { 456 458 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : 457 459 "r" (rbvalues[i]), "r" (0)); 460 + trace_tlbie(kvm->arch.lpid, 1, rbvalues[i], 461 + 0, 0, 0, 0); 462 + } 458 463 asm volatile("ptesync" : : : "memory"); 459 464 } 460 465 }
+10 -2
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 313 313 * We come in here when wakened from nap mode. 314 314 * Relocation is off and most register values are lost. 315 315 * r13 points to the PACA. 316 + * r3 contains the SRR1 wakeup value, SRR1 is trashed. 316 317 */ 317 318 .globl kvm_start_guest 318 319 kvm_start_guest: 319 - 320 320 /* Set runlatch bit the minute you wake up from nap */ 321 321 mfspr r0, SPRN_CTRLF 322 322 ori r0, r0, 1 323 323 mtspr SPRN_CTRLT, r0 324 + 325 + /* 326 + * Could avoid this and pass it through in r3. For now, 327 + * code expects it to be in SRR1. 328 + */ 329 + mtspr SPRN_SRR1,r3 324 330 325 331 ld r2,PACATOC(r13) 326 332 ··· 446 440 /* 447 441 * We jump to pnv_wakeup_loss, which will return to the caller 448 442 * of power7_nap in the powernv cpu offline loop. The value we 449 - * put in r3 becomes the return value for power7_nap. 443 + * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss 444 + * requires SRR1 in r12. 450 445 */ 451 446 li r3, LPCR_PECE0 452 447 mfspr r4, SPRN_LPCR 453 448 rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 454 449 mtspr SPRN_LPCR, r4 455 450 li r3, 0 451 + mfspr r12,SPRN_SRR1 456 452 b pnv_wakeup_loss 457 453 458 454 53: HMT_LOW
+11 -4
arch/powerpc/lib/Makefile
··· 9 9 CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) 10 10 CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) 11 11 12 - obj-y += string.o alloc.o crtsavres.o code-patching.o \ 13 - feature-fixups.o 12 + obj-y += string.o alloc.o code-patching.o feature-fixups.o 14 13 15 - obj-$(CONFIG_PPC32) += div64.o copy_32.o 14 + obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o 15 + 16 + # See corresponding test in arch/powerpc/Makefile 17 + # 64-bit linker creates .sfpr on demand for final link (vmlinux), 18 + # so it is only needed for modules, and only for older linkers which 19 + # do not support --save-restore-funcs 20 + ifeq ($(call ld-ifversion, -lt, 225000000, y),y) 21 + extra-$(CONFIG_PPC64) += crtsavres.o 22 + endif 16 23 17 24 obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ 18 25 copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \ ··· 37 30 38 31 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o 39 32 40 - obj-$(CONFIG_ALTIVEC) += xor_vmx.o 33 + obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o 41 34 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec) 42 35 43 36 obj-$(CONFIG_PPC64) += $(obj64-y)
+167 -4
arch/powerpc/lib/code-patching.c
··· 12 12 #include <linux/vmalloc.h> 13 13 #include <linux/init.h> 14 14 #include <linux/mm.h> 15 - #include <asm/page.h> 16 - #include <asm/code-patching.h> 15 + #include <linux/cpuhotplug.h> 16 + #include <linux/slab.h> 17 17 #include <linux/uaccess.h> 18 18 #include <linux/kprobes.h> 19 19 20 + #include <asm/pgtable.h> 21 + #include <asm/tlbflush.h> 22 + #include <asm/page.h> 23 + #include <asm/code-patching.h> 20 24 21 - int patch_instruction(unsigned int *addr, unsigned int instr) 25 + static int __patch_instruction(unsigned int *addr, unsigned int instr) 22 26 { 23 27 int err; 24 28 25 29 __put_user_size(instr, addr, 4, err); 26 30 if (err) 27 31 return err; 28 - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr)); 32 + 33 + asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr)); 34 + 29 35 return 0; 30 36 } 37 + 38 + #ifdef CONFIG_STRICT_KERNEL_RWX 39 + static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); 40 + 41 + static int text_area_cpu_up(unsigned int cpu) 42 + { 43 + struct vm_struct *area; 44 + 45 + area = get_vm_area(PAGE_SIZE, VM_ALLOC); 46 + if (!area) { 47 + WARN_ONCE(1, "Failed to create text area for cpu %d\n", 48 + cpu); 49 + return -1; 50 + } 51 + this_cpu_write(text_poke_area, area); 52 + 53 + return 0; 54 + } 55 + 56 + static int text_area_cpu_down(unsigned int cpu) 57 + { 58 + free_vm_area(this_cpu_read(text_poke_area)); 59 + return 0; 60 + } 61 + 62 + /* 63 + * Run as a late init call. This allows all the boot time patching to be done 64 + * simply by patching the code, and then we're called here prior to 65 + * mark_rodata_ro(), which happens after all init calls are run. Although 66 + * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge 67 + * it as being preferable to a kernel that will crash later when someone tries 68 + * to use patch_instruction(). 
69 + */ 70 + static int __init setup_text_poke_area(void) 71 + { 72 + BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, 73 + "powerpc/text_poke:online", text_area_cpu_up, 74 + text_area_cpu_down)); 75 + 76 + return 0; 77 + } 78 + late_initcall(setup_text_poke_area); 79 + 80 + /* 81 + * This can be called for kernel text or a module. 82 + */ 83 + static int map_patch_area(void *addr, unsigned long text_poke_addr) 84 + { 85 + unsigned long pfn; 86 + int err; 87 + 88 + if (is_vmalloc_addr(addr)) 89 + pfn = vmalloc_to_pfn(addr); 90 + else 91 + pfn = __pa_symbol(addr) >> PAGE_SHIFT; 92 + 93 + err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), 94 + pgprot_val(PAGE_KERNEL)); 95 + 96 + pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); 97 + if (err) 98 + return -1; 99 + 100 + return 0; 101 + } 102 + 103 + static inline int unmap_patch_area(unsigned long addr) 104 + { 105 + pte_t *ptep; 106 + pmd_t *pmdp; 107 + pud_t *pudp; 108 + pgd_t *pgdp; 109 + 110 + pgdp = pgd_offset_k(addr); 111 + if (unlikely(!pgdp)) 112 + return -EINVAL; 113 + 114 + pudp = pud_offset(pgdp, addr); 115 + if (unlikely(!pudp)) 116 + return -EINVAL; 117 + 118 + pmdp = pmd_offset(pudp, addr); 119 + if (unlikely(!pmdp)) 120 + return -EINVAL; 121 + 122 + ptep = pte_offset_kernel(pmdp, addr); 123 + if (unlikely(!ptep)) 124 + return -EINVAL; 125 + 126 + pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); 127 + 128 + /* 129 + * In hash, pte_clear flushes the tlb, in radix, we have to 130 + */ 131 + pte_clear(&init_mm, addr, ptep); 132 + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); 133 + 134 + return 0; 135 + } 136 + 137 + int patch_instruction(unsigned int *addr, unsigned int instr) 138 + { 139 + int err; 140 + unsigned int *dest = NULL; 141 + unsigned long flags; 142 + unsigned long text_poke_addr; 143 + unsigned long kaddr = (unsigned long)addr; 144 + 145 + /* 146 + * During early early boot patch_instruction is called 147 + * when text_poke_area is not ready, 
but we still need 148 + * to allow patching. We just do the plain old patching 149 + * We use slab_is_available and per cpu read * via this_cpu_read 150 + * of text_poke_area. Per-CPU areas might not be up early 151 + * this can create problems with just using this_cpu_read() 152 + */ 153 + if (!slab_is_available() || !this_cpu_read(text_poke_area)) 154 + return __patch_instruction(addr, instr); 155 + 156 + local_irq_save(flags); 157 + 158 + text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; 159 + if (map_patch_area(addr, text_poke_addr)) { 160 + err = -1; 161 + goto out; 162 + } 163 + 164 + dest = (unsigned int *)(text_poke_addr) + 165 + ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); 166 + 167 + /* 168 + * We use __put_user_size so that we can handle faults while 169 + * writing to dest and return err to handle faults gracefully 170 + */ 171 + __put_user_size(instr, dest, 4, err); 172 + if (!err) 173 + asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync" 174 + ::"r" (dest), "r"(addr)); 175 + 176 + err = unmap_patch_area(text_poke_addr); 177 + if (err) 178 + pr_warn("failed to unmap %lx\n", text_poke_addr); 179 + 180 + out: 181 + local_irq_restore(flags); 182 + 183 + return err; 184 + } 185 + #else /* !CONFIG_STRICT_KERNEL_RWX */ 186 + 187 + int patch_instruction(unsigned int *addr, unsigned int instr) 188 + { 189 + return __patch_instruction(addr, instr); 190 + } 191 + 192 + #endif /* CONFIG_STRICT_KERNEL_RWX */ 193 + NOKPROBE_SYMBOL(patch_instruction); 31 194 32 195 int patch_branch(unsigned int *addr, unsigned long target, int flags) 33 196 {
+2 -2
arch/powerpc/lib/copyuser_power7.S
··· 82 82 _GLOBAL(__copy_tofrom_user_power7) 83 83 #ifdef CONFIG_ALTIVEC 84 84 cmpldi r5,16 85 - cmpldi cr1,r5,4096 85 + cmpldi cr1,r5,3328 86 86 87 87 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) 88 88 std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) 89 89 std r5,-STACKFRAMESIZE+STK_REG(R29)(r1) 90 90 91 91 blt .Lshort_copy 92 - bgt cr1,.Lvmx_copy 92 + bge cr1,.Lvmx_copy 93 93 #else 94 94 cmpldi r5,16 95 95
+2 -4
arch/powerpc/lib/crtsavres.S
··· 44 44 45 45 #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE 46 46 47 - #ifndef CONFIG_PPC64 48 - 49 47 .section ".text" 48 + 49 + #ifndef CONFIG_PPC64 50 50 51 51 /* Routines for saving integer registers, called by the compiler. */ 52 52 /* Called with r11 pointing to the stack header word of the caller of the */ ··· 313 313 #endif /* CONFIG_ALTIVEC */ 314 314 315 315 #else /* CONFIG_PPC64 */ 316 - 317 - .section ".text.save.restore","ax",@progbits 318 316 319 317 .globl _savegpr0_14 320 318 _savegpr0_14:
+11 -42
arch/powerpc/lib/xor_vmx.c
··· 29 29 #define vector __attribute__((vector_size(16))) 30 30 #endif 31 31 32 - #include <linux/preempt.h> 33 - #include <linux/export.h> 34 - #include <linux/sched.h> 35 - #include <asm/switch_to.h> 32 + #include "xor_vmx.h" 36 33 37 34 typedef vector signed char unative_t; 38 35 ··· 61 64 V1##_3 = vec_xor(V1##_3, V2##_3); \ 62 65 } while (0) 63 66 64 - void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, 65 - unsigned long *v2_in) 67 + void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, 68 + unsigned long *v2_in) 66 69 { 67 70 DEFINE(v1); 68 71 DEFINE(v2); 69 72 unsigned long lines = bytes / (sizeof(unative_t)) / 4; 70 - 71 - preempt_disable(); 72 - enable_kernel_altivec(); 73 73 74 74 do { 75 75 LOAD(v1); ··· 77 83 v1 += 4; 78 84 v2 += 4; 79 85 } while (--lines > 0); 80 - 81 - disable_kernel_altivec(); 82 - preempt_enable(); 83 86 } 84 - EXPORT_SYMBOL(xor_altivec_2); 85 87 86 - void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, 87 - unsigned long *v2_in, unsigned long *v3_in) 88 + void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, 89 + unsigned long *v2_in, unsigned long *v3_in) 88 90 { 89 91 DEFINE(v1); 90 92 DEFINE(v2); 91 93 DEFINE(v3); 92 94 unsigned long lines = bytes / (sizeof(unative_t)) / 4; 93 - 94 - preempt_disable(); 95 - enable_kernel_altivec(); 96 95 97 96 do { 98 97 LOAD(v1); ··· 99 112 v2 += 4; 100 113 v3 += 4; 101 114 } while (--lines > 0); 102 - 103 - disable_kernel_altivec(); 104 - preempt_enable(); 105 115 } 106 - EXPORT_SYMBOL(xor_altivec_3); 107 116 108 - void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, 109 - unsigned long *v2_in, unsigned long *v3_in, 110 - unsigned long *v4_in) 117 + void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, 118 + unsigned long *v2_in, unsigned long *v3_in, 119 + unsigned long *v4_in) 111 120 { 112 121 DEFINE(v1); 113 122 DEFINE(v2); 114 123 DEFINE(v3); 115 124 DEFINE(v4); 116 125 unsigned long lines = bytes / (sizeof(unative_t)) / 4; 117 - 
118 - preempt_disable(); 119 - enable_kernel_altivec(); 120 126 121 127 do { 122 128 LOAD(v1); ··· 126 146 v3 += 4; 127 147 v4 += 4; 128 148 } while (--lines > 0); 129 - 130 - disable_kernel_altivec(); 131 - preempt_enable(); 132 149 } 133 - EXPORT_SYMBOL(xor_altivec_4); 134 150 135 - void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, 136 - unsigned long *v2_in, unsigned long *v3_in, 137 - unsigned long *v4_in, unsigned long *v5_in) 151 + void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, 152 + unsigned long *v2_in, unsigned long *v3_in, 153 + unsigned long *v4_in, unsigned long *v5_in) 138 154 { 139 155 DEFINE(v1); 140 156 DEFINE(v2); ··· 138 162 DEFINE(v4); 139 163 DEFINE(v5); 140 164 unsigned long lines = bytes / (sizeof(unative_t)) / 4; 141 - 142 - preempt_disable(); 143 - enable_kernel_altivec(); 144 165 145 166 do { 146 167 LOAD(v1); ··· 157 184 v4 += 4; 158 185 v5 += 4; 159 186 } while (--lines > 0); 160 - 161 - disable_kernel_altivec(); 162 - preempt_enable(); 163 187 } 164 - EXPORT_SYMBOL(xor_altivec_5);
+20
arch/powerpc/lib/xor_vmx.h
··· 1 + /* 2 + * Simple interface to link xor_vmx.c and xor_vmx_glue.c 3 + * 4 + * Separating these file ensures that no altivec instructions are run 5 + * outside of the enable/disable altivec block. 6 + */ 7 + 8 + void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, 9 + unsigned long *v2_in); 10 + 11 + void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, 12 + unsigned long *v2_in, unsigned long *v3_in); 13 + 14 + void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, 15 + unsigned long *v2_in, unsigned long *v3_in, 16 + unsigned long *v4_in); 17 + 18 + void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, 19 + unsigned long *v2_in, unsigned long *v3_in, 20 + unsigned long *v4_in, unsigned long *v5_in);
+62
arch/powerpc/lib/xor_vmx_glue.c
··· 1 + /* 2 + * Altivec XOR operations 3 + * 4 + * Copyright 2017 IBM Corp. 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + */ 11 + 12 + #include <linux/preempt.h> 13 + #include <linux/export.h> 14 + #include <linux/sched.h> 15 + #include <asm/switch_to.h> 16 + #include "xor_vmx.h" 17 + 18 + void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, 19 + unsigned long *v2_in) 20 + { 21 + preempt_disable(); 22 + enable_kernel_altivec(); 23 + __xor_altivec_2(bytes, v1_in, v2_in); 24 + disable_kernel_altivec(); 25 + preempt_enable(); 26 + } 27 + EXPORT_SYMBOL(xor_altivec_2); 28 + 29 + void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, 30 + unsigned long *v2_in, unsigned long *v3_in) 31 + { 32 + preempt_disable(); 33 + enable_kernel_altivec(); 34 + __xor_altivec_3(bytes, v1_in, v2_in, v3_in); 35 + disable_kernel_altivec(); 36 + preempt_enable(); 37 + } 38 + EXPORT_SYMBOL(xor_altivec_3); 39 + 40 + void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, 41 + unsigned long *v2_in, unsigned long *v3_in, 42 + unsigned long *v4_in) 43 + { 44 + preempt_disable(); 45 + enable_kernel_altivec(); 46 + __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in); 47 + disable_kernel_altivec(); 48 + preempt_enable(); 49 + } 50 + EXPORT_SYMBOL(xor_altivec_4); 51 + 52 + void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, 53 + unsigned long *v2_in, unsigned long *v3_in, 54 + unsigned long *v4_in, unsigned long *v5_in) 55 + { 56 + preempt_disable(); 57 + enable_kernel_altivec(); 58 + __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in); 59 + disable_kernel_altivec(); 60 + preempt_enable(); 61 + } 62 + EXPORT_SYMBOL(xor_altivec_5);
+1 -1
arch/powerpc/mm/8xx_mmu.c
··· 88 88 int offset; 89 89 90 90 for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) 91 - map_page(v + offset, p + offset, f); 91 + map_kernel_page(v + offset, p + offset, f); 92 92 } 93 93 94 94 /* Address of instructions to patch */
+1 -1
arch/powerpc/mm/dma-noncoherent.c
··· 227 227 228 228 do { 229 229 SetPageReserved(page); 230 - map_page(vaddr, page_to_phys(page), 230 + map_kernel_page(vaddr, page_to_phys(page), 231 231 pgprot_val(pgprot_noncached(PAGE_KERNEL))); 232 232 page++; 233 233 vaddr += PAGE_SIZE;
+1 -1
arch/powerpc/mm/dump_hashpagetable.c
··· 335 335 unsigned long rpn, lp_bits; 336 336 int base_psize = 0, actual_psize = 0; 337 337 338 - if (ea <= PAGE_OFFSET) 338 + if (ea < PAGE_OFFSET) 339 339 return -1; 340 340 341 341 /* Look in primary table */
+9 -8
arch/powerpc/mm/fault.c
··· 206 206 int is_write = 0; 207 207 int trap = TRAP(regs); 208 208 int is_exec = trap == 0x400; 209 + int is_user = user_mode(regs); 209 210 int fault; 210 211 int rc = 0, store_update_sp = 0; 211 212 ··· 217 216 * bits we are interested in. But there are some bits which 218 217 * indicate errors in DSISR but can validly be set in SRR1. 219 218 */ 220 - if (trap == 0x400) 219 + if (is_exec) 221 220 error_code &= 0x48200000; 222 221 else 223 222 is_write = error_code & DSISR_ISSTORE; ··· 248 247 * The kernel should never take an execute fault nor should it 249 248 * take a page fault to a kernel address. 250 249 */ 251 - if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) { 250 + if (!is_user && (is_exec || (address >= TASK_SIZE))) { 252 251 rc = SIGSEGV; 253 252 goto bail; 254 253 } 255 254 256 255 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ 257 - defined(CONFIG_PPC_BOOK3S_64)) 256 + defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx)) 258 257 if (error_code & DSISR_DABRMATCH) { 259 258 /* breakpoint match */ 260 259 do_break(regs, address, error_code); ··· 267 266 local_irq_enable(); 268 267 269 268 if (faulthandler_disabled() || mm == NULL) { 270 - if (!user_mode(regs)) { 269 + if (!is_user) { 271 270 rc = SIGSEGV; 272 271 goto bail; 273 272 } ··· 288 287 * can result in fault, which will cause a deadlock when called with 289 288 * mmap_sem held 290 289 */ 291 - if (!is_exec && user_mode(regs)) 290 + if (is_write && is_user) 292 291 store_update_sp = store_updates_sp(regs); 293 292 294 - if (user_mode(regs)) 293 + if (is_user) 295 294 flags |= FAULT_FLAG_USER; 296 295 297 296 /* When running in the kernel we expect faults to occur only to ··· 310 309 * thus avoiding the deadlock. 
311 310 */ 312 311 if (!down_read_trylock(&mm->mmap_sem)) { 313 - if (!user_mode(regs) && !search_exception_tables(regs->nip)) 312 + if (!is_user && !search_exception_tables(regs->nip)) 314 313 goto bad_area_nosemaphore; 315 314 316 315 retry: ··· 510 509 511 510 bad_area_nosemaphore: 512 511 /* User mode accesses cause a SIGSEGV */ 513 - if (user_mode(regs)) { 512 + if (is_user) { 514 513 _exception(SIGSEGV, regs, code, address); 515 514 goto bail; 516 515 }
+40 -1
arch/powerpc/mm/hash_native_64.c
··· 15 15 #include <linux/spinlock.h> 16 16 #include <linux/bitops.h> 17 17 #include <linux/of.h> 18 + #include <linux/processor.h> 18 19 #include <linux/threads.h> 19 20 #include <linux/smp.h> 20 21 ··· 24 23 #include <asm/mmu_context.h> 25 24 #include <asm/pgtable.h> 26 25 #include <asm/tlbflush.h> 26 + #include <asm/trace.h> 27 27 #include <asm/tlb.h> 28 28 #include <asm/cputable.h> 29 29 #include <asm/udbg.h> ··· 100 98 : "memory"); 101 99 break; 102 100 } 101 + trace_tlbie(0, 0, va, 0, 0, 0, 0); 103 102 } 104 103 105 104 static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) ··· 150 147 : "memory"); 151 148 break; 152 149 } 150 + trace_tlbie(0, 1, va, 0, 0, 0, 0); 153 151 154 152 } 155 153 ··· 185 181 while (1) { 186 182 if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) 187 183 break; 184 + spin_begin(); 188 185 while(test_bit(HPTE_LOCK_BIT, word)) 189 - cpu_relax(); 186 + spin_cpu_relax(); 187 + spin_end(); 190 188 } 191 189 } 192 190 ··· 412 406 */ 413 407 tlbie(vpn, psize, psize, ssize, 0); 414 408 } 409 + 410 + /* 411 + * Remove a bolted kernel entry. Memory hotplug uses this. 412 + * 413 + * No need to lock here because we should be the only user. 
414 + */ 415 + static int native_hpte_removebolted(unsigned long ea, int psize, int ssize) 416 + { 417 + unsigned long vpn; 418 + unsigned long vsid; 419 + long slot; 420 + struct hash_pte *hptep; 421 + 422 + vsid = get_kernel_vsid(ea, ssize); 423 + vpn = hpt_vpn(ea, vsid, ssize); 424 + 425 + slot = native_hpte_find(vpn, psize, ssize); 426 + if (slot == -1) 427 + return -ENOENT; 428 + 429 + hptep = htab_address + slot; 430 + 431 + VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED)); 432 + 433 + /* Invalidate the hpte */ 434 + hptep->v = 0; 435 + 436 + /* Invalidate the TLB */ 437 + tlbie(vpn, psize, psize, ssize, 0); 438 + return 0; 439 + } 440 + 415 441 416 442 static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, 417 443 int bpsize, int apsize, int ssize, int local) ··· 763 725 mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; 764 726 mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; 765 727 mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; 728 + mmu_hash_ops.hpte_removebolted = native_hpte_removebolted; 766 729 mmu_hash_ops.hpte_insert = native_hpte_insert; 767 730 mmu_hash_ops.hpte_remove = native_hpte_remove; 768 731 mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+2
arch/powerpc/mm/hash_utils_64.c
··· 810 810 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 811 811 : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); 812 812 asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); 813 + trace_tlbie(0, 0, rb, 0, 2, 0, 0); 814 + 813 815 /* 814 816 * now switch the HID 815 817 */
+3 -2
arch/powerpc/mm/hugetlbpage.c
··· 34 34 #define PAGE_SHIFT_16G 34 35 35 36 36 unsigned int HPAGE_SHIFT; 37 + EXPORT_SYMBOL(HPAGE_SHIFT); 37 38 38 39 /* 39 40 * Tracks gpages after the device tree is scanned and before the ··· 80 79 num_hugepd = 1; 81 80 } 82 81 83 - new = kmem_cache_zalloc(cachep, GFP_KERNEL); 82 + new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); 84 83 85 84 BUG_ON(pshift > HUGEPD_SHIFT_MASK); 86 85 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); ··· 946 945 if (pmd_none(pmd)) 947 946 return NULL; 948 947 949 - if (pmd_trans_huge(pmd)) { 948 + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { 950 949 if (is_thp) 951 950 *is_thp = true; 952 951 ret_pte = (pte_t *) pmdp;
+56 -22
arch/powerpc/mm/init_64.c
··· 44 44 #include <linux/slab.h> 45 45 #include <linux/of_fdt.h> 46 46 #include <linux/libfdt.h> 47 + #include <linux/memremap.h> 47 48 48 49 #include <asm/pgalloc.h> 49 50 #include <asm/page.h> ··· 111 110 return 0; 112 111 } 113 112 113 + /* 114 + * vmemmap virtual address space management does not have a traditonal page 115 + * table to track which virtual struct pages are backed by physical mapping. 116 + * The virtual to physical mappings are tracked in a simple linked list 117 + * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at 118 + * all times where as the 'next' list maintains the available 119 + * vmemmap_backing structures which have been deleted from the 120 + * 'vmemmap_global' list during system runtime (memory hotplug remove 121 + * operation). The freed 'vmemmap_backing' structures are reused later when 122 + * new requests come in without allocating fresh memory. This pointer also 123 + * tracks the allocated 'vmemmap_backing' structures as we allocate one 124 + * full page memory at a time when we dont have any. 125 + */ 114 126 struct vmemmap_backing *vmemmap_list; 115 127 static struct vmemmap_backing *next; 128 + 129 + /* 130 + * The same pointer 'next' tracks individual chunks inside the allocated 131 + * full page during the boot time and again tracks the freeed nodes during 132 + * runtime. It is racy but it does not happen as they are separated by the 133 + * boot process. Will create problem if some how we have memory hotplug 134 + * operation during boot !! 
135 + */ 116 136 static int num_left; 117 137 static int num_freed; 118 138 ··· 193 171 pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); 194 172 195 173 for (; start < end; start += page_size) { 174 + struct vmem_altmap *altmap; 196 175 void *p; 197 176 int rc; 198 177 199 178 if (vmemmap_populated(start, page_size)) 200 179 continue; 201 180 202 - p = vmemmap_alloc_block(page_size, node); 181 + /* altmap lookups only work at section boundaries */ 182 + altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); 183 + 184 + p = __vmemmap_alloc_block_buf(page_size, node, altmap); 203 185 if (!p) 204 186 return -ENOMEM; 205 187 ··· 260 234 void __ref vmemmap_free(unsigned long start, unsigned long end) 261 235 { 262 236 unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; 237 + unsigned long page_order = get_order(page_size); 263 238 264 239 start = _ALIGN_DOWN(start, page_size); 265 240 266 241 pr_debug("vmemmap_free %lx...%lx\n", start, end); 267 242 268 243 for (; start < end; start += page_size) { 269 - unsigned long addr; 244 + unsigned long nr_pages, addr; 245 + struct vmem_altmap *altmap; 246 + struct page *section_base; 247 + struct page *page; 270 248 271 249 /* 272 250 * the section has already be marked as invalid, so ··· 281 251 continue; 282 252 283 253 addr = vmemmap_list_free(start); 284 - if (addr) { 285 - struct page *page = pfn_to_page(addr >> PAGE_SHIFT); 254 + if (!addr) 255 + continue; 286 256 287 - if (PageReserved(page)) { 288 - /* allocated from bootmem */ 289 - if (page_size < PAGE_SIZE) { 290 - /* 291 - * this shouldn't happen, but if it is 292 - * the case, leave the memory there 293 - */ 294 - WARN_ON_ONCE(1); 295 - } else { 296 - unsigned int nr_pages = 297 - 1 << get_order(page_size); 298 - while (nr_pages--) 299 - free_reserved_page(page++); 300 - } 301 - } else 302 - free_pages((unsigned long)(__va(addr)), 303 - get_order(page_size)); 257 + page = pfn_to_page(addr >> PAGE_SHIFT); 258 + section_base = 
pfn_to_page(vmemmap_section_start(start)); 259 + nr_pages = 1 << page_order; 304 260 305 - vmemmap_remove_mapping(start, page_size); 261 + altmap = to_vmem_altmap((unsigned long) section_base); 262 + if (altmap) { 263 + vmem_altmap_free(altmap, nr_pages); 264 + } else if (PageReserved(page)) { 265 + /* allocated from bootmem */ 266 + if (page_size < PAGE_SIZE) { 267 + /* 268 + * this shouldn't happen, but if it is 269 + * the case, leave the memory there 270 + */ 271 + WARN_ON_ONCE(1); 272 + } else { 273 + while (nr_pages--) 274 + free_reserved_page(page++); 275 + } 276 + } else { 277 + free_pages((unsigned long)(__va(addr)), page_order); 306 278 } 279 + 280 + vmemmap_remove_mapping(start, page_size); 307 281 } 308 282 } 309 283 #endif
+15 -5
arch/powerpc/mm/mem.c
··· 36 36 #include <linux/hugetlb.h> 37 37 #include <linux/slab.h> 38 38 #include <linux/vmalloc.h> 39 + #include <linux/memremap.h> 39 40 40 41 #include <asm/pgalloc.h> 41 42 #include <asm/prom.h> ··· 152 151 { 153 152 unsigned long start_pfn = start >> PAGE_SHIFT; 154 153 unsigned long nr_pages = size >> PAGE_SHIFT; 155 - struct zone *zone; 154 + struct vmem_altmap *altmap; 155 + struct page *page; 156 156 int ret; 157 157 158 - zone = page_zone(pfn_to_page(start_pfn)); 159 - ret = __remove_pages(zone, start_pfn, nr_pages); 158 + /* 159 + * If we have an altmap then we need to skip over any reserved PFNs 160 + * when querying the zone. 161 + */ 162 + page = pfn_to_page(start_pfn); 163 + altmap = to_vmem_altmap((unsigned long) page); 164 + if (altmap) 165 + page += vmem_altmap_offset(altmap); 166 + 167 + ret = __remove_pages(page_zone(page), start_pfn, nr_pages); 160 168 if (ret) 161 169 return ret; 162 170 ··· 315 305 unsigned long end = __fix_to_virt(FIX_HOLE); 316 306 317 307 for (; v < end; v += PAGE_SIZE) 318 - map_page(v, 0, 0); /* XXX gross */ 308 + map_kernel_page(v, 0, 0); /* XXX gross */ 319 309 #endif 320 310 321 311 #ifdef CONFIG_HIGHMEM 322 - map_page(PKMAP_BASE, 0, 0); /* XXX gross */ 312 + map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */ 323 313 pkmap_page_table = virt_to_kpte(PKMAP_BASE); 324 314 325 315 kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
+10 -5
arch/powerpc/mm/mmu_context_book3s64.c
··· 235 235 #ifdef CONFIG_PPC_RADIX_MMU 236 236 void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) 237 237 { 238 - asm volatile("isync": : :"memory"); 239 - mtspr(SPRN_PID, next->context.id); 240 - asm volatile("isync \n" 241 - PPC_SLBIA(0x7) 242 - : : :"memory"); 238 + 239 + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { 240 + isync(); 241 + mtspr(SPRN_PID, next->context.id); 242 + isync(); 243 + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); 244 + } else { 245 + mtspr(SPRN_PID, next->context.id); 246 + isync(); 247 + } 243 248 } 244 249 #endif
-1
arch/powerpc/mm/mmu_decl.h
··· 94 94 #ifdef CONFIG_PPC32 95 95 96 96 extern void mapin_ram(void); 97 - extern int map_page(unsigned long va, phys_addr_t pa, int flags); 98 97 extern void setbat(int index, unsigned long virt, phys_addr_t phys, 99 98 unsigned int size, pgprot_t prot); 100 99
+2 -2
arch/powerpc/mm/pgtable-book3s64.c
··· 32 32 { 33 33 int changed; 34 34 #ifdef CONFIG_DEBUG_VM 35 - WARN_ON(!pmd_trans_huge(*pmdp)); 35 + WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); 36 36 assert_spin_locked(&vma->vm_mm->page_table_lock); 37 37 #endif 38 38 changed = !pmd_same(*(pmdp), entry); ··· 59 59 #ifdef CONFIG_DEBUG_VM 60 60 WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); 61 61 assert_spin_locked(&mm->page_table_lock); 62 - WARN_ON(!pmd_trans_huge(pmd)); 62 + WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd))); 63 63 #endif 64 64 trace_hugepage_set_pmd(addr, pmd_val(pmd)); 65 65 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
+114 -1
arch/powerpc/mm/pgtable-hash64.c
··· 11 11 12 12 #include <linux/sched.h> 13 13 #include <linux/mm_types.h> 14 + #include <linux/mm.h> 14 15 15 16 #include <asm/pgalloc.h> 17 + #include <asm/pgtable.h> 18 + #include <asm/sections.h> 19 + #include <asm/mmu.h> 16 20 #include <asm/tlb.h> 17 21 18 22 #include "mmu_decl.h" ··· 25 21 #include <trace/events/thp.h> 26 22 27 23 #ifdef CONFIG_SPARSEMEM_VMEMMAP 24 + /* 25 + * vmemmap is the starting address of the virtual address space where 26 + * struct pages are allocated for all possible PFNs present on the system 27 + * including holes and bad memory (hence sparse). These virtual struct 28 + * pages are stored in sequence in this virtual address space irrespective 29 + * of the fact whether the corresponding PFN is valid or not. This achieves 30 + * constant relationship between address of struct page and its PFN. 31 + * 32 + * During boot or memory hotplug operation when a new memory section is 33 + * added, physical memory allocation (including hash table bolting) will 34 + * be performed for the set of struct pages which are part of the memory 35 + * section. This saves memory by not allocating struct pages for PFNs 36 + * which are not valid. 
37 + * 38 + * ---------------------------------------------- 39 + * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES| 40 + * ---------------------------------------------- 41 + * 42 + * f000000000000000 c000000000000000 43 + * vmemmap +--------------+ +--------------+ 44 + * + | page struct | +--------------> | page struct | 45 + * | +--------------+ +--------------+ 46 + * | | page struct | +--------------> | page struct | 47 + * | +--------------+ | +--------------+ 48 + * | | page struct | + +------> | page struct | 49 + * | +--------------+ | +--------------+ 50 + * | | page struct | | +--> | page struct | 51 + * | +--------------+ | | +--------------+ 52 + * | | page struct | | | 53 + * | +--------------+ | | 54 + * | | page struct | | | 55 + * | +--------------+ | | 56 + * | | page struct | | | 57 + * | +--------------+ | | 58 + * | | page struct | | | 59 + * | +--------------+ | | 60 + * | | page struct | +-------+ | 61 + * | +--------------+ | 62 + * | | page struct | +-----------+ 63 + * | +--------------+ 64 + * | | page struct | No mapping 65 + * | +--------------+ 66 + * | | page struct | No mapping 67 + * v +--------------+ 68 + * 69 + * ----------------------------------------- 70 + * | RELATION BETWEEN STRUCT PAGES AND PFNS| 71 + * ----------------------------------------- 72 + * 73 + * vmemmap +--------------+ +---------------+ 74 + * + | page struct | +-------------> | PFN | 75 + * | +--------------+ +---------------+ 76 + * | | page struct | +-------------> | PFN | 77 + * | +--------------+ +---------------+ 78 + * | | page struct | +-------------> | PFN | 79 + * | +--------------+ +---------------+ 80 + * | | page struct | +-------------> | PFN | 81 + * | +--------------+ +---------------+ 82 + * | | | 83 + * | +--------------+ 84 + * | | | 85 + * | +--------------+ 86 + * | | | 87 + * | +--------------+ +---------------+ 88 + * | | page struct | +-------------> | PFN | 89 + * | +--------------+ +---------------+ 90 + * | | | 91 + * | 
+--------------+ 92 + * | | | 93 + * | +--------------+ +---------------+ 94 + * | | page struct | +-------------> | PFN | 95 + * | +--------------+ +---------------+ 96 + * | | page struct | +-------------> | PFN | 97 + * v +--------------+ +---------------+ 98 + */ 28 99 /* 29 100 * On hash-based CPUs, the vmemmap is bolted in the hash table. 30 101 * ··· 188 109 unsigned long old; 189 110 190 111 #ifdef CONFIG_DEBUG_VM 191 - WARN_ON(!pmd_trans_huge(*pmdp)); 112 + WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); 192 113 assert_spin_locked(&mm->page_table_lock); 193 114 #endif 194 115 ··· 220 141 221 142 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 222 143 VM_BUG_ON(pmd_trans_huge(*pmdp)); 144 + VM_BUG_ON(pmd_devmap(*pmdp)); 223 145 224 146 pmd = *pmdp; 225 147 pmd_clear(pmdp); ··· 301 221 { 302 222 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 303 223 VM_BUG_ON(REGION_ID(address) != USER_REGION_ID); 224 + VM_BUG_ON(pmd_devmap(*pmdp)); 304 225 305 226 /* 306 227 * We can't mark the pmd none here, because that will cause a race ··· 423 342 return 1; 424 343 } 425 344 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 345 + 346 + #ifdef CONFIG_STRICT_KERNEL_RWX 347 + void hash__mark_rodata_ro(void) 348 + { 349 + unsigned long start = (unsigned long)_stext; 350 + unsigned long end = (unsigned long)__init_begin; 351 + unsigned long idx; 352 + unsigned int step, shift; 353 + unsigned long newpp = PP_RXXX; 354 + 355 + shift = mmu_psize_defs[mmu_linear_psize].shift; 356 + step = 1 << shift; 357 + 358 + start = ((start + step - 1) >> shift) << shift; 359 + end = (end >> shift) << shift; 360 + 361 + pr_devel("marking ro start %lx, end %lx, step %x\n", 362 + start, end, step); 363 + 364 + if (start == end) { 365 + pr_warn("could not set rodata ro, relocate the start" 366 + " of the kernel to a 0x%x boundary\n", step); 367 + return; 368 + } 369 + 370 + for (idx = start; idx < end; idx += step) 371 + /* Not sure if we can do much with the return value */ 372 + 
mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, 373 + mmu_kernel_ssize); 374 + 375 + } 376 + #endif
+85 -5
arch/powerpc/mm/pgtable-radix.c
··· 11 11 #include <linux/sched/mm.h> 12 12 #include <linux/memblock.h> 13 13 #include <linux/of_fdt.h> 14 + #include <linux/mm.h> 14 15 15 16 #include <asm/pgtable.h> 16 17 #include <asm/pgalloc.h> ··· 20 19 #include <asm/mmu.h> 21 20 #include <asm/firmware.h> 22 21 #include <asm/powernv.h> 22 + #include <asm/sections.h> 23 + #include <asm/trace.h> 23 24 24 25 #include <trace/events/thp.h> 25 26 ··· 111 108 return 0; 112 109 } 113 110 111 + #ifdef CONFIG_STRICT_KERNEL_RWX 112 + void radix__mark_rodata_ro(void) 113 + { 114 + unsigned long start = (unsigned long)_stext; 115 + unsigned long end = (unsigned long)__init_begin; 116 + unsigned long idx; 117 + pgd_t *pgdp; 118 + pud_t *pudp; 119 + pmd_t *pmdp; 120 + pte_t *ptep; 121 + 122 + start = ALIGN_DOWN(start, PAGE_SIZE); 123 + end = PAGE_ALIGN(end); // aligns up 124 + 125 + pr_devel("marking ro start %lx, end %lx\n", start, end); 126 + 127 + for (idx = start; idx < end; idx += PAGE_SIZE) { 128 + pgdp = pgd_offset_k(idx); 129 + pudp = pud_alloc(&init_mm, pgdp, idx); 130 + if (!pudp) 131 + continue; 132 + if (pud_huge(*pudp)) { 133 + ptep = (pte_t *)pudp; 134 + goto update_the_pte; 135 + } 136 + pmdp = pmd_alloc(&init_mm, pudp, idx); 137 + if (!pmdp) 138 + continue; 139 + if (pmd_huge(*pmdp)) { 140 + ptep = pmdp_ptep(pmdp); 141 + goto update_the_pte; 142 + } 143 + ptep = pte_alloc_kernel(pmdp, idx); 144 + if (!ptep) 145 + continue; 146 + update_the_pte: 147 + radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0); 148 + } 149 + 150 + radix__flush_tlb_kernel_range(start, end); 151 + } 152 + #endif /* CONFIG_STRICT_KERNEL_RWX */ 153 + 114 154 static inline void __meminit print_mapping(unsigned long start, 115 155 unsigned long end, 116 156 unsigned long size) ··· 167 121 static int __meminit create_physical_mapping(unsigned long start, 168 122 unsigned long end) 169 123 { 170 - unsigned long addr, mapping_size = 0; 124 + unsigned long vaddr, addr, mapping_size = 0; 125 + pgprot_t prot; 126 + unsigned long 
max_mapping_size; 127 + #ifdef CONFIG_STRICT_KERNEL_RWX 128 + int split_text_mapping = 1; 129 + #else 130 + int split_text_mapping = 0; 131 + #endif 171 132 172 133 start = _ALIGN_UP(start, PAGE_SIZE); 173 134 for (addr = start; addr < end; addr += mapping_size) { ··· 183 130 184 131 gap = end - addr; 185 132 previous_size = mapping_size; 133 + max_mapping_size = PUD_SIZE; 186 134 135 + retry: 187 136 if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && 188 - mmu_psize_defs[MMU_PAGE_1G].shift) 137 + mmu_psize_defs[MMU_PAGE_1G].shift && 138 + PUD_SIZE <= max_mapping_size) 189 139 mapping_size = PUD_SIZE; 190 140 else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && 191 141 mmu_psize_defs[MMU_PAGE_2M].shift) ··· 196 140 else 197 141 mapping_size = PAGE_SIZE; 198 142 143 + if (split_text_mapping && (mapping_size == PUD_SIZE) && 144 + (addr <= __pa_symbol(__init_begin)) && 145 + (addr + mapping_size) >= __pa_symbol(_stext)) { 146 + max_mapping_size = PMD_SIZE; 147 + goto retry; 148 + } 149 + 150 + if (split_text_mapping && (mapping_size == PMD_SIZE) && 151 + (addr <= __pa_symbol(__init_begin)) && 152 + (addr + mapping_size) >= __pa_symbol(_stext)) 153 + mapping_size = PAGE_SIZE; 154 + 199 155 if (mapping_size != previous_size) { 200 156 print_mapping(start, addr, previous_size); 201 157 start = addr; 202 158 } 203 159 204 - rc = radix__map_kernel_page((unsigned long)__va(addr), addr, 205 - PAGE_KERNEL_X, mapping_size); 160 + vaddr = (unsigned long)__va(addr); 161 + 162 + if (overlaps_kernel_text(vaddr, vaddr + mapping_size) || 163 + overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) 164 + prot = PAGE_KERNEL_X; 165 + else 166 + prot = PAGE_KERNEL; 167 + 168 + rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); 206 169 if (rc) 207 170 return rc; 208 171 } ··· 265 190 asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : 266 191 "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); 267 192 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 193 + trace_tlbie(0, 0, 
TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1); 268 194 } 269 195 270 196 static void __init radix_init_partition_table(void) ··· 392 316 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 393 317 : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); 394 318 asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); 319 + trace_tlbie(0, 0, rb, 0, 2, 0, 1); 320 + trace_tlbie(0, 0, rb, 0, 2, 1, 1); 321 + 395 322 /* 396 323 * now switch the HID 397 324 */ ··· 762 683 unsigned long old; 763 684 764 685 #ifdef CONFIG_DEBUG_VM 765 - WARN_ON(!radix__pmd_trans_huge(*pmdp)); 686 + WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); 766 687 assert_spin_locked(&mm->page_table_lock); 767 688 #endif 768 689 ··· 780 701 781 702 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 782 703 VM_BUG_ON(radix__pmd_trans_huge(*pmdp)); 704 + VM_BUG_ON(pmd_devmap(*pmdp)); 783 705 /* 784 706 * khugepaged calls this for normal pmd 785 707 */
+5 -10
arch/powerpc/mm/pgtable_32.c
··· 60 60 { 61 61 struct page *ptepage; 62 62 63 - gfp_t flags = GFP_KERNEL | __GFP_ZERO; 63 + gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT; 64 64 65 65 ptepage = alloc_pages(flags, 0); 66 66 if (!ptepage) ··· 189 189 190 190 err = 0; 191 191 for (i = 0; i < size && err == 0; i += PAGE_SIZE) 192 - err = map_page(v+i, p+i, flags); 192 + err = map_kernel_page(v+i, p+i, flags); 193 193 if (err) { 194 194 if (slab_is_available()) 195 195 vunmap((void *)v); ··· 215 215 } 216 216 EXPORT_SYMBOL(iounmap); 217 217 218 - int map_page(unsigned long va, phys_addr_t pa, int flags) 218 + int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) 219 219 { 220 220 pmd_t *pd; 221 221 pte_t *pg; ··· 255 255 ktext = ((char *)v >= _stext && (char *)v < etext) || 256 256 ((char *)v >= _sinittext && (char *)v < _einittext); 257 257 f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL); 258 - map_page(v, p, f); 258 + map_kernel_page(v, p, f); 259 259 #ifdef CONFIG_PPC_STD_MMU_32 260 260 if (ktext) 261 261 hash_preload(&init_mm, v, 0, 0x300); ··· 387 387 return; 388 388 } 389 389 390 - map_page(address, phys, pgprot_val(flags)); 390 + map_kernel_page(address, phys, pgprot_val(flags)); 391 391 fixmaps++; 392 - } 393 - 394 - void __this_fixmap_does_not_exist(void) 395 - { 396 - WARN_ON(1); 397 392 }
+36 -9
arch/powerpc/mm/pgtable_64.c
··· 47 47 #include <asm/smp.h> 48 48 #include <asm/machdep.h> 49 49 #include <asm/tlb.h> 50 + #include <asm/trace.h> 50 51 #include <asm/processor.h> 51 52 #include <asm/cputable.h> 52 53 #include <asm/sections.h> ··· 324 323 */ 325 324 struct page *pmd_page(pmd_t pmd) 326 325 { 327 - if (pmd_trans_huge(pmd) || pmd_huge(pmd)) 326 + if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) 328 327 return pte_page(pmd_pte(pmd)); 329 328 return virt_to_page(pmd_page_vaddr(pmd)); 330 329 } ··· 352 351 static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) 353 352 { 354 353 void *ret = NULL; 355 - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); 356 - if (!page) 357 - return NULL; 358 - if (!kernel && !pgtable_page_ctor(page)) { 359 - __free_page(page); 360 - return NULL; 354 + struct page *page; 355 + 356 + if (!kernel) { 357 + page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT); 358 + if (!page) 359 + return NULL; 360 + if (!pgtable_page_ctor(page)) { 361 + __free_page(page); 362 + return NULL; 363 + } 364 + } else { 365 + page = alloc_page(PGALLOC_GFP); 366 + if (!page) 367 + return NULL; 361 368 } 362 369 363 370 ret = page_address(page); ··· 478 469 * use of this partition ID was, not the new use. 
479 470 */ 480 471 asm volatile("ptesync" : : : "memory"); 481 - if (old & PATB_HR) 472 + if (old & PATB_HR) { 482 473 asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : 483 474 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 484 - else 475 + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); 476 + } else { 485 477 asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : 486 478 "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 479 + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); 480 + } 487 481 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 488 482 } 489 483 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); 490 484 #endif /* CONFIG_PPC_BOOK3S_64 */ 485 + 486 + #ifdef CONFIG_STRICT_KERNEL_RWX 487 + void mark_rodata_ro(void) 488 + { 489 + if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) { 490 + pr_warn("Warning: Unable to mark rodata read only on this CPU.\n"); 491 + return; 492 + } 493 + 494 + if (radix_enabled()) 495 + radix__mark_rodata_ro(); 496 + else 497 + hash__mark_rodata_ro(); 498 + } 499 + #endif
+1 -9
arch/powerpc/mm/slb.c
··· 33 33 KSTACK_INDEX = 2, /* Kernel stack map */ 34 34 }; 35 35 36 - extern void slb_allocate_realmode(unsigned long ea); 37 - 38 - static void slb_allocate(unsigned long ea) 39 - { 40 - /* Currently, we do real mode for all SLBs including user, but 41 - * that will change if we bring back dynamic VSIDs 42 - */ 43 - slb_allocate_realmode(ea); 44 - } 36 + extern void slb_allocate(unsigned long ea); 45 37 46 38 #define slb_esid_mask(ssize) \ 47 39 (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
+17 -13
arch/powerpc/mm/slb_low.S
··· 65 65 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) 66 66 67 67 68 - /* void slb_allocate_realmode(unsigned long ea); 68 + /* void slb_allocate(unsigned long ea); 69 69 * 70 70 * Create an SLB entry for the given EA (user or kernel). 71 71 * r3 = faulting address, r13 = PACA 72 72 * r9, r10, r11 are clobbered by this function 73 + * r3 is preserved. 73 74 * No other registers are examined or changed. 74 75 */ 75 - _GLOBAL(slb_allocate_realmode) 76 + _GLOBAL(slb_allocate) 76 77 /* 77 78 * check for bad kernel/user address 78 79 * (ea & ~REGION_MASK) >= PGTABLE_RANGE ··· 236 235 * dont have any LRU information to help us choose a slot. 237 236 */ 238 237 238 + mr r9,r3 239 + 240 + /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ 239 241 7: ld r10,PACASTABRR(r13) 240 242 addi r10,r10,1 241 243 /* This gets soft patched on boot. */ ··· 253 249 std r10,PACASTABRR(r13) 254 250 255 251 3: 256 - rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */ 257 - oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ 252 + rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ 253 + oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ 258 254 259 - /* r3 = ESID data, r11 = VSID data */ 255 + /* r9 = ESID data, r11 = VSID data */ 260 256 261 257 /* 262 258 * No need for an isync before or after this slbmte. 
The exception ··· 269 265 bgelr cr7 270 266 271 267 /* Update the slb cache */ 272 - lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 273 - cmpldi r3,SLB_CACHE_ENTRIES 268 + lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ 269 + cmpldi r9,SLB_CACHE_ENTRIES 274 270 bge 1f 275 271 276 272 /* still room in the slb cache */ 277 - sldi r11,r3,2 /* r11 = offset * sizeof(u32) */ 273 + sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ 278 274 srdi r10,r10,28 /* get the 36 bits of the ESID */ 279 275 add r11,r11,r13 /* r11 = (u32 *)paca + offset */ 280 276 stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ 281 - addi r3,r3,1 /* offset++ */ 277 + addi r9,r9,1 /* offset++ */ 282 278 b 2f 283 279 1: /* offset >= SLB_CACHE_ENTRIES */ 284 - li r3,SLB_CACHE_ENTRIES+1 280 + li r9,SLB_CACHE_ENTRIES+1 285 281 2: 286 - sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 282 + sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ 287 283 crclr 4*cr0+eq /* set result to "success" */ 288 284 blr 289 285 ··· 305 301 rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ 306 302 307 303 /* r3 = EA, r11 = VSID data */ 308 - clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */ 304 + clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ 309 305 b 7b 310 306 311 307 312 - _ASM_NOKPROBE_SYMBOL(slb_allocate_realmode) 308 + _ASM_NOKPROBE_SYMBOL(slb_allocate) 313 309 _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) 314 310 _ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) 315 311 _ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
+9
arch/powerpc/mm/tlb-radix.c
··· 16 16 17 17 #include <asm/tlb.h> 18 18 #include <asm/tlbflush.h> 19 + #include <asm/trace.h> 19 20 20 21 21 22 #define RIC_FLUSH_TLB 0 ··· 36 35 37 36 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 38 37 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 38 + trace_tlbie(0, 1, rb, rs, ric, prs, r); 39 39 } 40 40 41 41 /* ··· 89 87 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 90 88 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 91 89 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 90 + trace_tlbie(0, 0, rb, rs, ric, prs, r); 92 91 } 93 92 94 93 static inline void _tlbiel_va(unsigned long va, unsigned long pid, ··· 107 104 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 108 105 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 109 106 asm volatile("ptesync": : :"memory"); 107 + trace_tlbie(0, 1, rb, rs, ric, prs, r); 110 108 } 111 109 112 110 static inline void _tlbie_va(unsigned long va, unsigned long pid, ··· 125 121 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 126 122 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 127 123 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 124 + trace_tlbie(0, 0, rb, rs, ric, prs, r); 128 125 } 129 126 130 127 /* ··· 382 377 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 383 378 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 384 379 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 380 + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 385 381 } 386 382 EXPORT_SYMBOL(radix__flush_tlb_lpid_va); 387 383 ··· 400 394 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 401 395 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 402 396 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 397 + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 403 398 } 404 399 EXPORT_SYMBOL(radix__flush_tlb_lpid); 405 400 ··· 427 420 */ 428 421 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 429 422 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 423 + trace_tlbie(0, 0, rb, rs, 
ric, prs, r); 430 424 /* 431 425 * now flush host entires by passing PRS = 0 and LPID == 0 432 426 */ 433 427 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 434 428 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 435 429 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 430 + trace_tlbie(0, 0, rb, 0, ric, prs, r); 436 431 } 437 432 438 433 void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
+2 -4
arch/powerpc/mm/tlb_hash64.c
··· 93 93 94 94 /* 95 95 * Check if we have an active batch on this CPU. If not, just 96 - * flush now and return. For now, we don global invalidates 97 - * in that case, might be worth testing the mm cpu mask though 98 - * and decide to use local invalidates instead... 96 + * flush now and return. 99 97 */ 100 98 if (!batch->active) { 101 - flush_hash_page(vpn, rpte, psize, ssize, 0); 99 + flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm)); 102 100 put_cpu_var(ppc64_tlb_batch); 103 101 return; 104 102 }
+181 -61
arch/powerpc/perf/hv-24x7.c
··· 18 18 #include <linux/slab.h> 19 19 #include <linux/vmalloc.h> 20 20 21 + #include <asm/cputhreads.h> 21 22 #include <asm/firmware.h> 22 23 #include <asm/hvcall.h> 23 24 #include <asm/io.h> ··· 27 26 #include "hv-24x7.h" 28 27 #include "hv-24x7-catalog.h" 29 28 #include "hv-common.h" 29 + 30 + /* Version of the 24x7 hypervisor API that we should use in this machine. */ 31 + static int interface_version; 32 + 33 + /* Whether we have to aggregate result data for some domains. */ 34 + static bool aggregate_result_elements; 30 35 31 36 static bool domain_is_valid(unsigned domain) 32 37 { ··· 61 54 } 62 55 } 63 56 57 + /* Domains for which more than one result element are returned for each event. */ 58 + static bool domain_needs_aggregation(unsigned int domain) 59 + { 60 + return aggregate_result_elements && 61 + (domain == HV_PERF_DOMAIN_PHYS_CORE || 62 + (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE && 63 + domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE)); 64 + } 65 + 64 66 static const char *domain_name(unsigned domain) 65 67 { 66 68 if (!domain_is_valid(domain)) ··· 90 74 91 75 static bool catalog_entry_domain_is_valid(unsigned domain) 92 76 { 93 - return is_physical_domain(domain); 77 + /* POWER8 doesn't support virtual domains. 
*/ 78 + if (interface_version == 1) 79 + return is_physical_domain(domain); 80 + else 81 + return domain_is_valid(domain); 94 82 } 95 83 96 84 /* ··· 185 165 #define H24x7_DATA_BUFFER_SIZE 4096 186 166 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); 187 167 DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096); 168 + 169 + static unsigned int max_num_requests(int interface_version) 170 + { 171 + return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer)) 172 + / H24x7_REQUEST_SIZE(interface_version); 173 + } 188 174 189 175 static char *event_name(struct hv_24x7_event_data *ev, int *len) 190 176 { ··· 286 260 return start + nl + dl + ldl; 287 261 } 288 262 289 - static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, 290 - unsigned long version, 291 - unsigned long index) 263 + static long h_get_24x7_catalog_page_(unsigned long phys_4096, 264 + unsigned long version, unsigned long index) 292 265 { 293 266 pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)", 294 267 phys_4096, version, index); ··· 298 273 phys_4096, version, index); 299 274 } 300 275 301 - static unsigned long h_get_24x7_catalog_page(char page[], 302 - u64 version, u32 index) 276 + static long h_get_24x7_catalog_page(char page[], u64 version, u32 index) 303 277 { 304 278 return h_get_24x7_catalog_page_(virt_to_phys(page), 305 279 version, index); ··· 688 664 struct attribute ***event_descs_, 689 665 struct attribute ***event_long_descs_) 690 666 { 691 - unsigned long hret; 667 + long hret; 692 668 size_t catalog_len, catalog_page_len, event_entry_count, 693 669 event_data_len, event_data_offs, 694 670 event_data_bytes, junk_events, event_idx, event_attr_ct, i, 695 671 attr_max, event_idx_last, desc_ct, long_desc_ct; 696 672 ssize_t ct, ev_len; 697 - uint32_t catalog_version_num; 673 + uint64_t catalog_version_num; 698 674 struct attribute **events, **event_descs, **event_long_descs; 699 675 struct hv_24x7_catalog_page_0 
*page_0 = 700 676 kmem_cache_alloc(hv_page_cache, GFP_KERNEL); ··· 730 706 event_data_offs = be16_to_cpu(page_0->event_data_offs); 731 707 event_data_len = be16_to_cpu(page_0->event_data_len); 732 708 733 - pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n", 734 - (size_t)catalog_version_num, catalog_len, 709 + pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n", 710 + catalog_version_num, catalog_len, 735 711 event_entry_count, event_data_offs, event_data_len); 736 712 737 713 if ((MAX_4K < event_data_len) ··· 785 761 catalog_version_num, 786 762 i + event_data_offs); 787 763 if (hret) { 788 - pr_err("failed to get event data in page %zu\n", 789 - i + event_data_offs); 764 + pr_err("Failed to get event data in page %zu: rc=%ld\n", 765 + i + event_data_offs, hret); 790 766 ret = -EIO; 791 767 goto e_event_data; 792 768 } ··· 927 903 struct bin_attribute *bin_attr, char *buf, 928 904 loff_t offset, size_t count) 929 905 { 930 - unsigned long hret; 906 + long hret; 931 907 ssize_t ret = 0; 932 908 size_t catalog_len = 0, catalog_page_len = 0; 933 909 loff_t page_offset = 0; ··· 1012 988 struct device_attribute *dev_attr, \ 1013 989 char *buf) \ 1014 990 { \ 1015 - unsigned long hret; \ 991 + long hret; \ 1016 992 ssize_t ret = 0; \ 1017 993 void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \ 1018 994 struct hv_24x7_catalog_page_0 *page_0 = page; \ ··· 1064 1040 NULL, 1065 1041 }; 1066 1042 1067 - static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer, 1068 - struct hv_24x7_data_result_buffer *result_buffer, 1069 - unsigned long ret) 1070 - { 1071 - struct hv_24x7_request *req; 1072 - 1073 - req = &request_buffer->requests[0]; 1074 - pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => " 1075 - "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n", 1076 - req->performance_domain, req->data_offset, 1077 - req->starting_ix, req->starting_lpar_ix, ret, ret, 1078 - result_buffer->detailed_rc, 1079 - result_buffer->failing_request_ix); 1080 - } 1081 - 
1082 1043 /* 1083 1044 * Start the process for a new H_GET_24x7_DATA hcall. 1084 1045 */ ··· 1071 1062 struct hv_24x7_data_result_buffer *result_buffer) 1072 1063 { 1073 1064 1074 - memset(request_buffer, 0, 4096); 1075 - memset(result_buffer, 0, 4096); 1065 + memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE); 1066 + memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE); 1076 1067 1077 - request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT; 1068 + request_buffer->interface_version = interface_version; 1078 1069 /* memset above set request_buffer->num_requests to 0 */ 1079 1070 } 1080 1071 ··· 1085 1076 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer, 1086 1077 struct hv_24x7_data_result_buffer *result_buffer) 1087 1078 { 1088 - unsigned long ret; 1079 + long ret; 1089 1080 1090 1081 /* 1091 1082 * NOTE: Due to variable number of array elements in request and ··· 1096 1087 virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE, 1097 1088 virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE); 1098 1089 1099 - if (ret) 1100 - log_24x7_hcall(request_buffer, result_buffer, ret); 1090 + if (ret) { 1091 + struct hv_24x7_request *req; 1101 1092 1102 - return ret; 1093 + req = request_buffer->requests; 1094 + pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n", 1095 + req->performance_domain, req->data_offset, 1096 + req->starting_ix, req->starting_lpar_ix, 1097 + ret, ret, result_buffer->detailed_rc, 1098 + result_buffer->failing_request_ix); 1099 + return -EIO; 1100 + } 1101 + 1102 + return 0; 1103 1103 } 1104 1104 1105 1105 /* ··· 1123 1105 { 1124 1106 u16 idx; 1125 1107 int i; 1108 + size_t req_size; 1126 1109 struct hv_24x7_request *req; 1127 1110 1128 - if (request_buffer->num_requests > 254) { 1111 + if (request_buffer->num_requests >= 1112 + max_num_requests(request_buffer->interface_version)) { 1129 1113 pr_devel("Too many requests for 24x7 HCALL %d\n", 1130 1114 
request_buffer->num_requests); 1131 1115 return -EINVAL; ··· 1144 1124 idx = event_get_vcpu(event); 1145 1125 } 1146 1126 1127 + req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version); 1128 + 1147 1129 i = request_buffer->num_requests++; 1148 - req = &request_buffer->requests[i]; 1130 + req = (void *) request_buffer->requests + i * req_size; 1149 1131 1150 1132 req->performance_domain = event_get_domain(event); 1151 1133 req->data_size = cpu_to_be16(8); 1152 1134 req->data_offset = cpu_to_be32(event_get_offset(event)); 1153 - req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)), 1135 + req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)); 1154 1136 req->max_num_lpars = cpu_to_be16(1); 1155 1137 req->starting_ix = cpu_to_be16(idx); 1156 1138 req->max_ix = cpu_to_be16(1); 1157 1139 1140 + if (request_buffer->interface_version > 1) { 1141 + if (domain_needs_aggregation(req->performance_domain)) 1142 + req->max_num_thread_groups = -1; 1143 + else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) { 1144 + req->starting_thread_group_ix = idx % 2; 1145 + req->max_num_thread_groups = 1; 1146 + } 1147 + } 1148 + 1158 1149 return 0; 1159 1150 } 1160 1151 1161 - static unsigned long single_24x7_request(struct perf_event *event, u64 *count) 1152 + /** 1153 + * get_count_from_result - get event count from all result elements in result 1154 + * 1155 + * If the event corresponding to this result needs aggregation of the result 1156 + * element values, then this function does that. 1157 + * 1158 + * @event: Event associated with @res. 1159 + * @resb: Result buffer containing @res. 1160 + * @res: Result to work on. 1161 + * @countp: Output variable containing the event count. 1162 + * @next: Optional output variable pointing to the next result in @resb. 
1163 + */ 1164 + static int get_count_from_result(struct perf_event *event, 1165 + struct hv_24x7_data_result_buffer *resb, 1166 + struct hv_24x7_result *res, u64 *countp, 1167 + struct hv_24x7_result **next) 1162 1168 { 1163 - unsigned long ret; 1169 + u16 num_elements = be16_to_cpu(res->num_elements_returned); 1170 + u16 data_size = be16_to_cpu(res->result_element_data_size); 1171 + unsigned int data_offset; 1172 + void *element_data; 1173 + int i; 1174 + u64 count; 1175 + 1176 + /* 1177 + * We can bail out early if the result is empty. 1178 + */ 1179 + if (!num_elements) { 1180 + pr_debug("Result of request %hhu is empty, nothing to do\n", 1181 + res->result_ix); 1182 + 1183 + if (next) 1184 + *next = (struct hv_24x7_result *) res->elements; 1185 + 1186 + return -ENODATA; 1187 + } 1188 + 1189 + /* 1190 + * Since we always specify 1 as the maximum for the smallest resource 1191 + * we're requesting, there should to be only one element per result. 1192 + * Except when an event needs aggregation, in which case there are more. 1193 + */ 1194 + if (num_elements != 1 && 1195 + !domain_needs_aggregation(event_get_domain(event))) { 1196 + pr_err("Error: result of request %hhu has %hu elements\n", 1197 + res->result_ix, num_elements); 1198 + 1199 + return -EIO; 1200 + } 1201 + 1202 + if (data_size != sizeof(u64)) { 1203 + pr_debug("Error: result of request %hhu has data of %hu bytes\n", 1204 + res->result_ix, data_size); 1205 + 1206 + return -ENOTSUPP; 1207 + } 1208 + 1209 + if (resb->interface_version == 1) 1210 + data_offset = offsetof(struct hv_24x7_result_element_v1, 1211 + element_data); 1212 + else 1213 + data_offset = offsetof(struct hv_24x7_result_element_v2, 1214 + element_data); 1215 + 1216 + /* Go through the result elements in the result. 
*/ 1217 + for (i = count = 0, element_data = res->elements + data_offset; 1218 + i < num_elements; 1219 + i++, element_data += data_size + data_offset) 1220 + count += be64_to_cpu(*((u64 *) element_data)); 1221 + 1222 + *countp = count; 1223 + 1224 + /* The next result is after the last result element. */ 1225 + if (next) 1226 + *next = element_data - data_offset; 1227 + 1228 + return 0; 1229 + } 1230 + 1231 + static int single_24x7_request(struct perf_event *event, u64 *count) 1232 + { 1233 + int ret; 1164 1234 struct hv_24x7_request_buffer *request_buffer; 1165 1235 struct hv_24x7_data_result_buffer *result_buffer; 1166 1236 ··· 1267 1157 goto out; 1268 1158 1269 1159 ret = make_24x7_request(request_buffer, result_buffer); 1270 - if (ret) { 1271 - log_24x7_hcall(request_buffer, result_buffer, ret); 1160 + if (ret) 1272 1161 goto out; 1273 - } 1274 1162 1275 1163 /* process result from hcall */ 1276 - *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]); 1164 + ret = get_count_from_result(event, result_buffer, 1165 + result_buffer->results, count, NULL); 1277 1166 1278 1167 out: 1279 1168 put_cpu_var(hv_24x7_reqb); ··· 1325 1216 return -EINVAL; 1326 1217 } 1327 1218 1328 - /* Domains above 6 are invalid */ 1329 1219 domain = event_get_domain(event); 1330 - if (domain > 6) { 1220 + if (domain >= HV_PERF_DOMAIN_MAX) { 1331 1221 pr_devel("invalid domain %d\n", domain); 1332 1222 return -EINVAL; 1333 1223 } ··· 1358 1250 1359 1251 static u64 h_24x7_get_value(struct perf_event *event) 1360 1252 { 1361 - unsigned long ret; 1362 1253 u64 ct; 1363 - ret = single_24x7_request(event, &ct); 1364 - if (ret) 1254 + 1255 + if (single_24x7_request(event, &ct)) 1365 1256 /* We checked this in event init, shouldn't fail here... 
*/ 1366 1257 return 0; 1367 1258 ··· 1503 1396 { 1504 1397 struct hv_24x7_request_buffer *request_buffer; 1505 1398 struct hv_24x7_data_result_buffer *result_buffer; 1506 - struct hv_24x7_result *resb; 1507 - struct perf_event *event; 1399 + struct hv_24x7_result *res, *next_res; 1508 1400 u64 count; 1509 1401 int i, ret, txn_flags; 1510 1402 struct hv_24x7_hw *h24x7hw; ··· 1523 1417 result_buffer = (void *)get_cpu_var(hv_24x7_resb); 1524 1418 1525 1419 ret = make_24x7_request(request_buffer, result_buffer); 1526 - if (ret) { 1527 - log_24x7_hcall(request_buffer, result_buffer, ret); 1420 + if (ret) 1528 1421 goto put_reqb; 1529 - } 1530 1422 1531 1423 h24x7hw = &get_cpu_var(hv_24x7_hw); 1532 1424 1533 - /* Update event counts from hcall */ 1534 - for (i = 0; i < request_buffer->num_requests; i++) { 1535 - resb = &result_buffer->results[i]; 1536 - count = be64_to_cpu(resb->elements[0].element_data[0]); 1537 - event = h24x7hw->events[i]; 1538 - h24x7hw->events[i] = NULL; 1425 + /* Go through results in the result buffer to update event counts. */ 1426 + for (i = 0, res = result_buffer->results; 1427 + i < result_buffer->num_results; i++, res = next_res) { 1428 + struct perf_event *event = h24x7hw->events[res->result_ix]; 1429 + 1430 + ret = get_count_from_result(event, result_buffer, res, &count, 1431 + &next_res); 1432 + if (ret) 1433 + break; 1434 + 1539 1435 update_event_count(event, count); 1540 1436 } 1541 1437 ··· 1588 1480 if (!firmware_has_feature(FW_FEATURE_LPAR)) { 1589 1481 pr_debug("not a virtualized system, not enabling\n"); 1590 1482 return -ENODEV; 1483 + } else if (!cur_cpu_spec->oprofile_cpu_type) 1484 + return -ENODEV; 1485 + 1486 + /* POWER8 only supports v1, while POWER9 only supports v2. */ 1487 + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8")) 1488 + interface_version = 1; 1489 + else { 1490 + interface_version = 2; 1491 + 1492 + /* SMT8 in POWER9 needs to aggregate result elements. 
*/ 1493 + if (threads_per_core == 8) 1494 + aggregate_result_elements = true; 1591 1495 } 1592 1496 1593 1497 hret = hv_perf_caps_get(&caps);
+59 -10
arch/powerpc/perf/hv-24x7.h
··· 10 10 HV_PERF_DOMAIN_MAX, 11 11 }; 12 12 13 + #define H24x7_REQUEST_SIZE(iface_version) (iface_version == 1 ? 16 : 32) 14 + 13 15 struct hv_24x7_request { 14 16 /* PHYSICAL domains require enabling via phyp/hmc. */ 15 17 __u8 performance_domain; ··· 44 42 /* chip, core, or virtual processor based on @performance_domain */ 45 43 __be16 starting_ix; 46 44 __be16 max_ix; 45 + 46 + /* The following fields were added in v2 of the 24x7 interface. */ 47 + 48 + __u8 starting_thread_group_ix; 49 + 50 + /* -1 means all thread groups starting at @starting_thread_group_ix */ 51 + __u8 max_num_thread_groups; 52 + 53 + __u8 reserved2[0xE]; 47 54 } __packed; 48 55 49 56 struct hv_24x7_request_buffer { 50 57 /* 0 - ? */ 51 58 /* 1 - ? */ 52 - #define HV_24X7_IF_VERSION_CURRENT 0x01 53 59 __u8 interface_version; 54 60 __u8 num_requests; 55 61 __u8 reserved[0xE]; 56 - struct hv_24x7_request requests[1]; 62 + struct hv_24x7_request requests[]; 57 63 } __packed; 58 64 59 - struct hv_24x7_result_element { 65 + struct hv_24x7_result_element_v1 { 60 66 __be16 lpar_ix; 61 67 62 68 /* ··· 77 67 __be32 lpar_cfg_instance_id; 78 68 79 69 /* size = @result_element_data_size of containing result. */ 80 - __u64 element_data[1]; 70 + __u64 element_data[]; 71 + } __packed; 72 + 73 + /* 74 + * We need a separate struct for v2 because the offset of @element_data changed 75 + * between versions. 76 + */ 77 + struct hv_24x7_result_element_v2 { 78 + __be16 lpar_ix; 79 + 80 + /* 81 + * represents the core, chip, or virtual processor based on the 82 + * request's @performance_domain 83 + */ 84 + __be16 domain_ix; 85 + 86 + /* -1 if @performance_domain does not refer to a virtual processor */ 87 + __be32 lpar_cfg_instance_id; 88 + 89 + __u8 thread_group_ix; 90 + 91 + __u8 reserved[7]; 92 + 93 + /* size = @result_element_data_size of containing result. 
*/ 94 + __u64 element_data[]; 81 95 } __packed; 82 96 83 97 struct hv_24x7_result { 98 + /* 99 + * The index of the 24x7 Request Structure in the 24x7 Request Buffer 100 + * used to request this result. 101 + */ 84 102 __u8 result_ix; 85 103 86 104 /* ··· 119 81 __u8 results_complete; 120 82 __be16 num_elements_returned; 121 83 122 - /* This is a copy of @data_size from the corresponding hv_24x7_request */ 84 + /* 85 + * This is a copy of @data_size from the corresponding hv_24x7_request 86 + * 87 + * Warning: to obtain the size of each element in @elements you have 88 + * to add the size of the other members of the result_element struct. 89 + */ 123 90 __be16 result_element_data_size; 124 91 __u8 reserved[0x2]; 125 92 126 - /* WARNING: only valid for first result element due to variable sizes 127 - * of result elements */ 128 - /* struct hv_24x7_result_element[@num_elements_returned] */ 129 - struct hv_24x7_result_element elements[1]; 93 + /* 94 + * Either 95 + * struct hv_24x7_result_element_v1[@num_elements_returned] 96 + * or 97 + * struct hv_24x7_result_element_v2[@num_elements_returned] 98 + * 99 + * depending on the interface_version field of the 100 + * struct hv_24x7_data_result_buffer containing this result. 101 + */ 102 + char elements[]; 130 103 } __packed; 131 104 132 105 struct hv_24x7_data_result_buffer { ··· 153 104 __u8 reserved2[0x8]; 154 105 /* WARNING: only valid for the first result due to variable sizes of 155 106 * results */ 156 - struct hv_24x7_result results[1]; /* [@num_results] */ 107 + struct hv_24x7_result results[]; /* [@num_results] */ 157 108 } __packed; 158 109 159 110 #endif
+3 -1
arch/powerpc/perf/power9-events-list.h
··· 16 16 EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) 17 17 EVENT(PM_CMPLU_STALL, 0x1e054) 18 18 EVENT(PM_INST_CMPL, 0x00002) 19 - EVENT(PM_BRU_CMPL, 0x10012) 19 + EVENT(PM_BRU_CMPL, 0x4d05e) 20 20 EVENT(PM_BR_MPRED_CMPL, 0x400f6) 21 21 22 22 /* All L1 D cache load references counted at finish, gated by reject */ ··· 56 56 /* Instruction Dispatched */ 57 57 EVENT(PM_INST_DISP, 0x200f2) 58 58 EVENT(PM_INST_DISP_ALT, 0x300f2) 59 + /* Alternate Branch event code */ 60 + EVENT(PM_BR_CMPL_ALT, 0x10012)
+7 -1
arch/powerpc/perf/power9-pmu.c
··· 231 231 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, 232 232 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, 233 233 [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP, 234 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL, 234 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT, 235 235 [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, 236 236 [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, 237 237 [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, ··· 453 453 * sampling scenarios in power9 DD1, instead use PM_INST_DISP. 454 454 */ 455 455 EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP; 456 + /* 457 + * Power9 DD1 should use PM_BR_CMPL_ALT event code for 458 + * "branches" to provide correct counter value. 459 + */ 460 + EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT; 461 + EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT; 456 462 rc = register_power_pmu(&power9_isa207_pmu); 457 463 } else { 458 464 rc = register_power_pmu(&power9_pmu);
+12
arch/powerpc/platforms/44x/Kconfig
··· 199 199 help 200 200 This option enables support for the IBM Currituck (476fpe) evaluation board 201 201 202 + config FSP2 203 + bool "IBM FSP2 (476fpe) Support" 204 + depends on PPC_47x 205 + default n 206 + select 476FPE 207 + select IBM_EMAC_EMAC4 if IBM_EMAC 208 + select IBM_EMAC_RGMII if IBM_EMAC 209 + select COMMON_CLK 210 + select DEFAULT_UIMAGE 211 + help 212 + This option enables support for the IBM FSP2 (476fpe) board 213 + 202 214 config AKEBONO 203 215 bool "IBM Akebono (476gtr) Support" 204 216 depends on PPC_47x
+1
arch/powerpc/platforms/44x/Makefile
··· 12 12 obj-$(CONFIG_CANYONLANDS)+= canyonlands.o 13 13 obj-$(CONFIG_CURRITUCK) += ppc476.o 14 14 obj-$(CONFIG_AKEBONO) += ppc476.o 15 + obj-$(CONFIG_FSP2) += fsp2.o
+62
arch/powerpc/platforms/44x/fsp2.c
··· 1 + /* 2 + * FSP-2 board specific routines 3 + * 4 + * Based on earlier code: 5 + * Matt Porter <mporter@kernel.crashing.org> 6 + * Copyright 2002-2005 MontaVista Software Inc. 7 + * 8 + * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net> 9 + * Copyright (c) 2003-2005 Zultys Technologies 10 + * 11 + * Rewritten and ported to the merged powerpc tree: 12 + * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation. 13 + * 14 + * This program is free software; you can redistribute it and/or modify it 15 + * under the terms of the GNU General Public License as published by the 16 + * Free Software Foundation; either version 2 of the License, or (at your 17 + * option) any later version. 18 + */ 19 + 20 + #include <linux/init.h> 21 + #include <linux/of_platform.h> 22 + #include <linux/rtc.h> 23 + 24 + #include <asm/machdep.h> 25 + #include <asm/prom.h> 26 + #include <asm/udbg.h> 27 + #include <asm/time.h> 28 + #include <asm/uic.h> 29 + #include <asm/ppc4xx.h> 30 + 31 + static __initdata struct of_device_id fsp2_of_bus[] = { 32 + { .compatible = "ibm,plb4", }, 33 + { .compatible = "ibm,plb6", }, 34 + { .compatible = "ibm,opb", }, 35 + {}, 36 + }; 37 + 38 + static int __init fsp2_device_probe(void) 39 + { 40 + of_platform_bus_probe(NULL, fsp2_of_bus, NULL); 41 + return 0; 42 + } 43 + machine_device_initcall(fsp2, fsp2_device_probe); 44 + 45 + static int __init fsp2_probe(void) 46 + { 47 + unsigned long root = of_get_flat_dt_root(); 48 + 49 + if (!of_flat_dt_is_compatible(root, "ibm,fsp2")) 50 + return 0; 51 + return 1; 52 + } 53 + 54 + define_machine(fsp2) { 55 + .name = "FSP-2", 56 + .probe = fsp2_probe, 57 + .progress = udbg_progress, 58 + .init_IRQ = uic_init_tree, 59 + .get_irq = uic_get_irq, 60 + .restart = ppc4xx_reset_system, 61 + .calibrate_decr = generic_calibrate_decr, 62 + };
+2 -1
arch/powerpc/platforms/cell/smp.c
··· 115 115 116 116 static int smp_cell_kick_cpu(int nr) 117 117 { 118 - BUG_ON(nr < 0 || nr >= NR_CPUS); 118 + if (nr < 0 || nr >= nr_cpu_ids) 119 + return -EINVAL; 119 120 120 121 if (!smp_startup_cpu(nr)) 121 122 return -ENOENT;
+11 -5
arch/powerpc/platforms/powernv/eeh-powernv.c
··· 48 48 { 49 49 struct pci_controller *hose; 50 50 struct pnv_phb *phb; 51 + int max_diag_size = PNV_PCI_DIAG_BUF_SIZE; 51 52 52 53 if (!firmware_has_feature(FW_FEATURE_OPAL)) { 53 54 pr_warn("%s: OPAL is required !\n", ··· 70 69 if (phb->model == PNV_PHB_MODEL_P7IOC) 71 70 eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); 72 71 72 + if (phb->diag_data_size > max_diag_size) 73 + max_diag_size = phb->diag_data_size; 74 + 73 75 /* 74 76 * PE#0 should be regarded as valid by EEH core 75 77 * if it's not the reserved one. Currently, we ··· 85 81 86 82 break; 87 83 } 84 + 85 + eeh_set_pe_aux_size(max_diag_size); 88 86 89 87 return 0; 90 88 } ··· 546 540 s64 rc; 547 541 548 542 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, 549 - PNV_PCI_DIAG_BUF_SIZE); 543 + phb->diag_data_size); 550 544 if (rc != OPAL_SUCCESS) 551 545 pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", 552 546 __func__, rc, pe->phb->global_number); ··· 1320 1314 static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) 1321 1315 { 1322 1316 struct pnv_phb *phb = hose->private_data; 1323 - struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; 1317 + struct OpalIoP7IOCErrorData *data = 1318 + (struct OpalIoP7IOCErrorData*)phb->diag_data; 1324 1319 long rc; 1325 1320 1326 1321 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); ··· 1556 1549 1557 1550 /* Dump PHB diag-data */ 1558 1551 rc = opal_pci_get_phb_diag_data2(phb->opal_id, 1559 - phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); 1552 + phb->diag_data, phb->diag_data_size); 1560 1553 if (rc == OPAL_SUCCESS) 1561 1554 pnv_pci_dump_phb_diag_data(hose, 1562 - phb->diag.blob); 1555 + phb->diag_data); 1563 1556 1564 1557 /* Try best to clear it */ 1565 1558 opal_pci_eeh_freeze_clear(phb->opal_id, ··· 1802 1795 { 1803 1796 int ret = -EINVAL; 1804 1797 1805 - eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); 1806 1798 ret = eeh_ops_register(&pnv_eeh_ops); 1807 1799 if (!ret) 1808 1800 pr_info("EEH: PowerNV platform initialized\n");
+152 -48
arch/powerpc/platforms/powernv/idle.c
··· 23 23 #include <asm/cpuidle.h> 24 24 #include <asm/code-patching.h> 25 25 #include <asm/smp.h> 26 + #include <asm/runlatch.h> 26 27 27 28 #include "powernv.h" 28 29 #include "subcore.h" ··· 31 30 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 32 31 #define MAX_STOP_STATE 0xF 33 32 33 + #define P9_STOP_SPR_MSR 2000 34 + #define P9_STOP_SPR_PSSCR 855 35 + 34 36 static u32 supported_cpuidle_states; 37 + 38 + /* 39 + * The default stop state that will be used by ppc_md.power_save 40 + * function on platforms that support stop instruction. 41 + */ 42 + static u64 pnv_default_stop_val; 43 + static u64 pnv_default_stop_mask; 44 + static bool default_stop_found; 45 + 46 + /* 47 + * First deep stop state. Used to figure out when to save/restore 48 + * hypervisor context. 49 + */ 50 + u64 pnv_first_deep_stop_state = MAX_STOP_STATE; 51 + 52 + /* 53 + * psscr value and mask of the deepest stop idle state. 54 + * Used when a cpu is offlined. 55 + */ 56 + static u64 pnv_deepest_stop_psscr_val; 57 + static u64 pnv_deepest_stop_psscr_mask; 58 + static bool deepest_stop_found; 35 59 36 60 static int pnv_save_sprs_for_deep_states(void) 37 61 { ··· 74 48 uint64_t hid4_val = mfspr(SPRN_HID4); 75 49 uint64_t hid5_val = mfspr(SPRN_HID5); 76 50 uint64_t hmeer_val = mfspr(SPRN_HMEER); 51 + uint64_t msr_val = MSR_IDLE; 52 + uint64_t psscr_val = pnv_deepest_stop_psscr_val; 77 53 78 54 for_each_possible_cpu(cpu) { 79 55 uint64_t pir = get_hard_smp_processor_id(cpu); ··· 89 61 if (rc != 0) 90 62 return rc; 91 63 64 + if (cpu_has_feature(CPU_FTR_ARCH_300)) { 65 + rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 66 + if (rc) 67 + return rc; 68 + 69 + rc = opal_slw_set_reg(pir, 70 + P9_STOP_SPR_PSSCR, psscr_val); 71 + 72 + if (rc) 73 + return rc; 74 + } 75 + 92 76 /* HIDs are per core registers */ 93 77 if (cpu_thread_in_core(cpu) == 0) { 94 78 ··· 112 72 if (rc != 0) 113 73 return rc; 114 74 115 - rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 116 - if (rc != 0) 117 - return 
rc; 75 + /* Only p8 needs to set extra HID regiters */ 76 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 118 77 119 - rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 120 - if (rc != 0) 121 - return rc; 78 + rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 79 + if (rc != 0) 80 + return rc; 122 81 123 - rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 124 - if (rc != 0) 125 - return rc; 82 + rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 83 + if (rc != 0) 84 + return rc; 85 + 86 + rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 87 + if (rc != 0) 88 + return rc; 89 + } 126 90 } 127 91 } 128 92 ··· 140 96 u32 *core_idle_state; 141 97 142 98 /* 143 - * core_idle_state - First 8 bits track the idle state of each thread 144 - * of the core. The 8th bit is the lock bit. Initially all thread bits 145 - * are set. They are cleared when the thread enters deep idle state 146 - * like sleep and winkle. Initially the lock bit is cleared. 147 - * The lock bit has 2 purposes 148 - * a. While the first thread is restoring core state, it prevents 149 - * other threads in the core from switching to process context. 150 - * b. While the last thread in the core is saving the core state, it 151 - * prevents a different thread from waking up. 99 + * core_idle_state - The lower 8 bits track the idle state of 100 + * each thread of the core. 101 + * 102 + * The most significant bit is the lock bit. 103 + * 104 + * Initially all the bits corresponding to threads_per_core 105 + * are set. They are cleared when the thread enters deep idle 106 + * state like sleep and winkle/stop. 107 + * 108 + * Initially the lock bit is cleared. The lock bit has 2 109 + * purposes: 110 + * a. While the first thread in the core waking up from 111 + * idle is restoring core state, it prevents other 112 + * threads in the core from switching to process 113 + * context. 114 + * b. While the last thread in the core is saving the 115 + * core state, it prevents a different thread from 116 + * waking up. 
152 117 */ 153 118 for (i = 0; i < nr_cores; i++) { 154 119 int first_cpu = i * threads_per_core; ··· 165 112 size_t paca_ptr_array_size; 166 113 167 114 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); 168 - *core_idle_state = PNV_CORE_IDLE_THREAD_BITS; 115 + *core_idle_state = (1 << threads_per_core) - 1; 169 116 paca_ptr_array_size = (threads_per_core * 170 117 sizeof(struct paca_struct *)); 171 118 ··· 284 231 show_fastsleep_workaround_applyonce, 285 232 store_fastsleep_workaround_applyonce); 286 233 287 - /* 288 - * The default stop state that will be used by ppc_md.power_save 289 - * function on platforms that support stop instruction. 290 - */ 291 - static u64 pnv_default_stop_val; 292 - static u64 pnv_default_stop_mask; 293 - static bool default_stop_found; 234 + static unsigned long __power7_idle_type(unsigned long type) 235 + { 236 + unsigned long srr1; 237 + 238 + if (!prep_irq_for_idle_irqsoff()) 239 + return 0; 240 + 241 + __ppc64_runlatch_off(); 242 + srr1 = power7_idle_insn(type); 243 + __ppc64_runlatch_on(); 244 + 245 + fini_irq_for_idle_irqsoff(); 246 + 247 + return srr1; 248 + } 249 + 250 + void power7_idle_type(unsigned long type) 251 + { 252 + unsigned long srr1; 253 + 254 + srr1 = __power7_idle_type(type); 255 + irq_set_pending_from_srr1(srr1); 256 + } 257 + 258 + void power7_idle(void) 259 + { 260 + if (!powersave_nap) 261 + return; 262 + 263 + power7_idle_type(PNV_THREAD_NAP); 264 + } 265 + 266 + static unsigned long __power9_idle_type(unsigned long stop_psscr_val, 267 + unsigned long stop_psscr_mask) 268 + { 269 + unsigned long psscr; 270 + unsigned long srr1; 271 + 272 + if (!prep_irq_for_idle_irqsoff()) 273 + return 0; 274 + 275 + psscr = mfspr(SPRN_PSSCR); 276 + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 277 + 278 + __ppc64_runlatch_off(); 279 + srr1 = power9_idle_stop(psscr); 280 + __ppc64_runlatch_on(); 281 + 282 + fini_irq_for_idle_irqsoff(); 283 + 284 + return srr1; 285 + } 286 + 287 + void 
power9_idle_type(unsigned long stop_psscr_val, 288 + unsigned long stop_psscr_mask) 289 + { 290 + unsigned long srr1; 291 + 292 + srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); 293 + irq_set_pending_from_srr1(srr1); 294 + } 294 295 295 296 /* 296 297 * Used for ppc_md.power_save which needs a function with no parameters 297 298 */ 298 - static void power9_idle(void) 299 + void power9_idle(void) 299 300 { 300 - power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask); 301 + power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 301 302 } 302 303 303 - /* 304 - * First deep stop state. Used to figure out when to save/restore 305 - * hypervisor context. 306 - */ 307 - u64 pnv_first_deep_stop_state = MAX_STOP_STATE; 308 - 309 - /* 310 - * psscr value and mask of the deepest stop idle state. 311 - * Used when a cpu is offlined. 312 - */ 313 - static u64 pnv_deepest_stop_psscr_val; 314 - static u64 pnv_deepest_stop_psscr_mask; 315 - static bool deepest_stop_found; 316 - 304 + #ifdef CONFIG_HOTPLUG_CPU 317 305 /* 318 306 * pnv_cpu_offline: A function that puts the CPU into the deepest 319 307 * available platform idle state on a CPU-Offline. 308 + * interrupts hard disabled and no lazy irq pending. 
320 309 */ 321 310 unsigned long pnv_cpu_offline(unsigned int cpu) 322 311 { 323 312 unsigned long srr1; 324 - 325 313 u32 idle_states = pnv_get_supported_cpuidle_states(); 326 314 315 + __ppc64_runlatch_off(); 316 + 327 317 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { 328 - srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val, 329 - pnv_deepest_stop_psscr_mask); 318 + unsigned long psscr; 319 + 320 + psscr = mfspr(SPRN_PSSCR); 321 + psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 322 + pnv_deepest_stop_psscr_val; 323 + srr1 = power9_idle_stop(psscr); 324 + 330 325 } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { 331 - srr1 = power7_winkle(); 326 + srr1 = power7_idle_insn(PNV_THREAD_WINKLE); 332 327 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || 333 328 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 334 - srr1 = power7_sleep(); 329 + srr1 = power7_idle_insn(PNV_THREAD_SLEEP); 335 330 } else if (idle_states & OPAL_PM_NAP_ENABLED) { 336 - srr1 = power7_nap(1); 331 + srr1 = power7_idle_insn(PNV_THREAD_NAP); 337 332 } else { 338 333 /* This is the fallback method. We emulate snooze */ 339 334 while (!generic_check_cpu_restart(cpu)) { ··· 392 291 HMT_medium(); 393 292 } 394 293 294 + __ppc64_runlatch_on(); 295 + 395 296 return srr1; 396 297 } 298 + #endif 397 299 398 300 /* 399 301 * Power ISA 3.0 idle initialization.
+3 -3
arch/powerpc/platforms/powernv/opal-wrappers.S
··· 99 99 lwz r4,8(r1); 100 100 ld r5,PPC_LR_STKOFF(r1); 101 101 ld r6,PACASAVEDMSR(r13); 102 - mtspr SPRN_SRR0,r5; 103 - mtspr SPRN_SRR1,r6; 104 102 mtcr r4; 105 - rfid 103 + mtspr SPRN_HSRR0,r5; 104 + mtspr SPRN_HSRR1,r6; 105 + hrfid 106 106 107 107 opal_real_call: 108 108 mfcr r11
+128 -5
arch/powerpc/platforms/powernv/pci-ioda.c
··· 1718 1718 */ 1719 1719 } 1720 1720 1721 + static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe) 1722 + { 1723 + unsigned short vendor = 0; 1724 + struct pci_dev *pdev; 1725 + 1726 + if (pe->device_count == 1) 1727 + return true; 1728 + 1729 + /* pe->pdev should be set if it's a single device, pe->pbus if not */ 1730 + if (!pe->pbus) 1731 + return true; 1732 + 1733 + list_for_each_entry(pdev, &pe->pbus->devices, bus_list) { 1734 + if (!vendor) { 1735 + vendor = pdev->vendor; 1736 + continue; 1737 + } 1738 + 1739 + if (pdev->vendor != vendor) 1740 + return false; 1741 + } 1742 + 1743 + return true; 1744 + } 1745 + 1746 + /* 1747 + * Reconfigure TVE#0 to be usable as 64-bit DMA space. 1748 + * 1749 + * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses. 1750 + * Devices can only access more than that if bit 59 of the PCI address is set 1751 + * by hardware, which indicates TVE#1 should be used instead of TVE#0. 1752 + * Many PCI devices are not capable of addressing that many bits, and as a 1753 + * result are limited to the 4GB of virtual memory made available to 32-bit 1754 + * devices in TVE#0. 1755 + * 1756 + * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit 1757 + * devices by configuring the virtual memory past the first 4GB inaccessible 1758 + * by 64-bit DMAs. This should only be used by devices that want more than 1759 + * 4GB, and only on PEs that have no 32-bit devices. 1760 + * 1761 + * Currently this will only work on PHB3 (POWER8). 1762 + */ 1763 + static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe) 1764 + { 1765 + u64 window_size, table_size, tce_count, addr; 1766 + struct page *table_pages; 1767 + u64 tce_order = 28; /* 256MB TCEs */ 1768 + __be64 *tces; 1769 + s64 rc; 1770 + 1771 + /* 1772 + * Window size needs to be a power of two, but needs to account for 1773 + * shifting memory by the 4GB offset required to skip 32bit space. 
1774 + */ 1775 + window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32)); 1776 + tce_count = window_size >> tce_order; 1777 + table_size = tce_count << 3; 1778 + 1779 + if (table_size < PAGE_SIZE) 1780 + table_size = PAGE_SIZE; 1781 + 1782 + table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL, 1783 + get_order(table_size)); 1784 + if (!table_pages) 1785 + goto err; 1786 + 1787 + tces = page_address(table_pages); 1788 + if (!tces) 1789 + goto err; 1790 + 1791 + memset(tces, 0, table_size); 1792 + 1793 + for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) { 1794 + tces[(addr + (1ULL << 32)) >> tce_order] = 1795 + cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE); 1796 + } 1797 + 1798 + rc = opal_pci_map_pe_dma_window(pe->phb->opal_id, 1799 + pe->pe_number, 1800 + /* reconfigure window 0 */ 1801 + (pe->pe_number << 1) + 0, 1802 + 1, 1803 + __pa(tces), 1804 + table_size, 1805 + 1 << tce_order); 1806 + if (rc == OPAL_SUCCESS) { 1807 + pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n"); 1808 + return 0; 1809 + } 1810 + err: 1811 + pe_err(pe, "Error configuring 64-bit DMA bypass\n"); 1812 + return -EIO; 1813 + } 1814 + 1721 1815 static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) 1722 1816 { 1723 1817 struct pci_controller *hose = pci_bus_to_host(pdev->bus); ··· 1820 1726 struct pnv_ioda_pe *pe; 1821 1727 uint64_t top; 1822 1728 bool bypass = false; 1729 + s64 rc; 1823 1730 1824 1731 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 1825 1732 return -ENODEV;; ··· 1835 1740 dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n"); 1836 1741 set_dma_ops(&pdev->dev, &dma_direct_ops); 1837 1742 } else { 1838 - dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 1839 - set_dma_ops(&pdev->dev, &dma_iommu_ops); 1743 + /* 1744 + * If the device can't set the TCE bypass bit but still wants 1745 + * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to 1746 + * bypass the 32-bit region and be 
usable for 64-bit DMAs. 1747 + * The device needs to be able to address all of this space. 1748 + */ 1749 + if (dma_mask >> 32 && 1750 + dma_mask > (memory_hotplug_max() + (1ULL << 32)) && 1751 + pnv_pci_ioda_pe_single_vendor(pe) && 1752 + phb->model == PNV_PHB_MODEL_PHB3) { 1753 + /* Configure the bypass mode */ 1754 + rc = pnv_pci_ioda_dma_64bit_bypass(pe); 1755 + if (rc) 1756 + return rc; 1757 + /* 4GB offset bypasses 32-bit space */ 1758 + set_dma_offset(&pdev->dev, (1ULL << 32)); 1759 + set_dma_ops(&pdev->dev, &dma_direct_ops); 1760 + } else { 1761 + dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n"); 1762 + set_dma_ops(&pdev->dev, &dma_iommu_ops); 1763 + } 1840 1764 } 1841 1765 *pdev->dev.dma_mask = dma_mask; 1842 1766 ··· 3237 3123 phb = hose->private_data; 3238 3124 3239 3125 /* Retrieve the diag data from firmware */ 3240 - ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, 3241 - PNV_PCI_DIAG_BUF_SIZE); 3126 + ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, 3127 + phb->diag_data_size); 3242 3128 if (ret != OPAL_SUCCESS) 3243 3129 return -EIO; 3244 3130 3245 3131 /* Print the diag data to the kernel log */ 3246 - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); 3132 + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); 3247 3133 return 0; 3248 3134 } 3249 3135 ··· 3838 3724 phb->model = PNV_PHB_MODEL_NPU2; 3839 3725 else 3840 3726 phb->model = PNV_PHB_MODEL_UNKNOWN; 3727 + 3728 + /* Initialize diagnostic data buffer */ 3729 + prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL); 3730 + if (prop32) 3731 + phb->diag_data_size = be32_to_cpup(prop32); 3732 + else 3733 + phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; 3734 + 3735 + phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); 3841 3736 3842 3737 /* Parse 32-bit and IO ranges (if any) */ 3843 3738 pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
+138 -22
arch/powerpc/platforms/powernv/pci.c
··· 227 227 } 228 228 #endif /* CONFIG_PCI_MSI */ 229 229 230 + /* Nicely print the contents of the PE State Tables (PEST). */ 231 + static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size) 232 + { 233 + __be64 prevA = ULONG_MAX, prevB = ULONG_MAX; 234 + bool dup = false; 235 + int i; 236 + 237 + for (i = 0; i < pest_size; i++) { 238 + __be64 peA = be64_to_cpu(pestA[i]); 239 + __be64 peB = be64_to_cpu(pestB[i]); 240 + 241 + if (peA != prevA || peB != prevB) { 242 + if (dup) { 243 + pr_info("PE[..%03x] A/B: as above\n", i-1); 244 + dup = false; 245 + } 246 + prevA = peA; 247 + prevB = peB; 248 + if (peA & PNV_IODA_STOPPED_STATE || 249 + peB & PNV_IODA_STOPPED_STATE) 250 + pr_info("PE[%03x] A/B: %016llx %016llx\n", 251 + i, peA, peB); 252 + } else if (!dup && (peA & PNV_IODA_STOPPED_STATE || 253 + peB & PNV_IODA_STOPPED_STATE)) { 254 + dup = true; 255 + } 256 + } 257 + } 258 + 230 259 static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, 231 260 struct OpalIoPhbErrorCommon *common) 232 261 { 233 262 struct OpalIoP7IOCPhbErrorData *data; 234 - int i; 235 263 236 264 data = (struct OpalIoP7IOCPhbErrorData *)common; 237 265 pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n", ··· 336 308 be64_to_cpu(data->dma1ErrorLog0), 337 309 be64_to_cpu(data->dma1ErrorLog1)); 338 310 339 - for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { 340 - if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && 341 - (be64_to_cpu(data->pestB[i]) >> 63) == 0) 342 - continue; 343 - 344 - pr_info("PE[%3d] A/B: %016llx %016llx\n", 345 - i, be64_to_cpu(data->pestA[i]), 346 - be64_to_cpu(data->pestB[i])); 347 - } 311 + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS); 348 312 } 349 313 350 314 static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, 351 315 struct OpalIoPhbErrorCommon *common) 352 316 { 353 317 struct OpalIoPhb3ErrorData *data; 354 - int i; 355 318 356 319 data = (struct OpalIoPhb3ErrorData*)common; 357 320 pr_info("PHB3 PHB#%x 
Diag-data (Version: %d)\n", ··· 423 404 be64_to_cpu(data->dma1ErrorLog0), 424 405 be64_to_cpu(data->dma1ErrorLog1)); 425 406 426 - for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { 427 - if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && 428 - (be64_to_cpu(data->pestB[i]) >> 63) == 0) 429 - continue; 407 + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS); 408 + } 430 409 431 - pr_info("PE[%3d] A/B: %016llx %016llx\n", 432 - i, be64_to_cpu(data->pestA[i]), 433 - be64_to_cpu(data->pestB[i])); 434 - } 410 + static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose, 411 + struct OpalIoPhbErrorCommon *common) 412 + { 413 + struct OpalIoPhb4ErrorData *data; 414 + 415 + data = (struct OpalIoPhb4ErrorData*)common; 416 + pr_info("PHB4 PHB#%d Diag-data (Version: %d)\n", 417 + hose->global_number, be32_to_cpu(common->version)); 418 + if (data->brdgCtl) 419 + pr_info("brdgCtl: %08x\n", 420 + be32_to_cpu(data->brdgCtl)); 421 + if (data->deviceStatus || data->slotStatus || 422 + data->linkStatus || data->devCmdStatus || 423 + data->devSecStatus) 424 + pr_info("RootSts: %08x %08x %08x %08x %08x\n", 425 + be32_to_cpu(data->deviceStatus), 426 + be32_to_cpu(data->slotStatus), 427 + be32_to_cpu(data->linkStatus), 428 + be32_to_cpu(data->devCmdStatus), 429 + be32_to_cpu(data->devSecStatus)); 430 + if (data->rootErrorStatus || data->uncorrErrorStatus || 431 + data->corrErrorStatus) 432 + pr_info("RootErrSts: %08x %08x %08x\n", 433 + be32_to_cpu(data->rootErrorStatus), 434 + be32_to_cpu(data->uncorrErrorStatus), 435 + be32_to_cpu(data->corrErrorStatus)); 436 + if (data->tlpHdr1 || data->tlpHdr2 || 437 + data->tlpHdr3 || data->tlpHdr4) 438 + pr_info("RootErrLog: %08x %08x %08x %08x\n", 439 + be32_to_cpu(data->tlpHdr1), 440 + be32_to_cpu(data->tlpHdr2), 441 + be32_to_cpu(data->tlpHdr3), 442 + be32_to_cpu(data->tlpHdr4)); 443 + if (data->sourceId) 444 + pr_info("sourceId: %08x\n", be32_to_cpu(data->sourceId)); 445 + if (data->nFir) 446 + pr_info("nFir: %016llx 
%016llx %016llx\n", 447 + be64_to_cpu(data->nFir), 448 + be64_to_cpu(data->nFirMask), 449 + be64_to_cpu(data->nFirWOF)); 450 + if (data->phbPlssr || data->phbCsr) 451 + pr_info("PhbSts: %016llx %016llx\n", 452 + be64_to_cpu(data->phbPlssr), 453 + be64_to_cpu(data->phbCsr)); 454 + if (data->lemFir) 455 + pr_info("Lem: %016llx %016llx %016llx\n", 456 + be64_to_cpu(data->lemFir), 457 + be64_to_cpu(data->lemErrorMask), 458 + be64_to_cpu(data->lemWOF)); 459 + if (data->phbErrorStatus) 460 + pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", 461 + be64_to_cpu(data->phbErrorStatus), 462 + be64_to_cpu(data->phbFirstErrorStatus), 463 + be64_to_cpu(data->phbErrorLog0), 464 + be64_to_cpu(data->phbErrorLog1)); 465 + if (data->phbTxeErrorStatus) 466 + pr_info("PhbTxeErr: %016llx %016llx %016llx %016llx\n", 467 + be64_to_cpu(data->phbTxeErrorStatus), 468 + be64_to_cpu(data->phbTxeFirstErrorStatus), 469 + be64_to_cpu(data->phbTxeErrorLog0), 470 + be64_to_cpu(data->phbTxeErrorLog1)); 471 + if (data->phbRxeArbErrorStatus) 472 + pr_info("RxeArbErr: %016llx %016llx %016llx %016llx\n", 473 + be64_to_cpu(data->phbRxeArbErrorStatus), 474 + be64_to_cpu(data->phbRxeArbFirstErrorStatus), 475 + be64_to_cpu(data->phbRxeArbErrorLog0), 476 + be64_to_cpu(data->phbRxeArbErrorLog1)); 477 + if (data->phbRxeMrgErrorStatus) 478 + pr_info("RxeMrgErr: %016llx %016llx %016llx %016llx\n", 479 + be64_to_cpu(data->phbRxeMrgErrorStatus), 480 + be64_to_cpu(data->phbRxeMrgFirstErrorStatus), 481 + be64_to_cpu(data->phbRxeMrgErrorLog0), 482 + be64_to_cpu(data->phbRxeMrgErrorLog1)); 483 + if (data->phbRxeTceErrorStatus) 484 + pr_info("RxeTceErr: %016llx %016llx %016llx %016llx\n", 485 + be64_to_cpu(data->phbRxeTceErrorStatus), 486 + be64_to_cpu(data->phbRxeTceFirstErrorStatus), 487 + be64_to_cpu(data->phbRxeTceErrorLog0), 488 + be64_to_cpu(data->phbRxeTceErrorLog1)); 489 + 490 + if (data->phbPblErrorStatus) 491 + pr_info("PblErr: %016llx %016llx %016llx %016llx\n", 492 + be64_to_cpu(data->phbPblErrorStatus), 
493 + be64_to_cpu(data->phbPblFirstErrorStatus), 494 + be64_to_cpu(data->phbPblErrorLog0), 495 + be64_to_cpu(data->phbPblErrorLog1)); 496 + if (data->phbPcieDlpErrorStatus) 497 + pr_info("PcieDlp: %016llx %016llx %016llx\n", 498 + be64_to_cpu(data->phbPcieDlpErrorLog1), 499 + be64_to_cpu(data->phbPcieDlpErrorLog2), 500 + be64_to_cpu(data->phbPcieDlpErrorStatus)); 501 + if (data->phbRegbErrorStatus) 502 + pr_info("RegbErr: %016llx %016llx %016llx %016llx\n", 503 + be64_to_cpu(data->phbRegbErrorStatus), 504 + be64_to_cpu(data->phbRegbFirstErrorStatus), 505 + be64_to_cpu(data->phbRegbErrorLog0), 506 + be64_to_cpu(data->phbRegbErrorLog1)); 507 + 508 + 509 + pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS); 435 510 } 436 511 437 512 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, ··· 544 431 case OPAL_PHB_ERROR_DATA_TYPE_PHB3: 545 432 pnv_pci_dump_phb3_diag_data(hose, common); 546 433 break; 434 + case OPAL_PHB_ERROR_DATA_TYPE_PHB4: 435 + pnv_pci_dump_phb4_diag_data(hose, common); 436 + break; 547 437 default: 548 438 pr_warn("%s: Unrecognized ioType %d\n", 549 439 __func__, be32_to_cpu(common->ioType)); ··· 561 445 spin_lock_irqsave(&phb->lock, flags); 562 446 563 447 /* Fetch PHB diag-data */ 564 - rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, 565 - PNV_PCI_DIAG_BUF_SIZE); 448 + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data, 449 + phb->diag_data_size); 566 450 has_diag = (rc == OPAL_SUCCESS); 567 451 568 452 /* If PHB supports compound PE, to handle it */ ··· 590 474 * with the normal errors generated when probing empty slots 591 475 */ 592 476 if (has_diag && ret) 593 - pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); 477 + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data); 594 478 595 479 spin_unlock_irqrestore(&phb->lock, flags); 596 480 }
+6 -7
arch/powerpc/platforms/powernv/pci.h
··· 33 33 #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ 34 34 #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ 35 35 36 + /* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */ 37 + #define PNV_IODA_STOPPED_STATE 0x8000000000000000 38 + 36 39 /* Data associated with a PE, including IOMMU tracking etc.. */ 37 40 struct pnv_phb; 38 41 struct pnv_ioda_pe { ··· 172 169 unsigned int pe_rmap[0x10000]; 173 170 } ioda; 174 171 175 - /* PHB and hub status structure */ 176 - union { 177 - unsigned char blob[PNV_PCI_DIAG_BUF_SIZE]; 178 - struct OpalIoP7IOCPhbErrorData p7ioc; 179 - struct OpalIoPhb3ErrorData phb3; 180 - struct OpalIoP7IOCErrorData hub_diag; 181 - } diag; 172 + /* PHB and hub diagnostics */ 173 + unsigned int diag_data_size; 174 + u8 *diag_data; 182 175 183 176 /* Nvlink2 data */ 184 177 struct npu {
+16 -18
arch/powerpc/platforms/powernv/smp.c
··· 63 63 long rc; 64 64 uint8_t status; 65 65 66 - BUG_ON(nr < 0 || nr >= NR_CPUS); 66 + if (nr < 0 || nr >= nr_cpu_ids) 67 + return -EINVAL; 67 68 68 69 /* 69 70 * If we already started or OPAL is not supported, we just ··· 145 144 unsigned long srr1, wmask; 146 145 147 146 /* Standard hot unplug procedure */ 148 - local_irq_disable(); 147 + /* 148 + * This hard disables local interurpts, ensuring we have no lazy 149 + * irqs pending. 150 + */ 151 + WARN_ON(irqs_disabled()); 152 + hard_irq_disable(); 153 + WARN_ON(lazy_irq_pending()); 154 + 149 155 idle_task_exit(); 150 156 current->active_mm = NULL; /* for sanity */ 151 157 cpu = smp_processor_id(); ··· 170 162 */ 171 163 mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); 172 164 173 - /* 174 - * Hard-disable interrupts, and then clear irq_happened flags 175 - * that we can safely ignore while off-line, since they 176 - * are for things for which we do no processing when off-line 177 - * (or in the case of HMI, all the processing we need to do 178 - * is done in lower-level real-mode code). 179 - */ 180 - hard_irq_disable(); 181 - local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI); 182 - 183 165 while (!generic_check_cpu_restart(cpu)) { 184 166 /* 185 167 * Clear IPI flag, since we don't handle IPIs while ··· 180 182 */ 181 183 kvmppc_set_host_ipi(cpu, 0); 182 184 183 - ppc64_runlatch_off(); 184 185 srr1 = pnv_cpu_offline(cpu); 185 - ppc64_runlatch_on(); 186 + 187 + WARN_ON(lazy_irq_pending()); 186 188 187 189 /* 188 190 * If the SRR1 value indicates that we woke up due to ··· 196 198 * contains 0. 
197 199 */ 198 200 if (((srr1 & wmask) == SRR1_WAKEEE) || 199 - ((srr1 & wmask) == SRR1_WAKEHVI) || 200 - (local_paca->irq_happened & PACA_IRQ_EE)) { 201 + ((srr1 & wmask) == SRR1_WAKEHVI)) { 201 202 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 202 203 if (xive_enabled()) 203 204 xive_flush_interrupt(); ··· 208 211 unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); 209 212 asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); 210 213 } 211 - local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL); 212 214 smp_mb(); 213 215 214 216 if (cpu_core_split_required()) 215 217 continue; 216 218 217 219 if (srr1 && !generic_check_cpu_restart(cpu)) 218 - DBG("CPU%d Unexpected exit while offline !\n", cpu); 220 + DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", 221 + cpu, srr1); 222 + 219 223 } 220 224 221 225 /* Re-enable decrementer interrupts */
+2 -1
arch/powerpc/platforms/powernv/subcore.c
··· 18 18 #include <linux/stop_machine.h> 19 19 20 20 #include <asm/cputhreads.h> 21 + #include <asm/cpuidle.h> 21 22 #include <asm/kvm_ppc.h> 22 23 #include <asm/machdep.h> 23 24 #include <asm/opal.h> ··· 183 182 cpu = smp_processor_id(); 184 183 if (cpu_thread_in_core(cpu) != 0) { 185 184 while (mfspr(SPRN_HID0) & mask) 186 - power7_nap(0); 185 + power7_idle_insn(PNV_THREAD_NAP); 187 186 188 187 per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; 189 188 return;
+1 -1
arch/powerpc/platforms/pseries/Kconfig
··· 124 124 Enable access to hypervisor supplied counters in perf. Currently, 125 125 this enables code that uses the hcall GetPerfCounterInfo and 24x7 126 126 interfaces to retrieve counters. GPCI exists on Power 6 and later 127 - systems. 24x7 is available on Power 8 systems. 127 + systems. 24x7 is available on Power 8 and later systems. 128 128 129 129 If unsure, select Y. 130 130
+1 -1
arch/powerpc/platforms/pseries/hotplug-cpu.c
··· 554 554 { 555 555 int rc; 556 556 557 - pr_debug("Attemping to remove CPU %s, drc index: %x\n", 557 + pr_debug("Attempting to remove CPU %s, drc index: %x\n", 558 558 dn->name, drc_index); 559 559 560 560 rc = dlpar_offline_cpu(dn);
+7
arch/powerpc/platforms/pseries/hotplug-memory.c
··· 22 22 #include <asm/machdep.h> 23 23 #include <asm/prom.h> 24 24 #include <asm/sparsemem.h> 25 + #include <asm/fadump.h> 25 26 #include "pseries.h" 26 27 27 28 static bool rtas_hp_event; ··· 408 407 block_sz = memory_block_size_bytes(); 409 408 scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 410 409 phys_addr = lmb->base_addr; 410 + 411 + #ifdef CONFIG_FA_DUMP 412 + /* Don't hot-remove memory that falls in fadump boot memory area */ 413 + if (is_fadump_boot_memory_area(phys_addr, block_sz)) 414 + return false; 415 + #endif 411 416 412 417 for (i = 0; i < scns_per_block; i++) { 413 418 pfn = PFN_DOWN(phys_addr);
+10 -1
arch/powerpc/platforms/pseries/lpar.c
··· 301 301 int ssize, unsigned long inv_flags) 302 302 { 303 303 unsigned long lpar_rc; 304 - unsigned long flags = (newpp & 7) | H_AVPN; 304 + unsigned long flags; 305 305 unsigned long want_v; 306 306 307 307 want_v = hpte_encode_avpn(vpn, psize, ssize); 308 308 309 309 pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 310 310 want_v, slot, flags, psize); 311 + 312 + flags = (newpp & 7) | H_AVPN; 313 + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) 314 + /* Move pp0 into bit 8 (IBM 55) */ 315 + flags |= (newpp & HPTE_R_PP0) >> 55; 311 316 312 317 lpar_rc = plpar_pte_protect(flags, slot, want_v); 313 318 ··· 385 380 BUG_ON(slot == -1); 386 381 387 382 flags = newpp & 7; 383 + if (mmu_has_feature(MMU_FTR_KERNEL_RO)) 384 + /* Move pp0 into bit 8 (IBM 55) */ 385 + flags |= (newpp & HPTE_R_PP0) >> 55; 386 + 388 387 lpar_rc = plpar_pte_protect(flags, slot, 0); 389 388 390 389 BUG_ON(lpar_rc != H_SUCCESS);
+2 -1
arch/powerpc/platforms/pseries/smp.c
··· 151 151 152 152 static int smp_pSeries_kick_cpu(int nr) 153 153 { 154 - BUG_ON(nr < 0 || nr >= NR_CPUS); 154 + if (nr < 0 || nr >= nr_cpu_ids) 155 + return -EINVAL; 155 156 156 157 if (!smp_startup_cpu(nr)) 157 158 return -ENOENT;
+1 -1
arch/powerpc/sysdev/mpc8xx_pic.c
··· 79 79 irq = in_be32(&siu_reg->sc_sivec) >> 26; 80 80 81 81 if (irq == PIC_VEC_SPURRIOUS) 82 - irq = 0; 82 + return 0; 83 83 84 84 return irq_linear_revmap(mpc8xx_pic_host, irq); 85 85
+1 -1
arch/powerpc/sysdev/xive/common.c
··· 1417 1417 /* Get ready for interrupts */ 1418 1418 xive_setup_cpu(); 1419 1419 1420 - pr_info("Interrupt handling intialized with %s backend\n", 1420 + pr_info("Interrupt handling initialized with %s backend\n", 1421 1421 xive_ops->name); 1422 1422 pr_info("Using priority %d for all interrupts\n", max_prio); 1423 1423
+2 -2
arch/powerpc/sysdev/xive/native.c
··· 633 633 if (max_vcpus > (1 << order)) 634 634 order++; 635 635 636 - pr_info("VP block alloc, for max VCPUs %d use order %d\n", 637 - max_vcpus, order); 636 + pr_debug("VP block alloc, for max VCPUs %d use order %d\n", 637 + max_vcpus, order); 638 638 639 639 for (;;) { 640 640 rc = opal_xive_alloc_vp_block(order);
+78
arch/powerpc/tools/head_check.sh
··· 1 + # Copyright © 2016 IBM Corporation 2 + 3 + # This program is free software; you can redistribute it and/or 4 + # modify it under the terms of the GNU General Public License 5 + # as published by the Free Software Foundation; either version 6 + # 2 of the License, or (at your option) any later version. 7 + 8 + # This script checks the head of a vmlinux for linker stubs that 9 + # break our placement of fixed-location code for 64-bit. 10 + 11 + # based on relocs_check.pl 12 + # Copyright © 2009 IBM Corporation 13 + 14 + # NOTE! 15 + # 16 + # If the build dies here, it's likely code in head_64.S/exception-64*.S or 17 + # nearby, is branching to labels it can't reach directly, which results in the 18 + # linker inserting branch stubs. This can move code around in ways that break 19 + # the fixed section calculations (head-64.h). To debug this, disassemble the 20 + # vmlinux and look for branch stubs (long_branch, plt_branch, etc.) in the 21 + # fixed section region (0 - 0x8000ish). Check what code is calling those stubs, 22 + # and perhaps change so a direct branch can reach. 23 + # 24 + # A ".linker_stub_catch" section is used to catch some stubs generated by 25 + # early .text code, which tend to get placed at the start of the section. 26 + # If there are too many such stubs, they can overflow this section. Expanding 27 + # it may help (or reducing the number of stub branches). 28 + # 29 + # Linker stubs use the TOC pointer, so even if fixed section code could 30 + # tolerate them being inserted into head code, they can't be allowed in low 31 + # level entry code (boot, interrupt vectors, etc) until r2 is set up. This 32 + # could cause the kernel to die in early boot. 33 + 34 + # Turn this on if you want more debug output: 35 + # set -x 36 + 37 + if [ $# -lt 2 ]; then 38 + echo "$0 [path to nm] [path to vmlinux]" 1>&2 39 + exit 1 40 + fi 41 + 42 + # Have Kbuild supply the path to nm so we handle cross compilation. 
43 + nm="$1" 44 + vmlinux="$2" 45 + 46 + # gcc-4.6-era toolchain make _stext an A (absolute) symbol rather than T 47 + $nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt 48 + 49 + 50 + vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1) 51 + 52 + expected_start_head_addr=$vma 53 + 54 + start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1) 55 + 56 + if [ "$start_head_addr" != "$expected_start_head_addr" ]; then 57 + echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr" 58 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 59 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 60 + 61 + exit 1 62 + fi 63 + 64 + top_vma=$(echo $vma | cut -d'0' -f1) 65 + 66 + expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/") 67 + 68 + start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1) 69 + 70 + if [ "$start_text_addr" != "$expected_start_text_addr" ]; then 71 + echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr" 72 + echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option" 73 + echo "ERROR: see comments in arch/powerpc/tools/head_check.sh" 74 + 75 + exit 1 76 + fi 77 + 78 + rm -f .tmp_symbols.txt
+57
arch/powerpc/tools/unrel_branch_check.sh
··· 1 + # Copyright © 2016 IBM Corporation 2 + # 3 + # This program is free software; you can redistribute it and/or 4 + # modify it under the terms of the GNU General Public License 5 + # as published by the Free Software Foundation; either version 6 + # 2 of the License, or (at your option) any later version. 7 + # 8 + # This script checks the relocations of a vmlinux for "suspicious" 9 + # branches from unrelocated code (head_64.S code). 10 + 11 + # Turn this on if you want more debug output: 12 + # set -x 13 + 14 + # Have Kbuild supply the path to objdump so we handle cross compilation. 15 + objdump="$1" 16 + vmlinux="$2" 17 + 18 + #__end_interrupts should be located within the first 64K 19 + 20 + end_intr=0x$( 21 + "$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000 \ 22 + --stop-address=0xc000000000010000 | 23 + grep '\<__end_interrupts>:' | 24 + awk '{print $1}' 25 + ) 26 + 27 + BRANCHES=$( 28 + "$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ 29 + --stop-address=${end_intr} | 30 + grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | 31 + grep -v '\<__start_initialization_multiplatform>' | 32 + grep -v -e 'b.\?.\?ctr' | 33 + grep -v -e 'b.\?.\?lr' | 34 + sed 's/://' | 35 + awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}' 36 + ) 37 + 38 + for tuple in $BRANCHES 39 + do 40 + from=`echo $tuple | cut -d':' -f1` 41 + branch=`echo $tuple | cut -d':' -f2` 42 + to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'` 43 + sym=`echo $tuple | cut -d':' -f4` 44 + 45 + if (( $to > $end_intr )) 46 + then 47 + if [ -z "$bad_branches" ]; then 48 + echo "WARNING: Unrelocated relative branches" 49 + bad_branches="yes" 50 + fi 51 + echo "$from $branch-> $to $sym" 52 + fi 53 + done 54 + 55 + if [ -z "$bad_branches" ]; then 56 + exit 0 57 + fi
+9 -6
arch/powerpc/xmon/xmon.c
··· 53 53 #include <asm/xive.h> 54 54 #include <asm/opal.h> 55 55 #include <asm/firmware.h> 56 + #include <asm/code-patching.h> 56 57 57 58 #ifdef CONFIG_PPC64 58 59 #include <asm/hvcall.h> ··· 838 837 store_inst(&bp->instr[0]); 839 838 if (bp->enabled & BP_CIABR) 840 839 continue; 841 - if (mwrite(bp->address, &bpinstr, 4) != 4) { 840 + if (patch_instruction((unsigned int *)bp->address, 841 + bpinstr) != 0) { 842 842 printf("Couldn't write instruction at %lx, " 843 843 "disabling breakpoint there\n", bp->address); 844 844 bp->enabled &= ~BP_TRAP; ··· 876 874 continue; 877 875 if (mread(bp->address, &instr, 4) == 4 878 876 && instr == bpinstr 879 - && mwrite(bp->address, &bp->instr, 4) != 4) 877 + && patch_instruction( 878 + (unsigned int *)bp->address, bp->instr[0]) != 0) 880 879 printf("Couldn't remove breakpoint at %lx\n", 881 880 bp->address); 882 881 else ··· 1245 1242 { 1246 1243 int cmd; 1247 1244 unsigned long a; 1248 - int mode, i; 1245 + int i; 1249 1246 struct bpt *bp; 1250 - const char badaddr[] = "Only kernel addresses are permitted " 1251 - "for breakpoints\n"; 1252 1247 1253 1248 cmd = inchar(); 1254 1249 switch (cmd) { 1255 - #ifndef CONFIG_8xx 1250 + #ifndef CONFIG_PPC_8xx 1251 + static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; 1252 + int mode; 1256 1253 case 'd': /* bd - hardware data breakpoint */ 1257 1254 mode = 7; 1258 1255 cmd = inchar();
+1
arch/x86/Kconfig
··· 60 60 select ARCH_HAS_STRICT_KERNEL_RWX 61 61 select ARCH_HAS_STRICT_MODULE_RWX 62 62 select ARCH_HAS_UBSAN_SANITIZE_ALL 63 + select ARCH_HAS_ZONE_DEVICE if X86_64 63 64 select ARCH_HAVE_NMI_SAFE_CMPXCHG 64 65 select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI 65 66 select ARCH_MIGHT_HAVE_PC_PARPORT
+32 -21
drivers/cpuidle/cpuidle-powernv.c
··· 32 32 .owner = THIS_MODULE, 33 33 }; 34 34 35 - static int max_idle_state; 36 - static struct cpuidle_state *cpuidle_state_table; 35 + static int max_idle_state __read_mostly; 36 + static struct cpuidle_state *cpuidle_state_table __read_mostly; 37 37 38 38 struct stop_psscr_table { 39 39 u64 val; 40 40 u64 mask; 41 41 }; 42 42 43 - static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX]; 43 + static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly; 44 44 45 - static u64 snooze_timeout; 46 - static bool snooze_timeout_en; 45 + static u64 snooze_timeout __read_mostly; 46 + static bool snooze_timeout_en __read_mostly; 47 47 48 48 static int snooze_loop(struct cpuidle_device *dev, 49 49 struct cpuidle_driver *drv, ··· 51 51 { 52 52 u64 snooze_exit_time; 53 53 54 - local_irq_enable(); 55 54 set_thread_flag(TIF_POLLING_NRFLAG); 55 + 56 + local_irq_enable(); 56 57 57 58 snooze_exit_time = get_tb() + snooze_timeout; 58 59 ppc64_runlatch_off(); 59 60 HMT_very_low(); 60 61 while (!need_resched()) { 61 - if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) 62 + if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) { 63 + /* 64 + * Task has not woken up but we are exiting the polling 65 + * loop anyway. Require a barrier after polling is 66 + * cleared to order subsequent test of need_resched(). 
67 + */ 68 + clear_thread_flag(TIF_POLLING_NRFLAG); 69 + smp_mb(); 62 70 break; 71 + } 63 72 } 64 73 65 74 HMT_medium(); 66 75 ppc64_runlatch_on(); 67 76 clear_thread_flag(TIF_POLLING_NRFLAG); 68 - smp_mb(); 77 + 69 78 return index; 70 79 } 71 80 ··· 82 73 struct cpuidle_driver *drv, 83 74 int index) 84 75 { 85 - ppc64_runlatch_off(); 86 - power7_idle(); 87 - ppc64_runlatch_on(); 76 + power7_idle_type(PNV_THREAD_NAP); 77 + 88 78 return index; 89 79 } 90 80 ··· 106 98 new_lpcr &= ~LPCR_PECE1; 107 99 108 100 mtspr(SPRN_LPCR, new_lpcr); 109 - power7_sleep(); 101 + 102 + power7_idle_type(PNV_THREAD_SLEEP); 110 103 111 104 mtspr(SPRN_LPCR, old_lpcr); 112 105 ··· 119 110 struct cpuidle_driver *drv, 120 111 int index) 121 112 { 122 - ppc64_runlatch_off(); 123 - power9_idle_stop(stop_psscr_table[index].val, 113 + power9_idle_type(stop_psscr_table[index].val, 124 114 stop_psscr_table[index].mask); 125 - ppc64_runlatch_on(); 126 115 return index; 127 116 } 128 117 ··· 361 354 362 355 for (i = 0; i < dt_idle_states; i++) { 363 356 unsigned int exit_latency, target_residency; 357 + bool stops_timebase = false; 364 358 /* 365 359 * If an idle state has exit latency beyond 366 360 * POWERNV_THRESHOLD_LATENCY_NS then don't use it ··· 389 381 } 390 382 } 391 383 384 + if (flags[i] & OPAL_PM_TIMEBASE_STOP) 385 + stops_timebase = true; 386 + 392 387 /* 393 388 * For nap and fastsleep, use default target_residency 394 389 * values if f/w does not expose it. ··· 403 392 add_powernv_state(nr_idle_states, "Nap", 404 393 CPUIDLE_FLAG_NONE, nap_loop, 405 394 target_residency, exit_latency, 0, 0); 406 - } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) && 407 - !(flags[i] & OPAL_PM_TIMEBASE_STOP)) { 395 + } else if (has_stop_states && !stops_timebase) { 408 396 add_powernv_state(nr_idle_states, names[i], 409 397 CPUIDLE_FLAG_NONE, stop_loop, 410 398 target_residency, exit_latency, ··· 415 405 * within this config dependency check. 
416 406 */ 417 407 #ifdef CONFIG_TICK_ONESHOT 418 - if (flags[i] & OPAL_PM_SLEEP_ENABLED || 419 - flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { 408 + else if (flags[i] & OPAL_PM_SLEEP_ENABLED || 409 + flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) { 420 410 if (!rc) 421 411 target_residency = 300000; 422 412 /* Add FASTSLEEP state */ ··· 424 414 CPUIDLE_FLAG_TIMER_STOP, 425 415 fastsleep_loop, 426 416 target_residency, exit_latency, 0, 0); 427 - } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) && 428 - (flags[i] & OPAL_PM_TIMEBASE_STOP)) { 417 + } else if (has_stop_states && stops_timebase) { 429 418 add_powernv_state(nr_idle_states, names[i], 430 419 CPUIDLE_FLAG_TIMER_STOP, stop_loop, 431 420 target_residency, exit_latency, 432 421 psscr_val[i], psscr_mask[i]); 433 422 } 434 423 #endif 424 + else 425 + continue; 435 426 nr_idle_states++; 436 427 } 437 428 out:
+15 -7
drivers/cpuidle/cpuidle-pseries.c
··· 25 25 .owner = THIS_MODULE, 26 26 }; 27 27 28 - static int max_idle_state; 29 - static struct cpuidle_state *cpuidle_state_table; 30 - static u64 snooze_timeout; 31 - static bool snooze_timeout_en; 28 + static int max_idle_state __read_mostly; 29 + static struct cpuidle_state *cpuidle_state_table __read_mostly; 30 + static u64 snooze_timeout __read_mostly; 31 + static bool snooze_timeout_en __read_mostly; 32 32 33 33 static inline void idle_loop_prolog(unsigned long *in_purr) 34 34 { ··· 62 62 unsigned long in_purr; 63 63 u64 snooze_exit_time; 64 64 65 + set_thread_flag(TIF_POLLING_NRFLAG); 66 + 65 67 idle_loop_prolog(&in_purr); 66 68 local_irq_enable(); 67 - set_thread_flag(TIF_POLLING_NRFLAG); 68 69 snooze_exit_time = get_tb() + snooze_timeout; 69 70 70 71 while (!need_resched()) { 71 72 HMT_low(); 72 73 HMT_very_low(); 73 - if (snooze_timeout_en && get_tb() > snooze_exit_time) 74 + if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) { 75 + /* 76 + * Task has not woken up but we are exiting the polling 77 + * loop anyway. Require a barrier after polling is 78 + * cleared to order subsequent test of need_resched(). 79 + */ 80 + clear_thread_flag(TIF_POLLING_NRFLAG); 81 + smp_mb(); 74 82 break; 83 + } 75 84 } 76 85 77 86 HMT_medium(); 78 87 clear_thread_flag(TIF_POLLING_NRFLAG); 79 - smp_mb(); 80 88 81 89 idle_loop_epilog(in_purr); 82 90
+5
drivers/misc/cxl/Kconfig
··· 11 11 bool 12 12 default n 13 13 14 + config CXL_LIB 15 + bool 16 + default n 17 + 14 18 config CXL 15 19 tristate "Support for IBM Coherent Accelerators (CXL)" 16 20 depends on PPC_POWERNV && PCI_MSI && EEH 17 21 select CXL_BASE 18 22 select CXL_AFU_DRIVER_OPS 23 + select CXL_LIB 19 24 default m 20 25 help 21 26 Select this option to enable driver support for IBM Coherent
+1 -1
drivers/misc/cxl/Makefile
··· 3 3 4 4 cxl-y += main.o file.o irq.o fault.o native.o 5 5 cxl-y += context.o sysfs.o pci.o trace.o 6 - cxl-y += vphb.o phb.o api.o 6 + cxl-y += vphb.o phb.o api.o cxllib.o 7 7 cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o 8 8 cxl-$(CONFIG_DEBUG_FS) += debugfs.o 9 9 obj-$(CONFIG_CXL) += cxl.o
+6
drivers/misc/cxl/cxl.h
··· 1010 1010 1011 1011 void cxl_handle_fault(struct work_struct *work); 1012 1012 void cxl_prefault(struct cxl_context *ctx, u64 wed); 1013 + int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar); 1013 1014 1014 1015 struct cxl *get_cxl_adapter(int num); 1015 1016 int cxl_alloc_sst(struct cxl_context *ctx); ··· 1062 1061 int cxl_data_cache_flush(struct cxl *adapter); 1063 1062 int cxl_afu_disable(struct cxl_afu *afu); 1064 1063 int cxl_psl_purge(struct cxl_afu *afu); 1064 + int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, 1065 + u32 *phb_index, u64 *capp_unit_id); 1066 + int cxl_slot_is_switched(struct pci_dev *dev); 1067 + int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg); 1068 + u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); 1065 1069 1066 1070 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx); 1067 1071 void cxl_native_irq_dump_regs_psl8(struct cxl_context *ctx);
+246
drivers/misc/cxl/cxllib.c
··· 1 + /* 2 + * Copyright 2017 IBM Corp. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation; either version 7 + * 2 of the License, or (at your option) any later version. 8 + */ 9 + 10 + #include <linux/hugetlb.h> 11 + #include <linux/sched/mm.h> 12 + #include <asm/pnv-pci.h> 13 + #include <misc/cxllib.h> 14 + 15 + #include "cxl.h" 16 + 17 + #define CXL_INVALID_DRA ~0ull 18 + #define CXL_DUMMY_READ_SIZE 128 19 + #define CXL_DUMMY_READ_ALIGN 8 20 + #define CXL_CAPI_WINDOW_START 0x2000000000000ull 21 + #define CXL_CAPI_WINDOW_LOG_SIZE 48 22 + #define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 23 + 24 + 25 + bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) 26 + { 27 + int rc; 28 + u32 phb_index; 29 + u64 chip_id, capp_unit_id; 30 + 31 + /* No flags currently supported */ 32 + if (flags) 33 + return false; 34 + 35 + if (!cpu_has_feature(CPU_FTR_HVMODE)) 36 + return false; 37 + 38 + if (!cxl_is_power9()) 39 + return false; 40 + 41 + if (cxl_slot_is_switched(dev)) 42 + return false; 43 + 44 + /* on p9, some pci slots are not connected to a CAPP unit */ 45 + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); 46 + if (rc) 47 + return false; 48 + 49 + return true; 50 + } 51 + EXPORT_SYMBOL_GPL(cxllib_slot_is_supported); 52 + 53 + static DEFINE_MUTEX(dra_mutex); 54 + static u64 dummy_read_addr = CXL_INVALID_DRA; 55 + 56 + static int allocate_dummy_read_buf(void) 57 + { 58 + u64 buf, vaddr; 59 + size_t buf_size; 60 + 61 + /* 62 + * Dummy read buffer is 128-byte long, aligned on a 63 + * 256-byte boundary and we need the physical address. 
64 + */ 65 + buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); 66 + buf = (u64) kzalloc(buf_size, GFP_KERNEL); 67 + if (!buf) 68 + return -ENOMEM; 69 + 70 + vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & 71 + (~0ull << CXL_DUMMY_READ_ALIGN); 72 + 73 + WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), 74 + "Dummy read buffer alignment issue"); 75 + dummy_read_addr = virt_to_phys((void *) vaddr); 76 + return 0; 77 + } 78 + 79 + int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) 80 + { 81 + int rc; 82 + u32 phb_index; 83 + u64 chip_id, capp_unit_id; 84 + 85 + if (!cpu_has_feature(CPU_FTR_HVMODE)) 86 + return -EINVAL; 87 + 88 + mutex_lock(&dra_mutex); 89 + if (dummy_read_addr == CXL_INVALID_DRA) { 90 + rc = allocate_dummy_read_buf(); 91 + if (rc) { 92 + mutex_unlock(&dra_mutex); 93 + return rc; 94 + } 95 + } 96 + mutex_unlock(&dra_mutex); 97 + 98 + rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); 99 + if (rc) 100 + return rc; 101 + 102 + rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl); 103 + if (rc) 104 + return rc; 105 + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { 106 + /* workaround for DD1 - nbwind = capiind */ 107 + cfg->dsnctl |= ((u64)0x02 << (63-47)); 108 + } 109 + 110 + cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; 111 + cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; 112 + cfg->bar_addr = CXL_CAPI_WINDOW_START; 113 + cfg->dra = dummy_read_addr; 114 + return 0; 115 + } 116 + EXPORT_SYMBOL_GPL(cxllib_get_xsl_config); 117 + 118 + int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, 119 + unsigned long flags) 120 + { 121 + int rc = 0; 122 + 123 + if (!cpu_has_feature(CPU_FTR_HVMODE)) 124 + return -EINVAL; 125 + 126 + switch (mode) { 127 + case CXL_MODE_PCI: 128 + /* 129 + * We currently don't support going back to PCI mode 130 + * However, we'll turn the invalidations off, so that 131 + * the firmware doesn't have to ack them and can do 132 + * things like reset, etc.. 
with no worries. 133 + * So always return EPERM (can't go back to PCI) or 134 + * EBUSY if we couldn't even turn off snooping 135 + */ 136 + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); 137 + if (rc) 138 + rc = -EBUSY; 139 + else 140 + rc = -EPERM; 141 + break; 142 + case CXL_MODE_CXL: 143 + /* DMA only supported on TVT1 for the time being */ 144 + if (flags != CXL_MODE_DMA_TVT1) 145 + return -EINVAL; 146 + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); 147 + if (rc) 148 + return rc; 149 + rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); 150 + break; 151 + default: 152 + rc = -EINVAL; 153 + } 154 + return rc; 155 + } 156 + EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); 157 + 158 + /* 159 + * When switching the PHB to capi mode, the TVT#1 entry for 160 + * the Partitionable Endpoint is set in bypass mode, like 161 + * in PCI mode. 162 + * Configure the device dma to use TVT#1, which is done 163 + * by calling dma_set_mask() with a mask large enough. 164 + */ 165 + int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) 166 + { 167 + int rc; 168 + 169 + if (flags) 170 + return -EINVAL; 171 + 172 + rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); 173 + return rc; 174 + } 175 + EXPORT_SYMBOL_GPL(cxllib_set_device_dma); 176 + 177 + int cxllib_get_PE_attributes(struct task_struct *task, 178 + unsigned long translation_mode, 179 + struct cxllib_pe_attributes *attr) 180 + { 181 + struct mm_struct *mm = NULL; 182 + 183 + if (translation_mode != CXL_TRANSLATED_MODE && 184 + translation_mode != CXL_REAL_MODE) 185 + return -EINVAL; 186 + 187 + attr->sr = cxl_calculate_sr(false, 188 + task == NULL, 189 + translation_mode == CXL_REAL_MODE, 190 + true); 191 + attr->lpid = mfspr(SPRN_LPID); 192 + if (task) { 193 + mm = get_task_mm(task); 194 + if (mm == NULL) 195 + return -EINVAL; 196 + /* 197 + * Caller is keeping a reference on mm_users for as long 198 + * as XSL uses the memory context 199 + */ 200 + attr->pid = mm->context.id; 201 
+ mmput(mm); 202 + } else { 203 + attr->pid = 0; 204 + } 205 + attr->tid = 0; 206 + return 0; 207 + } 208 + EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); 209 + 210 + int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) 211 + { 212 + int rc; 213 + u64 dar; 214 + struct vm_area_struct *vma = NULL; 215 + unsigned long page_size; 216 + 217 + if (mm == NULL) 218 + return -EFAULT; 219 + 220 + down_read(&mm->mmap_sem); 221 + 222 + for (dar = addr; dar < addr + size; dar += page_size) { 223 + if (!vma || dar < vma->vm_start || dar > vma->vm_end) { 224 + vma = find_vma(mm, addr); 225 + if (!vma) { 226 + pr_err("Can't find vma for addr %016llx\n", addr); 227 + rc = -EFAULT; 228 + goto out; 229 + } 230 + /* get the size of the pages allocated */ 231 + page_size = vma_kernel_pagesize(vma); 232 + } 233 + 234 + rc = cxl_handle_mm_fault(mm, flags, dar); 235 + if (rc) { 236 + pr_err("cxl_handle_mm_fault failed %d", rc); 237 + rc = -EFAULT; 238 + goto out; 239 + } 240 + } 241 + rc = 0; 242 + out: 243 + up_read(&mm->mmap_sem); 244 + return rc; 245 + } 246 + EXPORT_SYMBOL_GPL(cxllib_handle_fault);
+19 -10
drivers/misc/cxl/fault.c
··· 132 132 return IRQ_HANDLED; 133 133 } 134 134 135 - static void cxl_handle_page_fault(struct cxl_context *ctx, 136 - struct mm_struct *mm, u64 dsisr, u64 dar) 135 + int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar) 137 136 { 138 137 unsigned flt = 0; 139 138 int result; 140 139 unsigned long access, flags, inv_flags = 0; 141 140 142 - trace_cxl_pte_miss(ctx, dsisr, dar); 143 - 144 141 if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { 145 142 pr_devel("copro_handle_mm_fault failed: %#x\n", result); 146 - return cxl_ack_ae(ctx); 143 + return result; 147 144 } 148 145 149 146 if (!radix_enabled()) { ··· 152 155 if (dsisr & CXL_PSL_DSISR_An_S) 153 156 access |= _PAGE_WRITE; 154 157 155 - access |= _PAGE_PRIVILEGED; 156 - if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID)) 157 - access &= ~_PAGE_PRIVILEGED; 158 + if (!mm && (REGION_ID(dar) != USER_REGION_ID)) 159 + access |= _PAGE_PRIVILEGED; 158 160 159 161 if (dsisr & DSISR_NOHPTE) 160 162 inv_flags |= HPTE_NOHPTE_UPDATE; ··· 162 166 hash_page_mm(mm, dar, access, 0x300, inv_flags); 163 167 local_irq_restore(flags); 164 168 } 165 - pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); 166 - cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); 169 + return 0; 170 + } 171 + 172 + static void cxl_handle_page_fault(struct cxl_context *ctx, 173 + struct mm_struct *mm, 174 + u64 dsisr, u64 dar) 175 + { 176 + trace_cxl_pte_miss(ctx, dsisr, dar); 177 + 178 + if (cxl_handle_mm_fault(mm, dsisr, dar)) { 179 + cxl_ack_ae(ctx); 180 + } else { 181 + pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); 182 + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); 183 + } 167 184 } 168 185 169 186 /*
+6 -2
drivers/misc/cxl/flash.c
··· 401 401 if (down_interruptible(&sem) != 0) 402 402 return -EPERM; 403 403 404 - if (!(adapter = get_cxl_adapter(adapter_num))) 405 - return -ENODEV; 404 + if (!(adapter = get_cxl_adapter(adapter_num))) { 405 + rc = -ENODEV; 406 + goto err_unlock; 407 + } 406 408 407 409 file->private_data = adapter; 408 410 continue_token = 0; ··· 448 446 free_page((unsigned long) le); 449 447 err: 450 448 put_device(&adapter->dev); 449 + err_unlock: 450 + up(&sem); 451 451 452 452 return rc; 453 453 }
+11 -5
drivers/misc/cxl/native.c
··· 586 586 #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) 587 587 #endif 588 588 589 - static u64 calculate_sr(struct cxl_context *ctx) 589 + u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9) 590 590 { 591 591 u64 sr = 0; 592 592 593 593 set_endian(sr); 594 - if (ctx->master) 594 + if (master) 595 595 sr |= CXL_PSL_SR_An_MP; 596 596 if (mfspr(SPRN_LPCR) & LPCR_TC) 597 597 sr |= CXL_PSL_SR_An_TC; 598 - if (ctx->kernel) { 599 - if (!ctx->real_mode) 598 + if (kernel) { 599 + if (!real_mode) 600 600 sr |= CXL_PSL_SR_An_R; 601 601 sr |= (mfmsr() & MSR_SF) | CXL_PSL_SR_An_HV; 602 602 } else { ··· 608 608 if (!test_tsk_thread_flag(current, TIF_32BIT)) 609 609 sr |= CXL_PSL_SR_An_SF; 610 610 } 611 - if (cxl_is_power9()) { 611 + if (p9) { 612 612 if (radix_enabled()) 613 613 sr |= CXL_PSL_SR_An_XLAT_ror; 614 614 else 615 615 sr |= CXL_PSL_SR_An_XLAT_hpt; 616 616 } 617 617 return sr; 618 + } 619 + 620 + static u64 calculate_sr(struct cxl_context *ctx) 621 + { 622 + return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode, 623 + cxl_is_power9()); 618 624 } 619 625 620 626 static void update_ivtes_directed(struct cxl_context *ctx)
+27 -14
drivers/misc/cxl/pci.c
··· 375 375 return 0; 376 376 } 377 377 378 - static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, 378 + int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, 379 379 u32 *phb_index, u64 *capp_unit_id) 380 380 { 381 381 int rc; ··· 408 408 return 0; 409 409 } 410 410 411 - static int init_implementation_adapter_regs_psl9(struct cxl *adapter, struct pci_dev *dev) 411 + int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg) 412 412 { 413 - u64 xsl_dsnctl, psl_fircntl; 414 - u64 chipid; 415 - u32 phb_index; 416 - u64 capp_unit_id; 417 - int rc; 418 - 419 - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 420 - if (rc) 421 - return rc; 413 + u64 xsl_dsnctl; 422 414 423 415 /* 424 416 * CAPI Identifier bits [0:7] ··· 445 453 */ 446 454 xsl_dsnctl |= ((u64)0x04 << (63-55)); 447 455 } 456 + 457 + *reg = xsl_dsnctl; 458 + return 0; 459 + } 460 + 461 + static int init_implementation_adapter_regs_psl9(struct cxl *adapter, 462 + struct pci_dev *dev) 463 + { 464 + u64 xsl_dsnctl, psl_fircntl; 465 + u64 chipid; 466 + u32 phb_index; 467 + u64 capp_unit_id; 468 + int rc; 469 + 470 + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 471 + if (rc) 472 + return rc; 473 + 474 + rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl); 475 + if (rc) 476 + return rc; 448 477 449 478 cxl_p1_write(adapter, CXL_XSL9_DSNCTL, xsl_dsnctl); 450 479 ··· 518 505 u64 capp_unit_id; 519 506 int rc; 520 507 521 - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 508 + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 522 509 if (rc) 523 510 return rc; 524 511 ··· 551 538 u64 capp_unit_id; 552 539 int rc; 553 540 554 - rc = calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 541 + rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 555 542 if (rc) 556 543 return rc; 557 544 ··· 1910 1897 1911 1898 #define CXL_MAX_PCIEX_PARENT 2 1912 1899 1913 - static int cxl_slot_is_switched(struct pci_dev *dev) 
1900 + int cxl_slot_is_switched(struct pci_dev *dev) 1914 1901 { 1915 1902 struct device_node *np; 1916 1903 int depth = 0;
+1 -1
drivers/watchdog/Kconfig
··· 1688 1688 1689 1689 config WATCHDOG_RTAS 1690 1690 tristate "RTAS watchdog" 1691 - depends on PPC_RTAS || (PPC64 && COMPILE_TEST) 1691 + depends on PPC_RTAS 1692 1692 help 1693 1693 This driver adds watchdog support for the RTAS watchdog. 1694 1694
+12
include/asm-generic/vmlinux.lds.h
··· 594 594 #define SBSS(sbss_align) \ 595 595 . = ALIGN(sbss_align); \ 596 596 .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \ 597 + *(.dynsbss) \ 597 598 *(.sbss) \ 598 599 *(.scommon) \ 599 600 } ··· 641 640 .debug_str 0 : { *(.debug_str) } \ 642 641 .debug_loc 0 : { *(.debug_loc) } \ 643 642 .debug_macinfo 0 : { *(.debug_macinfo) } \ 643 + .debug_pubtypes 0 : { *(.debug_pubtypes) } \ 644 + /* DWARF 3 */ \ 645 + .debug_ranges 0 : { *(.debug_ranges) } \ 644 646 /* SGI/MIPS DWARF 2 extensions */ \ 645 647 .debug_weaknames 0 : { *(.debug_weaknames) } \ 646 648 .debug_funcnames 0 : { *(.debug_funcnames) } \ 647 649 .debug_typenames 0 : { *(.debug_typenames) } \ 648 650 .debug_varnames 0 : { *(.debug_varnames) } \ 651 + /* GNU DWARF 2 extensions */ \ 652 + .debug_gnu_pubnames 0 : { *(.debug_gnu_pubnames) } \ 653 + .debug_gnu_pubtypes 0 : { *(.debug_gnu_pubtypes) } \ 654 + /* DWARF 4 */ \ 655 + .debug_types 0 : { *(.debug_types) } \ 656 + /* DWARF 5 */ \ 657 + .debug_macro 0 : { *(.debug_macro) } \ 658 + .debug_addr 0 : { *(.debug_addr) } 649 659 650 660 /* Stabs debugging sections. */ 651 661 #define STABS_DEBUG \
+70
include/linux/processor.h
··· 1 + /* Misc low level processor primitives */ 2 + #ifndef _LINUX_PROCESSOR_H 3 + #define _LINUX_PROCESSOR_H 4 + 5 + #include <asm/processor.h> 6 + 7 + /* 8 + * spin_begin is used before beginning a busy-wait loop, and must be paired 9 + * with spin_end when the loop is exited. spin_cpu_relax must be called 10 + * within the loop. 11 + * 12 + * The loop body should be as small and fast as possible, on the order of 13 + * tens of instructions/cycles as a guide. It should and avoid calling 14 + * cpu_relax, or any "spin" or sleep type of primitive including nested uses 15 + * of these primitives. It should not lock or take any other resource. 16 + * Violations of these guidelies will not cause a bug, but may cause sub 17 + * optimal performance. 18 + * 19 + * These loops are optimized to be used where wait times are expected to be 20 + * less than the cost of a context switch (and associated overhead). 21 + * 22 + * Detection of resource owner and decision to spin or sleep or guest-yield 23 + * (e.g., spin lock holder vcpu preempted, or mutex owner not on CPU) can be 24 + * tested within the loop body. 25 + */ 26 + #ifndef spin_begin 27 + #define spin_begin() 28 + #endif 29 + 30 + #ifndef spin_cpu_relax 31 + #define spin_cpu_relax() cpu_relax() 32 + #endif 33 + 34 + /* 35 + * spin_cpu_yield may be called to yield (undirected) to the hypervisor if 36 + * necessary. This should be used if the wait is expected to take longer 37 + * than context switch overhead, but we can't sleep or do a directed yield. 38 + */ 39 + #ifndef spin_cpu_yield 40 + #define spin_cpu_yield() cpu_relax_yield() 41 + #endif 42 + 43 + #ifndef spin_end 44 + #define spin_end() 45 + #endif 46 + 47 + /* 48 + * spin_until_cond can be used to wait for a condition to become true. It 49 + * may be expected that the first iteration will true in the common case 50 + * (no spinning), so that callers should not require a first "likely" test 51 + * for the uncontended case before using this primitive. 
52 + * 53 + * Usage and implementation guidelines are the same as for the spin_begin 54 + * primitives, above. 55 + */ 56 + #ifndef spin_until_cond 57 + #define spin_until_cond(cond) \ 58 + do { \ 59 + if (unlikely(!(cond))) { \ 60 + spin_begin(); \ 61 + do { \ 62 + spin_cpu_relax(); \ 63 + } while (!(cond)); \ 64 + spin_end(); \ 65 + } \ 66 + } while (0) 67 + 68 + #endif 69 + 70 + #endif /* _LINUX_PROCESSOR_H */
+133
include/misc/cxllib.h
··· 1 + /* 2 + * Copyright 2017 IBM Corp. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation; either version 7 + * 2 of the License, or (at your option) any later version. 8 + */ 9 + 10 + #ifndef _MISC_CXLLIB_H 11 + #define _MISC_CXLLIB_H 12 + 13 + #include <linux/pci.h> 14 + #include <asm/reg.h> 15 + 16 + /* 17 + * cxl driver exports a in-kernel 'library' API which can be called by 18 + * other drivers to help interacting with an IBM XSL. 19 + */ 20 + 21 + /* 22 + * tells whether capi is supported on the PCIe slot where the 23 + * device is seated 24 + * 25 + * Input: 26 + * dev: device whose slot needs to be checked 27 + * flags: 0 for the time being 28 + */ 29 + bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags); 30 + 31 + 32 + /* 33 + * Returns the configuration parameters to be used by the XSL or device 34 + * 35 + * Input: 36 + * dev: device, used to find PHB 37 + * Output: 38 + * struct cxllib_xsl_config: 39 + * version 40 + * capi BAR address, i.e. 0x2000000000000-0x2FFFFFFFFFFFF 41 + * capi BAR size 42 + * data send control (XSL_DSNCTL) 43 + * dummy read address (XSL_DRA) 44 + */ 45 + #define CXL_XSL_CONFIG_VERSION1 1 46 + struct cxllib_xsl_config { 47 + u32 version; /* format version for register encoding */ 48 + u32 log_bar_size;/* log size of the capi_window */ 49 + u64 bar_addr; /* address of the start of capi window */ 50 + u64 dsnctl; /* matches definition of XSL_DSNCTL */ 51 + u64 dra; /* real address that can be used for dummy read */ 52 + }; 53 + 54 + int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg); 55 + 56 + 57 + /* 58 + * Activate capi for the pci host bridge associated with the device. 59 + * Can be extended to deactivate once we know how to do it. 
60 + * Device must be ready to accept messages from the CAPP unit and 61 + * respond accordingly (TLB invalidates, ...) 62 + * 63 + * PHB is switched to capi mode through calls to skiboot. 64 + * CAPP snooping is activated 65 + * 66 + * Input: 67 + * dev: device whose PHB should switch mode 68 + * mode: mode to switch to i.e. CAPI or PCI 69 + * flags: options related to the mode 70 + */ 71 + enum cxllib_mode { 72 + CXL_MODE_CXL, 73 + CXL_MODE_PCI, 74 + }; 75 + 76 + #define CXL_MODE_NO_DMA 0 77 + #define CXL_MODE_DMA_TVT0 1 78 + #define CXL_MODE_DMA_TVT1 2 79 + 80 + int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, 81 + unsigned long flags); 82 + 83 + 84 + /* 85 + * Set the device for capi DMA. 86 + * Define its dma_ops and dma offset so that allocations will be using TVT#1 87 + * 88 + * Input: 89 + * dev: device to set 90 + * flags: options. CXL_MODE_DMA_TVT1 should be used 91 + */ 92 + int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags); 93 + 94 + 95 + /* 96 + * Get the Process Element structure for the given thread 97 + * 98 + * Input: 99 + * task: task_struct for the context of the translation 100 + * translation_mode: whether addresses should be translated 101 + * Output: 102 + * attr: attributes to fill up the Process Element structure from CAIA 103 + */ 104 + struct cxllib_pe_attributes { 105 + u64 sr; 106 + u32 lpid; 107 + u32 tid; 108 + u32 pid; 109 + }; 110 + #define CXL_TRANSLATED_MODE 0 111 + #define CXL_REAL_MODE 1 112 + 113 + int cxllib_get_PE_attributes(struct task_struct *task, 114 + unsigned long translation_mode, struct cxllib_pe_attributes *attr); 115 + 116 + 117 + /* 118 + * Handle memory fault. 
119 + * Fault in all the pages of the specified buffer for the permissions 120 + * provided in ‘flags’ 121 + * 122 + * Shouldn't be called from interrupt context 123 + * 124 + * Input: 125 + * mm: struct mm for the thread faulting the pages 126 + * addr: base address of the buffer to page in 127 + * size: size of the buffer to page in 128 + * flags: permission requested (DSISR_ISSTORE...) 129 + */ 130 + int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags); 131 + 132 + 133 + #endif /* _MISC_CXLLIB_H */
+5 -1
mm/Kconfig
··· 669 669 670 670 See Documentation/vm/idle_page_tracking.txt for more details. 671 671 672 + # arch_add_memory() comprehends device memory 673 + config ARCH_HAS_ZONE_DEVICE 674 + bool 675 + 672 676 config ZONE_DEVICE 673 677 bool "Device memory (pmem, etc...) hotplug support" 674 678 depends on MEMORY_HOTPLUG 675 679 depends on MEMORY_HOTREMOVE 676 680 depends on SPARSEMEM_VMEMMAP 677 - depends on X86_64 #arch_add_memory() comprehends device memory 681 + depends on ARCH_HAS_ZONE_DEVICE 678 682 679 683 help 680 684 Device memory hotplug support allows for establishing pmem,
+33 -20
tools/testing/selftests/powerpc/benchmarks/context_switch.c
··· 258 258 return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST); 259 259 } 260 260 261 + static int processes; 262 + 261 263 static int mutex_lock(unsigned long *m) 262 264 { 263 265 int c; 266 + int flags = FUTEX_WAIT; 267 + if (!processes) 268 + flags |= FUTEX_PRIVATE_FLAG; 264 269 265 270 c = cmpxchg(m, 0, 1); 266 271 if (!c) ··· 275 270 c = xchg(m, 2); 276 271 277 272 while (c) { 278 - sys_futex(m, FUTEX_WAIT, 2, NULL, NULL, 0); 273 + sys_futex(m, flags, 2, NULL, NULL, 0); 279 274 c = xchg(m, 2); 280 275 } 281 276 ··· 284 279 285 280 static int mutex_unlock(unsigned long *m) 286 281 { 282 + int flags = FUTEX_WAKE; 283 + if (!processes) 284 + flags |= FUTEX_PRIVATE_FLAG; 285 + 287 286 if (*m == 2) 288 287 *m = 0; 289 288 else if (xchg(m, 0) == 1) 290 289 return 0; 291 290 292 - sys_futex(m, FUTEX_WAKE, 1, NULL, NULL, 0); 291 + sys_futex(m, flags, 1, NULL, NULL, 0); 293 292 294 293 return 0; 295 294 } ··· 302 293 303 294 static void futex_setup(int cpu1, int cpu2) 304 295 { 305 - int shmid; 306 - void *shmaddr; 296 + if (!processes) { 297 + static unsigned long _m1, _m2; 298 + m1 = &_m1; 299 + m2 = &_m2; 300 + } else { 301 + int shmid; 302 + void *shmaddr; 307 303 308 - shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W); 309 - if (shmid < 0) { 310 - perror("shmget"); 311 - exit(1); 312 - } 304 + shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W); 305 + if (shmid < 0) { 306 + perror("shmget"); 307 + exit(1); 308 + } 313 309 314 - shmaddr = shmat(shmid, NULL, 0); 315 - if (shmaddr == (char *)-1) { 316 - perror("shmat"); 310 + shmaddr = shmat(shmid, NULL, 0); 311 + if (shmaddr == (char *)-1) { 312 + perror("shmat"); 313 + shmctl(shmid, IPC_RMID, NULL); 314 + exit(1); 315 + } 316 + 317 317 shmctl(shmid, IPC_RMID, NULL); 318 - exit(1); 318 + 319 + m1 = shmaddr; 320 + m2 = shmaddr + sizeof(*m1); 319 321 } 320 - 321 - shmctl(shmid, IPC_RMID, NULL); 322 - 323 - m1 = shmaddr; 324 - m2 = shmaddr + sizeof(*m1); 325 322 326 323 *m1 = 0; 327 324 *m2 = 0; ··· 
366 351 .thread1 = futex_thread1, 367 352 .thread2 = futex_thread2, 368 353 }; 369 - 370 - static int processes; 371 354 372 355 static struct option options[] = { 373 356 { "test", required_argument, 0, 't' },