Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
"Notable changes:

- Support for 4PB user address space on 64-bit, opt-in via mmap().

- Removal of POWER4 support, which was accidentally broken in 2016
and no one noticed, and blocked use of some modern instructions.

- Workarounds so that the hypervisor can enable Transactional Memory
on Power9.

- A series to disable the DAWR (Data Address Watchpoint Register) on
Power9.

- More information displayed in the meltdown/spectre_v1/v2 sysfs
files.

- A vpermxor (Power8 Altivec) implementation for the raid6 Q
Syndrome.

- A big series to make the allocation of our pacas (per cpu area),
kernel page tables, and per-cpu stacks NUMA aware when using the
Radix MMU on Power9.

And as usual many fixes, reworks and cleanups.

Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy,
Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual,
Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe
Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic
Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook,
Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe,
Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring,
Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira,
Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras,
Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher
Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev
Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav
Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun"

* tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits)
powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep
powerpc/64s: Fix POWER9 DD2.2 and above in cputable features
powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit
powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits
Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead"
powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
cxl: Fix possible deadlock when processing page faults from cxllib
powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it
powerpc/mm/radix: Update command line parsing for disable_radix
powerpc/mm/radix: Parse disable_radix commandline correctly.
powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb
powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix
powerpc/mm/keys: Update documentation and remove unnecessary check
powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead
powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop()
powerpc/powernv: Always stop secondaries before reboot/shutdown
powerpc: hard disable irqs in smp_send_stop loop
powerpc: use NMI IPI for smp_send_stop
powerpc/powernv: Fix SMT4 forcing idle code
...

+4647 -2432
+11 -3
arch/powerpc/Makefile
··· 141 141 endif 142 142 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) 143 143 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) 144 + 144 145 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD) 146 + CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) 145 147 146 148 ifeq ($(CONFIG_PPC_BOOK3S_64),y) 147 - CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4) 148 - CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4 149 + ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) 150 + CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8 151 + CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8) 152 + else 153 + CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) 154 + CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) 155 + endif 149 156 else 150 157 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 151 158 endif ··· 173 166 endif 174 167 175 168 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) 176 - CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) 177 169 CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) 178 170 CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) 179 171 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) 180 172 CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8) 173 + CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9) 181 174 182 175 # Altivec option not allowed with e500mc64 in GCC. 183 176 ifeq ($(CONFIG_ALTIVEC),y) ··· 250 243 cpu-as-$(CONFIG_4xx) += -Wa,-m405 251 244 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) 252 245 cpu-as-$(CONFIG_E200) += -Wa,-me200 246 + cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 253 247 254 248 KBUILD_AFLAGS += $(cpu-as-y) 255 249 KBUILD_CFLAGS += $(cpu-as-y)
+1 -1
arch/powerpc/boot/dts/acadia.dts
··· 219 219 }; 220 220 221 221 chosen { 222 - linux,stdout-path = "/plb/opb/serial@ef600300"; 222 + stdout-path = "/plb/opb/serial@ef600300"; 223 223 }; 224 224 };
+1 -1
arch/powerpc/boot/dts/adder875-redboot.dts
··· 178 178 }; 179 179 180 180 chosen { 181 - linux,stdout-path = &console; 181 + stdout-path = &console; 182 182 }; 183 183 };
+1 -1
arch/powerpc/boot/dts/adder875-uboot.dts
··· 177 177 }; 178 178 179 179 chosen { 180 - linux,stdout-path = &console; 180 + stdout-path = &console; 181 181 }; 182 182 };
+1 -1
arch/powerpc/boot/dts/akebono.dts
··· 410 410 }; 411 411 412 412 chosen { 413 - linux,stdout-path = &UART0; 413 + stdout-path = &UART0; 414 414 }; 415 415 };
+1 -1
arch/powerpc/boot/dts/amigaone.dts
··· 168 168 }; 169 169 170 170 chosen { 171 - linux,stdout-path = "/pci@80000000/isa@7/serial@3f8"; 171 + stdout-path = "/pci@80000000/isa@7/serial@3f8"; 172 172 }; 173 173 };
+1 -1
arch/powerpc/boot/dts/asp834x-redboot.dts
··· 304 304 305 305 chosen { 306 306 bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2"; 307 - linux,stdout-path = &serial0; 307 + stdout-path = &serial0; 308 308 }; 309 309 310 310 };
+1 -1
arch/powerpc/boot/dts/bamboo.dts
··· 295 295 }; 296 296 297 297 chosen { 298 - linux,stdout-path = "/plb/opb/serial@ef600300"; 298 + stdout-path = "/plb/opb/serial@ef600300"; 299 299 }; 300 300 };
+1 -1
arch/powerpc/boot/dts/c2k.dts
··· 361 361 }; 362 362 }; 363 363 chosen { 364 - linux,stdout-path = &MPSC0; 364 + stdout-path = &MPSC0; 365 365 }; 366 366 };
+1 -1
arch/powerpc/boot/dts/currituck.dts
··· 237 237 }; 238 238 239 239 chosen { 240 - linux,stdout-path = &UART0; 240 + stdout-path = &UART0; 241 241 }; 242 242 };
+1 -1
arch/powerpc/boot/dts/digsy_mtc.dts
··· 78 78 }; 79 79 80 80 rtc@56 { 81 - compatible = "mc,rv3029c2"; 81 + compatible = "microcrystal,rv3029"; 82 82 reg = <0x56>; 83 83 }; 84 84
+1 -1
arch/powerpc/boot/dts/ebony.dts
··· 332 332 }; 333 333 334 334 chosen { 335 - linux,stdout-path = "/plb/opb/serial@40000200"; 335 + stdout-path = "/plb/opb/serial@40000200"; 336 336 }; 337 337 };
+1 -1
arch/powerpc/boot/dts/eiger.dts
··· 421 421 422 422 }; 423 423 chosen { 424 - linux,stdout-path = "/plb/opb/serial@ef600200"; 424 + stdout-path = "/plb/opb/serial@ef600200"; 425 425 }; 426 426 427 427 };
+1 -1
arch/powerpc/boot/dts/ep405.dts
··· 225 225 }; 226 226 227 227 chosen { 228 - linux,stdout-path = "/plb/opb/serial@ef600300"; 228 + stdout-path = "/plb/opb/serial@ef600300"; 229 229 }; 230 230 };
+1 -1
arch/powerpc/boot/dts/fsl/mvme7100.dts
··· 146 146 }; 147 147 148 148 chosen { 149 - linux,stdout-path = &serial0; 149 + stdout-path = &serial0; 150 150 }; 151 151 }; 152 152
+1 -1
arch/powerpc/boot/dts/fsp2.dts
··· 607 607 }; 608 608 609 609 chosen { 610 - linux,stdout-path = "/plb/opb/serial@b0020000"; 610 + stdout-path = "/plb/opb/serial@b0020000"; 611 611 bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug"; 612 612 }; 613 613 };
+1 -1
arch/powerpc/boot/dts/holly.dts
··· 191 191 }; 192 192 193 193 chosen { 194 - linux,stdout-path = "/tsi109@c0000000/serial@7808"; 194 + stdout-path = "/tsi109@c0000000/serial@7808"; 195 195 }; 196 196 };
+1 -1
arch/powerpc/boot/dts/hotfoot.dts
··· 291 291 }; 292 292 293 293 chosen { 294 - linux,stdout-path = &UART0; 294 + stdout-path = &UART0; 295 295 }; 296 296 };
+1 -1
arch/powerpc/boot/dts/icon.dts
··· 442 442 }; 443 443 444 444 chosen { 445 - linux,stdout-path = "/plb/opb/serial@f0000200"; 445 + stdout-path = "/plb/opb/serial@f0000200"; 446 446 }; 447 447 };
+1 -1
arch/powerpc/boot/dts/iss4xx-mpic.dts
··· 150 150 }; 151 151 152 152 chosen { 153 - linux,stdout-path = "/plb/opb/serial@40000200"; 153 + stdout-path = "/plb/opb/serial@40000200"; 154 154 }; 155 155 };
+1 -1
arch/powerpc/boot/dts/iss4xx.dts
··· 111 111 }; 112 112 113 113 chosen { 114 - linux,stdout-path = "/plb/opb/serial@40000200"; 114 + stdout-path = "/plb/opb/serial@40000200"; 115 115 }; 116 116 };
+1 -1
arch/powerpc/boot/dts/katmai.dts
··· 505 505 }; 506 506 507 507 chosen { 508 - linux,stdout-path = "/plb/opb/serial@f0000200"; 508 + stdout-path = "/plb/opb/serial@f0000200"; 509 509 }; 510 510 };
+1 -1
arch/powerpc/boot/dts/klondike.dts
··· 222 222 }; 223 223 224 224 chosen { 225 - linux,stdout-path = "/plb/opb/serial@50001000"; 225 + stdout-path = "/plb/opb/serial@50001000"; 226 226 }; 227 227 };
+1 -1
arch/powerpc/boot/dts/ksi8560.dts
··· 339 339 340 340 341 341 chosen { 342 - linux,stdout-path = "/soc/cpm/serial@91a00"; 342 + stdout-path = "/soc/cpm/serial@91a00"; 343 343 }; 344 344 };
+1 -1
arch/powerpc/boot/dts/media5200.dts
··· 25 25 }; 26 26 27 27 chosen { 28 - linux,stdout-path = &console; 28 + stdout-path = &console; 29 29 }; 30 30 31 31 cpus {
+1 -1
arch/powerpc/boot/dts/mpc8272ads.dts
··· 262 262 }; 263 263 264 264 chosen { 265 - linux,stdout-path = "/soc/cpm/serial@11a00"; 265 + stdout-path = "/soc/cpm/serial@11a00"; 266 266 }; 267 267 };
+1 -1
arch/powerpc/boot/dts/mpc866ads.dts
··· 185 185 }; 186 186 187 187 chosen { 188 - linux,stdout-path = "/soc/cpm/serial@a80"; 188 + stdout-path = "/soc/cpm/serial@a80"; 189 189 }; 190 190 };
+1 -1
arch/powerpc/boot/dts/mpc885ads.dts
··· 227 227 }; 228 228 229 229 chosen { 230 - linux,stdout-path = "/soc/cpm/serial@a80"; 230 + stdout-path = "/soc/cpm/serial@a80"; 231 231 }; 232 232 };
+1 -1
arch/powerpc/boot/dts/mvme5100.dts
··· 179 179 }; 180 180 181 181 chosen { 182 - linux,stdout-path = &serial0; 182 + stdout-path = &serial0; 183 183 }; 184 184 185 185 };
+1 -1
arch/powerpc/boot/dts/obs600.dts
··· 309 309 }; 310 310 }; 311 311 chosen { 312 - linux,stdout-path = "/plb/opb/serial@ef600200"; 312 + stdout-path = "/plb/opb/serial@ef600200"; 313 313 }; 314 314 };
+1 -1
arch/powerpc/boot/dts/pq2fads.dts
··· 242 242 }; 243 243 244 244 chosen { 245 - linux,stdout-path = "/soc/cpm/serial@11a00"; 245 + stdout-path = "/soc/cpm/serial@11a00"; 246 246 }; 247 247 };
+1 -1
arch/powerpc/boot/dts/rainier.dts
··· 344 344 }; 345 345 346 346 chosen { 347 - linux,stdout-path = "/plb/opb/serial@ef600300"; 347 + stdout-path = "/plb/opb/serial@ef600300"; 348 348 bootargs = "console=ttyS0,115200"; 349 349 }; 350 350 };
+1 -1
arch/powerpc/boot/dts/redwood.dts
··· 381 381 382 382 383 383 chosen { 384 - linux,stdout-path = "/plb/opb/serial@ef600200"; 384 + stdout-path = "/plb/opb/serial@ef600200"; 385 385 }; 386 386 387 387 };
+1 -1
arch/powerpc/boot/dts/sam440ep.dts
··· 288 288 }; 289 289 290 290 chosen { 291 - linux,stdout-path = "/plb/opb/serial@ef600300"; 291 + stdout-path = "/plb/opb/serial@ef600300"; 292 292 }; 293 293 };
+1 -1
arch/powerpc/boot/dts/sequoia.dts
··· 406 406 }; 407 407 408 408 chosen { 409 - linux,stdout-path = "/plb/opb/serial@ef600300"; 409 + stdout-path = "/plb/opb/serial@ef600300"; 410 410 bootargs = "console=ttyS0,115200"; 411 411 }; 412 412 };
+1 -1
arch/powerpc/boot/dts/storcenter.dts
··· 137 137 }; 138 138 139 139 chosen { 140 - linux,stdout-path = &serial0; 140 + stdout-path = &serial0; 141 141 }; 142 142 };
+1 -1
arch/powerpc/boot/dts/taishan.dts
··· 422 422 }; 423 423 424 424 chosen { 425 - linux,stdout-path = "/plb/opb/serial@40000300"; 425 + stdout-path = "/plb/opb/serial@40000300"; 426 426 }; 427 427 };
+1 -1
arch/powerpc/boot/dts/virtex440-ml507.dts
··· 32 32 } ; 33 33 chosen { 34 34 bootargs = "console=ttyS0 root=/dev/ram"; 35 - linux,stdout-path = &RS232_Uart_1; 35 + stdout-path = &RS232_Uart_1; 36 36 } ; 37 37 cpus { 38 38 #address-cells = <1>;
+1 -1
arch/powerpc/boot/dts/virtex440-ml510.dts
··· 26 26 } ; 27 27 chosen { 28 28 bootargs = "console=ttyS0 root=/dev/ram"; 29 - linux,stdout-path = "/plb@0/serial@83e00000"; 29 + stdout-path = "/plb@0/serial@83e00000"; 30 30 } ; 31 31 cpus { 32 32 #address-cells = <1>;
+1 -1
arch/powerpc/boot/dts/walnut.dts
··· 241 241 }; 242 242 243 243 chosen { 244 - linux,stdout-path = "/plb/opb/serial@ef600300"; 244 + stdout-path = "/plb/opb/serial@ef600300"; 245 245 }; 246 246 };
+1 -1
arch/powerpc/boot/dts/warp.dts
··· 304 304 }; 305 305 306 306 chosen { 307 - linux,stdout-path = "/plb/opb/serial@ef600300"; 307 + stdout-path = "/plb/opb/serial@ef600300"; 308 308 }; 309 309 };
+21
arch/powerpc/boot/dts/wii.dts
··· 13 13 */ 14 14 15 15 /dts-v1/; 16 + #include <dt-bindings/gpio/gpio.h> 16 17 17 18 /* 18 19 * This is commented-out for now. ··· 177 176 compatible = "nintendo,hollywood-gpio"; 178 177 reg = <0x0d8000c0 0x40>; 179 178 gpio-controller; 179 + ngpios = <24>; 180 + 181 + gpio-line-names = 182 + "POWER", "SHUTDOWN", "FAN", "DC_DC", 183 + "DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN", 184 + "SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK", 185 + "EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA", 186 + "DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3", 187 + "DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7"; 180 188 181 189 /* 182 190 * This is commented out while a standard binding ··· 222 212 compatible = "nintendo,hollywood-di"; 223 213 reg = <0x0d806000 0x40>; 224 214 interrupts = <2>; 215 + }; 216 + }; 217 + 218 + gpio-leds { 219 + compatible = "gpio-leds"; 220 + 221 + /* This is the blue LED in the disk drive slot */ 222 + drive-slot { 223 + label = "wii:blue:drive_slot"; 224 + gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>; 225 + panic-indicator; 225 226 }; 226 227 }; 227 228 };
+1 -1
arch/powerpc/boot/dts/xpedite5200_xmon.dts
··· 503 503 504 504 /* Needed for dtbImage boot wrapper compatibility */ 505 505 chosen { 506 - linux,stdout-path = &serial0; 506 + stdout-path = &serial0; 507 507 }; 508 508 };
+1 -1
arch/powerpc/boot/dts/yosemite.dts
··· 327 327 }; 328 328 329 329 chosen { 330 - linux,stdout-path = "/plb/opb/serial@ef600300"; 330 + stdout-path = "/plb/opb/serial@ef600300"; 331 331 }; 332 332 };
-2
arch/powerpc/boot/libfdt_env.h
··· 7 7 8 8 #include "of.h" 9 9 10 - typedef u32 uint32_t; 11 - typedef u64 uint64_t; 12 10 typedef unsigned long uintptr_t; 13 11 14 12 typedef __be16 fdt16_t;
+15
arch/powerpc/include/asm/asm-prototypes.h
··· 62 62 void single_step_exception(struct pt_regs *regs); 63 63 void program_check_exception(struct pt_regs *regs); 64 64 void alignment_exception(struct pt_regs *regs); 65 + void slb_miss_bad_addr(struct pt_regs *regs); 65 66 void StackOverflow(struct pt_regs *regs); 66 67 void nonrecoverable_exception(struct pt_regs *regs); 67 68 void kernel_fp_unavailable_exception(struct pt_regs *regs); ··· 89 88 long sys_swapcontext(struct ucontext __user *old_ctx, 90 89 struct ucontext __user *new_ctx, 91 90 int ctx_size, int r6, int r7, int r8, struct pt_regs *regs); 91 + int sys_debug_setcontext(struct ucontext __user *ctx, 92 + int ndbg, struct sig_dbg_op __user *dbg, 93 + int r6, int r7, int r8, 94 + struct pt_regs *regs); 95 + int 96 + ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp); 97 + unsigned long __init early_init(unsigned long dt_ptr); 98 + void __init machine_init(u64 dt_ptr); 92 99 #endif 100 + 101 + long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, 102 + u32 len_high, u32 len_low); 93 103 long sys_switch_endian(void); 94 104 notrace unsigned int __check_irq_replay(void); 95 105 void notrace restore_interrupts(void); ··· 137 125 /* tracing */ 138 126 void _mcount(void); 139 127 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); 128 + 129 + void pnv_power9_force_smt4_catch(void); 130 + void pnv_power9_force_smt4_release(void); 140 131 141 132 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
+2 -1
arch/powerpc/include/asm/barrier.h
··· 35 35 #define rmb() __asm__ __volatile__ ("sync" : : : "memory") 36 36 #define wmb() __asm__ __volatile__ ("sync" : : : "memory") 37 37 38 - #ifdef __SUBARCH_HAS_LWSYNC 38 + /* The sub-arch has lwsync */ 39 + #if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) 39 40 # define SMPWMB LWSYNC 40 41 #else 41 42 # define SMPWMB eieio
+14
arch/powerpc/include/asm/book3s/64/hash-4k.h
··· 11 11 #define H_PUD_INDEX_SIZE 9 12 12 #define H_PGD_INDEX_SIZE 9 13 13 14 + /* 15 + * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB 16 + * Hence also limit max EA bits to 64TB. 17 + */ 18 + #define MAX_EA_BITS_PER_CONTEXT 46 19 + 14 20 #ifndef __ASSEMBLY__ 15 21 #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) 16 22 #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) ··· 40 34 #define H_PAGE_COMBO 0x0 41 35 #define H_PTE_FRAG_NR 0 42 36 #define H_PTE_FRAG_SIZE_SHIFT 0 37 + 38 + /* memory key bits, only 8 keys supported */ 39 + #define H_PTE_PKEY_BIT0 0 40 + #define H_PTE_PKEY_BIT1 0 41 + #define H_PTE_PKEY_BIT2 _RPAGE_RSV3 42 + #define H_PTE_PKEY_BIT3 _RPAGE_RSV4 43 + #define H_PTE_PKEY_BIT4 _RPAGE_RSV5 44 + 43 45 /* 44 46 * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() 45 47 */
+18 -7
arch/powerpc/include/asm/book3s/64/hash-64k.h
··· 4 4 5 5 #define H_PTE_INDEX_SIZE 8 6 6 #define H_PMD_INDEX_SIZE 10 7 - #define H_PUD_INDEX_SIZE 7 7 + #define H_PUD_INDEX_SIZE 10 8 8 #define H_PGD_INDEX_SIZE 8 9 + 10 + /* 11 + * Each context is 512TB size. SLB miss for first context/default context 12 + * is handled in the hotpath. 13 + */ 14 + #define MAX_EA_BITS_PER_CONTEXT 49 9 15 10 16 /* 11 17 * 64k aligned address free up few of the lower bits of RPN for us ··· 22 16 #define H_PAGE_BUSY _RPAGE_RPN44 /* software: PTE & hash are busy */ 23 17 #define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */ 24 18 19 + /* memory key bits. */ 20 + #define H_PTE_PKEY_BIT0 _RPAGE_RSV1 21 + #define H_PTE_PKEY_BIT1 _RPAGE_RSV2 22 + #define H_PTE_PKEY_BIT2 _RPAGE_RSV3 23 + #define H_PTE_PKEY_BIT3 _RPAGE_RSV4 24 + #define H_PTE_PKEY_BIT4 _RPAGE_RSV5 25 + 25 26 /* 26 27 * We need to differentiate between explicit huge page and THP huge 27 28 * page, since THP huge page also need to track real subpage details ··· 38 25 /* PTE flags to conserve for HPTE identification */ 39 26 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO) 40 27 /* 41 - * we support 16 fragments per PTE page of 64K size. 42 - */ 43 - #define H_PTE_FRAG_NR 16 44 - /* 45 28 * We use a 2K PTE page fragment and another 2K for storing 46 29 * real_pte_t hash index 30 + * 8 bytes per each pte entry and another 8 bytes for storing 31 + * slot details. 47 32 */ 48 - #define H_PTE_FRAG_SIZE_SHIFT 12 49 - #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) 33 + #define H_PTE_FRAG_SIZE_SHIFT (H_PTE_INDEX_SIZE + 3 + 1) 34 + #define H_PTE_FRAG_NR (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT) 50 35 51 36 #ifndef __ASSEMBLY__ 52 37 #include <asm/errno.h>
+1 -1
arch/powerpc/include/asm/book3s/64/hash.h
··· 212 212 extern void hash__vmemmap_remove_mapping(unsigned long start, 213 213 unsigned long page_size); 214 214 215 - int hash__create_section_mapping(unsigned long start, unsigned long end); 215 + int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); 216 216 int hash__remove_section_mapping(unsigned long start, unsigned long end); 217 217 218 218 #endif /* !__ASSEMBLY__ */
+52 -2
arch/powerpc/include/asm/book3s/64/mmu.h
··· 80 80 /* Maximum possible number of NPUs in a system. */ 81 81 #define NV_MAX_NPUS 8 82 82 83 + /* 84 + * One bit per slice. We have lower slices which cover 256MB segments 85 + * upto 4G range. That gets us 16 low slices. For the rest we track slices 86 + * in 1TB size. 87 + */ 88 + struct slice_mask { 89 + u64 low_slices; 90 + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); 91 + }; 92 + 83 93 typedef struct { 84 - mm_context_id_t id; 94 + union { 95 + /* 96 + * We use id as the PIDR content for radix. On hash we can use 97 + * more than one id. The extended ids are used when we start 98 + * having address above 512TB. We allocate one extended id 99 + * for each 512TB. The new id is then used with the 49 bit 100 + * EA to build a new VA. We always use ESID_BITS_1T_MASK bits 101 + * from EA and new context ids to build the new VAs. 102 + */ 103 + mm_context_id_t id; 104 + mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; 105 + }; 85 106 u16 user_psize; /* page size index */ 86 107 87 108 /* Number of bits in the mm_cpumask */ ··· 115 94 struct npu_context *npu_context; 116 95 117 96 #ifdef CONFIG_PPC_MM_SLICES 118 - u64 low_slices_psize; /* SLB page size encodings */ 97 + /* SLB page size encodings*/ 98 + unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; 119 99 unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; 120 100 unsigned long slb_addr_limit; 101 + # ifdef CONFIG_PPC_64K_PAGES 102 + struct slice_mask mask_64k; 103 + # endif 104 + struct slice_mask mask_4k; 105 + # ifdef CONFIG_HUGETLB_PAGE 106 + struct slice_mask mask_16m; 107 + struct slice_mask mask_16g; 108 + # endif 121 109 #else 122 110 u16 sllp; /* SLB page size encoding */ 123 111 #endif ··· 206 176 #else 207 177 static inline void radix_init_pseries(void) { }; 208 178 #endif 179 + 180 + static inline int get_ea_context(mm_context_t *ctx, unsigned long ea) 181 + { 182 + int index = ea >> MAX_EA_BITS_PER_CONTEXT; 183 + 184 + if (likely(index < ARRAY_SIZE(ctx->extended_id))) 185 + return ctx->extended_id[index]; 186 + 187 + /* should never happen */ 188 + WARN_ON(1); 189 + return 0; 190 + } 191 + 192 + static inline unsigned long get_user_vsid(mm_context_t *ctx, 193 + unsigned long ea, int ssize) 194 + { 195 + unsigned long context = get_ea_context(ctx, ea); 196 + 197 + return get_vsid(context, ea, ssize); 198 + } 209 199 210 200 #endif /* __ASSEMBLY__ */ 211 201 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
+11 -1
arch/powerpc/include/asm/book3s/64/pgalloc.h
··· 80 80 81 81 pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), 82 82 pgtable_gfp_flags(mm, GFP_KERNEL)); 83 + /* 84 + * With hugetlb, we don't clear the second half of the page table. 85 + * If we share the same slab cache with the pmd or pud level table, 86 + * we need to make sure we zero out the full table on alloc. 87 + * With 4K we don't store slot in the second half. Hence we don't 88 + * need to do this for 4k. 89 + */ 90 + #if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \ 91 + ((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) || \ 92 + (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX)) 83 93 memset(pgd, 0, PGD_TABLE_SIZE); 84 - 94 + #endif 85 95 return pgd; 86 96 } 87 97
-19
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 60 60 /* Max physical address bit as per radix table */ 61 61 #define _RPAGE_PA_MAX 57 62 62 63 - #ifdef CONFIG_PPC_MEM_KEYS 64 - #ifdef CONFIG_PPC_64K_PAGES 65 - #define H_PTE_PKEY_BIT0 _RPAGE_RSV1 66 - #define H_PTE_PKEY_BIT1 _RPAGE_RSV2 67 - #else /* CONFIG_PPC_64K_PAGES */ 68 - #define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ 69 - #define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ 70 - #endif /* CONFIG_PPC_64K_PAGES */ 71 - #define H_PTE_PKEY_BIT2 _RPAGE_RSV3 72 - #define H_PTE_PKEY_BIT3 _RPAGE_RSV4 73 - #define H_PTE_PKEY_BIT4 _RPAGE_RSV5 74 - #else /* CONFIG_PPC_MEM_KEYS */ 75 - #define H_PTE_PKEY_BIT0 0 76 - #define H_PTE_PKEY_BIT1 0 77 - #define H_PTE_PKEY_BIT2 0 78 - #define H_PTE_PKEY_BIT3 0 79 - #define H_PTE_PKEY_BIT4 0 80 - #endif /* CONFIG_PPC_MEM_KEYS */ 81 - 82 63 /* 83 64 * Max physical address bit we will use for now. 84 65 *
+5
arch/powerpc/include/asm/book3s/64/radix-4k.h
··· 9 9 #define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ 10 10 #define RADIX_PUD_INDEX_SIZE 9 11 11 #define RADIX_PGD_INDEX_SIZE 13 12 + /* 13 + * One fragment per per page 14 + */ 15 + #define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3) 16 + #define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT) 12 17 13 18 #endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
+6
arch/powerpc/include/asm/book3s/64/radix-64k.h
··· 10 10 #define RADIX_PUD_INDEX_SIZE 9 11 11 #define RADIX_PGD_INDEX_SIZE 13 12 12 13 + /* 14 + * We use a 256 byte PTE page fragment in radix 15 + * 8 bytes per each PTE entry. 16 + */ 17 + #define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3) 18 + #define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT) 13 19 #endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
+1 -1
arch/powerpc/include/asm/book3s/64/radix.h
··· 313 313 } 314 314 315 315 #ifdef CONFIG_MEMORY_HOTPLUG 316 - int radix__create_section_mapping(unsigned long start, unsigned long end); 316 + int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); 317 317 int radix__remove_section_mapping(unsigned long start, unsigned long end); 318 318 #endif /* CONFIG_MEMORY_HOTPLUG */ 319 319 #endif /* __ASSEMBLY__ */
+27
arch/powerpc/include/asm/book3s/64/slice.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H 3 + #define _ASM_POWERPC_BOOK3S_64_SLICE_H 4 + 5 + #ifdef CONFIG_PPC_MM_SLICES 6 + 7 + #define SLICE_LOW_SHIFT 28 8 + #define SLICE_LOW_TOP (0x100000000ul) 9 + #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 10 + #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) 11 + 12 + #define SLICE_HIGH_SHIFT 40 13 + #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 14 + #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) 15 + 16 + #else /* CONFIG_PPC_MM_SLICES */ 17 + 18 + #define get_slice_psize(mm, addr) ((mm)->context.user_psize) 19 + #define slice_set_user_psize(mm, psize) \ 20 + do { \ 21 + (mm)->context.user_psize = (psize); \ 22 + (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ 23 + } while (0) 24 + 25 + #endif /* CONFIG_PPC_MM_SLICES */ 26 + 27 + #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
-1
arch/powerpc/include/asm/cacheflush.h
··· 99 99 #ifdef CONFIG_PPC64 100 100 extern void flush_dcache_range(unsigned long start, unsigned long stop); 101 101 extern void flush_inval_dcache_range(unsigned long start, unsigned long stop); 102 - extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); 103 102 #endif 104 103 105 104 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+139 -124
arch/powerpc/include/asm/cputable.h
··· 131 131 132 132 /* CPU kernel features */ 133 133 134 - /* Retain the 32b definitions all use bottom half of word */ 134 + /* Definitions for features that we have on both 32-bit and 64-bit chips */ 135 135 #define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001) 136 - #define CPU_FTR_L2CR ASM_CONST(0x00000002) 137 - #define CPU_FTR_SPEC7450 ASM_CONST(0x00000004) 138 - #define CPU_FTR_ALTIVEC ASM_CONST(0x00000008) 139 - #define CPU_FTR_TAU ASM_CONST(0x00000010) 140 - #define CPU_FTR_CAN_DOZE ASM_CONST(0x00000020) 141 - #define CPU_FTR_USE_TB ASM_CONST(0x00000040) 142 - #define CPU_FTR_L2CSR ASM_CONST(0x00000080) 143 - #define CPU_FTR_601 ASM_CONST(0x00000100) 144 - #define CPU_FTR_DBELL ASM_CONST(0x00000200) 145 - #define CPU_FTR_CAN_NAP ASM_CONST(0x00000400) 146 - #define CPU_FTR_L3CR ASM_CONST(0x00000800) 147 - #define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00001000) 148 - #define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00002000) 149 - #define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00004000) 150 - #define CPU_FTR_NO_DPM ASM_CONST(0x00008000) 151 - #define CPU_FTR_476_DD2 ASM_CONST(0x00010000) 152 - #define CPU_FTR_NEED_COHERENT ASM_CONST(0x00020000) 153 - #define CPU_FTR_NO_BTIC ASM_CONST(0x00040000) 154 - #define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00080000) 155 - #define CPU_FTR_NODSISRALIGN ASM_CONST(0x00100000) 156 - #define CPU_FTR_PPC_LE ASM_CONST(0x00200000) 157 - #define CPU_FTR_REAL_LE ASM_CONST(0x00400000) 158 - #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00800000) 159 - #define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x01000000) 160 - #define CPU_FTR_SPE ASM_CONST(0x02000000) 161 - #define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x04000000) 162 - #define CPU_FTR_LWSYNC ASM_CONST(0x08000000) 163 - #define CPU_FTR_NOEXECUTE ASM_CONST(0x10000000) 164 - #define CPU_FTR_INDEXED_DCR ASM_CONST(0x20000000) 165 - #define CPU_FTR_EMB_HV ASM_CONST(0x40000000) 136 + #define CPU_FTR_ALTIVEC ASM_CONST(0x00000002) 137 + #define CPU_FTR_DBELL ASM_CONST(0x00000004) 138 + #define CPU_FTR_CAN_NAP ASM_CONST(0x00000008) 139 + #define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010) 140 + #define CPU_FTR_NODSISRALIGN ASM_CONST(0x00000020) 141 + #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040) 142 + #define CPU_FTR_LWSYNC ASM_CONST(0x00000080) 143 + #define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100) 144 + #define CPU_FTR_EMB_HV ASM_CONST(0x00000200) 145 + 146 + /* Definitions for features that only exist on 32-bit chips */ 147 + #ifdef CONFIG_PPC32 148 + #define CPU_FTR_601 ASM_CONST(0x00001000) 149 + #define CPU_FTR_L2CR ASM_CONST(0x00002000) 150 + #define CPU_FTR_SPEC7450 ASM_CONST(0x00004000) 151 + #define CPU_FTR_TAU ASM_CONST(0x00008000) 152 + #define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000) 153 + #define CPU_FTR_USE_RTC ASM_CONST(0x00020000) 154 + #define CPU_FTR_L3CR ASM_CONST(0x00040000) 155 + #define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000) 156 + #define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000) 157 + #define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00200000) 158 + #define CPU_FTR_NO_DPM ASM_CONST(0x00400000) 159 + #define CPU_FTR_476_DD2 ASM_CONST(0x00800000) 160 + #define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000) 161 + #define CPU_FTR_NO_BTIC ASM_CONST(0x02000000) 162 + #define CPU_FTR_PPC_LE ASM_CONST(0x04000000) 163 + #define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x08000000) 164 + #define CPU_FTR_SPE ASM_CONST(0x10000000) 165 + #define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000) 166 + #define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000) 167 + 168 + #else /* CONFIG_PPC32 */ 169 + /* Define these to 0 for the sake of tests in common code */ 170 + #define CPU_FTR_601 (0) 171 + #define CPU_FTR_PPC_LE (0) 172 + #endif 166 173 167 174 /* 168 - * Add the 64-bit processor unique features in the top half of the word; 175 + * Definitions for the 64-bit processor unique features; 169 176 * on 32-bit, make the names available but defined to be 0. 170 177 */ 171 178 #ifdef __powerpc64__ ··· 181 174 #define LONG_ASM_CONST(x) 0 182 175 #endif 183 176 184 - #define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000100000000) 185 - #define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000) 186 - #define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000) 187 - #define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000) 188 - #define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000001000000000) 189 - #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000) 190 - #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000) 191 - #define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000) 192 - #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000010000000000) 193 - #define CPU_FTR_PURR LONG_ASM_CONST(0x0000020000000000) 194 - #define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000040000000000) 195 - #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000080000000000) 196 - #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000100000000000) 197 - #define CPU_FTR_VSX LONG_ASM_CONST(0x0000200000000000) 198 - #define CPU_FTR_SAO LONG_ASM_CONST(0x0000400000000000) 199 - #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000800000000000) 200 - #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0001000000000000) 201 - #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0002000000000000) 202 - #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000) 203 - #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000) 204 - #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000) 205 - #define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000) 206 - #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000) 207 - #define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000) 208 - #define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) 209 - #define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000) 210 - #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) 211 - #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) 212 - #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) 213 - #define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x2000000000000000) 214 - #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) 215 - #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x8000000000000000) 177 + #define CPU_FTR_REAL_LE LONG_ASM_CONST(0x0000000000001000) 178 + #define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000000002000) 179 + #define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000000008000) 180 + #define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000000010000) 181 + #define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000000000020000) 182 + #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000000000040000) 183 + #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000000000080000) 184 + #define CPU_FTR_SMT LONG_ASM_CONST(0x0000000000100000) 185 + #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000000000200000) 186 + #define CPU_FTR_PURR LONG_ASM_CONST(0x0000000000400000) 187 + #define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000000000800000) 188 + #define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000) 189 + #define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000) 190 + #define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000) 191 + #define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000) 192 + #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000) 193 + #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000) 194 + #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000) 195 + #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000) 196 + #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000) 197 + #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000) 198 + #define CPU_FTR_PKEY LONG_ASM_CONST(0x0000000400000000) 199 + #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000) 200 + #define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000) 201 + #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000) 202 + #define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0000004000000000) 203 + #define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000) 204 + #define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000) 205 + #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000) 206 + #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x0000040000000000) 207 + #define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000) 208 + #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) 209 + #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) 210 + #define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000) 216 211 217 212 #ifndef __ASSEMBLY__ 218 213 ··· 295 286 #endif 296 287 297 288 #define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \ 298 - CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) 299 - #define CPU_FTRS_603 (CPU_FTR_COMMON | \ 300 - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ 289 + CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC) 290 + #define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ 301 291 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) 302 - #define CPU_FTRS_604 (CPU_FTR_COMMON | \ 303 - CPU_FTR_USE_TB | CPU_FTR_PPC_LE) 292 + #define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE) 304 293 #define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \ 294 + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ 306 295 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) 307 296 #define CPU_FTRS_740 (CPU_FTR_COMMON | \ 297 + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ 309 298 CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \ 310 299 CPU_FTR_PPC_LE) 311 300 #define CPU_FTRS_750 (CPU_FTR_COMMON | \ 301 + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ 313 302 CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \ 314 303 CPU_FTR_PPC_LE) 315 304 #define CPU_FTRS_750CL (CPU_FTRS_750) ··· 316 309 #define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX) 317 310 #define CPU_FTRS_750GX (CPU_FTRS_750FX) 318 311 #define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | 
\ 319 - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ 312 + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ 320 313 CPU_FTR_ALTIVEC_COMP | \ 321 314 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) 322 315 #define CPU_FTRS_7400 (CPU_FTR_COMMON | \ 323 - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ 316 + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \ 324 317 CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \ 325 318 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) 326 319 #define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \ 327 - CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 320 + CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 328 321 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ 329 322 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 330 323 #define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \ 331 - CPU_FTR_USE_TB | \ 332 324 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 333 325 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ 334 326 CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ 335 327 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 336 328 #define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \ 337 - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ 329 + CPU_FTR_NEED_PAIRED_STWCX | \ 338 330 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 339 331 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ 340 332 CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) 341 333 #define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \ 342 - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ 334 + CPU_FTR_NEED_PAIRED_STWCX | \ 343 335 CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \ 344 336 CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) 345 337 #define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \ 346 - CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \ 338 + CPU_FTR_NEED_PAIRED_STWCX | \ 347 339 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 348 340 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \ 349 341 CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ 350 
342 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) 351 343 #define CPU_FTRS_7455 (CPU_FTR_COMMON | \ 352 - CPU_FTR_USE_TB | \ 353 344 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 354 345 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ 355 346 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 356 347 #define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \ 357 - CPU_FTR_USE_TB | \ 358 348 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 359 349 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ 360 350 CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \ 361 351 CPU_FTR_NEED_PAIRED_STWCX) 362 352 #define CPU_FTRS_7447 (CPU_FTR_COMMON | \ 363 - CPU_FTR_USE_TB | \ 364 353 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 365 354 CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ 366 355 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 367 356 #define CPU_FTRS_7447A (CPU_FTR_COMMON | \ 368 - CPU_FTR_USE_TB | \ 369 357 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 370 358 CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ 371 359 CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 372 360 #define CPU_FTRS_7448 (CPU_FTR_COMMON | \ 373 - CPU_FTR_USE_TB | \ 374 361 CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ 375 362 CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \ 376 363 CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) 377 - #define CPU_FTRS_82XX (CPU_FTR_COMMON | \ 378 - CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB) 364 + #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE) 379 365 #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ 380 - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP) 366 + CPU_FTR_MAYBE_CAN_NAP) 381 367 #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ 382 - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ 368 + CPU_FTR_MAYBE_CAN_NAP | \ 383 369 CPU_FTR_COMMON) 384 370 #define 
CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ 385 - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ 371 + CPU_FTR_MAYBE_CAN_NAP | \ 386 372 CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) 387 - #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) 388 - #define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE) 389 - #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 390 - #define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 391 - #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ 373 + #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON) 374 + #define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE) 375 + #define CPU_FTRS_40X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 376 + #define CPU_FTRS_44X (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 377 + #define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ 392 378 CPU_FTR_INDEXED_DCR) 393 379 #define CPU_FTRS_47X (CPU_FTRS_440x6) 394 - #define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \ 380 + #define CPU_FTRS_E200 (CPU_FTR_SPE_COMP | \ 395 381 CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ 396 382 CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \ 397 383 CPU_FTR_DEBUG_LVL_EXC) 398 - #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ 384 + #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \ 399 385 CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ 400 386 CPU_FTR_NOEXECUTE) 401 - #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ 387 + #define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \ 402 388 CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \ 403 389 CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) 404 - #define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 405 - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 390 + #define CPU_FTRS_E500MC (CPU_FTR_NODSISRALIGN | \ 391 + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 406 392 CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) 407 393 
/* 408 394 * e5500/e6500 erratum A-006958 is a timebase bug that can use the 409 395 * same workaround as CPU_FTR_CELL_TB_BUG. 410 396 */ 411 - #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 412 - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 397 + #define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \ 398 + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 413 399 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 414 400 CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG) 415 - #define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ 416 - CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 401 + #define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \ 402 + CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ 417 403 CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 418 404 CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ 419 405 CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) 420 406 #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) 421 407 422 408 /* 64-bit CPUs */ 423 - #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 409 + #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ 424 410 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 425 - CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ 426 - CPU_FTR_STCX_CHECKS_ADDRESS) 427 - #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 428 - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \ 429 411 CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ 430 412 CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \ 431 413 CPU_FTR_HVMODE | CPU_FTR_DABRX) 432 - #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 414 + #define CPU_FTRS_POWER5 (CPU_FTR_LWSYNC | \ 433 415 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 434 416 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 435 417 CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \ 436 418 CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX) 437 - #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 419 + #define 
CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \ 438 420 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 439 421 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 440 422 CPU_FTR_COHERENT_ICACHE | \ ··· 431 435 CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ 432 436 CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \ 433 437 CPU_FTR_DABRX) 434 - #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 438 + #define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \ 435 439 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ 436 440 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 437 441 CPU_FTR_COHERENT_ICACHE | \ ··· 440 444 CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 441 445 CPU_FTR_CFAR | CPU_FTR_HVMODE | \ 442 446 CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) 443 - #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 447 + #define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \ 444 448 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ 445 449 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 446 450 CPU_FTR_COHERENT_ICACHE | \ ··· 452 456 CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) 453 457 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) 454 458 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) 455 - #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 459 + #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \ 456 460 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ 457 461 CPU_FTR_MMCRA | CPU_FTR_SMT | \ 458 462 CPU_FTR_COHERENT_ICACHE | \ ··· 460 464 CPU_FTR_DSCR | CPU_FTR_SAO | \ 461 465 CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ 462 466 CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ 463 - CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ 464 - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ 465 - CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG) 467 + CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ 468 + CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ 469 + CPU_FTR_P9_TLBIE_BUG) 466 470 
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ 467 471 (~CPU_FTR_SAO)) 468 472 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 469 473 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1) 470 - #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 474 + #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \ 475 + CPU_FTR_P9_TM_HV_ASSIST | \ 476 + CPU_FTR_P9_TM_XER_SO_BUG) 477 + #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ 471 478 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ 472 479 CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ 473 480 CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \ 474 481 CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX) 475 - #define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ 482 + #define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \ 476 483 CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \ 477 484 CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX) 478 - #define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2) 485 + #define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2) 479 486 480 487 #ifdef __powerpc64__ 481 488 #ifdef CONFIG_PPC_BOOK3E 482 489 #define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500) 483 490 #else 491 + #ifdef CONFIG_CPU_LITTLE_ENDIAN 484 492 #define CPU_FTRS_POSSIBLE \ 485 - (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ 493 + (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ 494 + CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \ 495 + CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ 496 + CPU_FTRS_POWER9_DD2_2) 497 + #else 498 + #define CPU_FTRS_POSSIBLE \ 499 + (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ 486 500 CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ 487 501 CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ 488 - CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \ 489 - CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1) 502 + CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | 
CPU_FTR_ALTIVEC_COMP | \ 503 + CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \ 504 + CPU_FTRS_POWER9_DD2_2) 505 + #endif /* CONFIG_CPU_LITTLE_ENDIAN */ 490 506 #endif 491 507 #else 492 508 enum { ··· 545 537 #ifdef CONFIG_PPC_BOOK3E 546 538 #define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500) 547 539 #else 540 + #ifdef CONFIG_CPU_LITTLE_ENDIAN 541 + #define CPU_FTRS_ALWAYS \ 542 + (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \ 543 + CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \ 544 + CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1) 545 + #else 548 546 #define CPU_FTRS_ALWAYS \ 549 - (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ 547 + (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ 550 548 CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ 551 549 CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ 552 550 CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \ 553 - CPU_FTRS_POWER9) 551 + CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1) 552 + #endif /* CONFIG_CPU_LITTLE_ENDIAN */ 554 553 #endif 555 554 #else 556 555 enum {
+1
arch/powerpc/include/asm/debug.h
··· 47 47 48 48 void set_breakpoint(struct arch_hw_breakpoint *brk); 49 49 void __set_breakpoint(struct arch_hw_breakpoint *brk); 50 + bool ppc_breakpoint_available(void); 50 51 #ifdef CONFIG_PPC_ADV_DEBUG_REGS 51 52 extern void do_send_trap(struct pt_regs *regs, unsigned long address, 52 53 unsigned long error_code, int brkpt);
+6
arch/powerpc/include/asm/eeh.h
··· 256 256 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 257 257 } 258 258 259 + static inline bool eeh_state_active(int state) 260 + { 261 + return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) 262 + == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 263 + } 264 + 259 265 typedef void *(*eeh_traverse_func)(void *data, void *flag); 260 266 void eeh_set_pe_aux_size(int size); 261 267 int eeh_phb_pe_create(struct pci_controller *phb);
+2 -1
arch/powerpc/include/asm/eeh_event.h
··· 34 34 int eeh_event_init(void); 35 35 int eeh_send_failure_event(struct eeh_pe *pe); 36 36 void eeh_remove_event(struct eeh_pe *pe, bool force); 37 - void eeh_handle_event(struct eeh_pe *pe); 37 + void eeh_handle_normal_event(struct eeh_pe *pe); 38 + void eeh_handle_special_event(void); 38 39 39 40 #endif /* __KERNEL__ */ 40 41 #endif /* ASM_POWERPC_EEH_EVENT_H */
+11 -11
arch/powerpc/include/asm/epapr_hcalls.h
··· 466 466 unsigned long *out, 467 467 unsigned long nr) 468 468 { 469 - unsigned long register r0 asm("r0"); 470 - unsigned long register r3 asm("r3") = in[0]; 471 - unsigned long register r4 asm("r4") = in[1]; 472 - unsigned long register r5 asm("r5") = in[2]; 473 - unsigned long register r6 asm("r6") = in[3]; 474 - unsigned long register r7 asm("r7") = in[4]; 475 - unsigned long register r8 asm("r8") = in[5]; 476 - unsigned long register r9 asm("r9") = in[6]; 477 - unsigned long register r10 asm("r10") = in[7]; 478 - unsigned long register r11 asm("r11") = nr; 479 - unsigned long register r12 asm("r12"); 469 + register unsigned long r0 asm("r0"); 470 + register unsigned long r3 asm("r3") = in[0]; 471 + register unsigned long r4 asm("r4") = in[1]; 472 + register unsigned long r5 asm("r5") = in[2]; 473 + register unsigned long r6 asm("r6") = in[3]; 474 + register unsigned long r7 asm("r7") = in[4]; 475 + register unsigned long r8 asm("r8") = in[5]; 476 + register unsigned long r9 asm("r9") = in[6]; 477 + register unsigned long r10 asm("r10") = in[7]; 478 + register unsigned long r11 asm("r11") = nr; 479 + register unsigned long r12 asm("r12"); 480 480 481 481 asm volatile("bl epapr_hypercall_start" 482 482 : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
+4 -4
arch/powerpc/include/asm/hugetlb.h
··· 89 89 90 90 void flush_dcache_icache_hugepage(struct page *page); 91 91 92 - #if defined(CONFIG_PPC_MM_SLICES) 93 - int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 92 + int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 94 93 unsigned long len); 95 - #else 94 + 96 95 static inline int is_hugepage_only_range(struct mm_struct *mm, 97 96 unsigned long addr, 98 97 unsigned long len) 99 98 { 99 + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) 100 + return slice_is_hugepage_only_range(mm, addr, len); 100 101 return 0; 101 102 } 102 - #endif 103 103 104 104 void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, 105 105 pte_t pte);
+4
arch/powerpc/include/asm/hvcall.h
··· 88 88 #define H_P8 -61 89 89 #define H_P9 -62 90 90 #define H_TOO_BIG -64 91 + #define H_UNSUPPORTED -67 91 92 #define H_OVERLAP -68 92 93 #define H_INTERRUPT -69 93 94 #define H_BAD_DATA -70 ··· 338 337 #define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 339 338 #define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 340 339 #define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 340 + #define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5 341 + #define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 342 + #define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 341 343 342 344 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 343 345 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+4 -1
arch/powerpc/include/asm/hw_breakpoint.h
··· 66 66 unsigned long val, void *data); 67 67 int arch_install_hw_breakpoint(struct perf_event *bp); 68 68 void arch_uninstall_hw_breakpoint(struct perf_event *bp); 69 + void arch_unregister_hw_breakpoint(struct perf_event *bp); 69 70 void hw_breakpoint_pmu_read(struct perf_event *bp); 70 71 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); 71 72 ··· 80 79 brk.address = 0; 81 80 brk.type = 0; 82 81 brk.len = 0; 83 - __set_breakpoint(&brk); 82 + if (ppc_breakpoint_available()) 83 + __set_breakpoint(&brk); 84 84 } 85 85 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); 86 + int hw_breakpoint_handler(struct die_args *args); 86 87 87 88 #else /* CONFIG_HAVE_HW_BREAKPOINT */ 88 89 static inline void hw_breakpoint_disable(void) { }
+2 -2
arch/powerpc/include/asm/io.h
··· 33 33 #include <asm/mmu.h> 34 34 #include <asm/ppc_asm.h> 35 35 36 - #include <asm-generic/iomap.h> 37 - 38 36 #ifdef CONFIG_PPC64 39 37 #include <asm/paca.h> 40 38 #endif ··· 660 662 #define writew_relaxed(v, addr) writew(v, addr) 661 663 #define writel_relaxed(v, addr) writel(v, addr) 662 664 #define writeq_relaxed(v, addr) writeq(v, addr) 665 + 666 + #include <asm-generic/iomap.h> 663 667 664 668 #ifdef CONFIG_PPC32 665 669 #define mmiowb()
+1
arch/powerpc/include/asm/irq.h
··· 66 66 extern void call_do_softirq(struct thread_info *tp); 67 67 extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp); 68 68 extern void do_IRQ(struct pt_regs *regs); 69 + extern void __init init_IRQ(void); 69 70 extern void __do_irq(struct pt_regs *regs); 70 71 71 72 int irq_choose_cpu(const struct cpumask *mask);
+1
arch/powerpc/include/asm/irq_work.h
··· 6 6 { 7 7 return true; 8 8 } 9 + extern void arch_irq_work_raise(void); 9 10 10 11 #endif /* _ASM_POWERPC_IRQ_WORK_H */
+2
arch/powerpc/include/asm/kvm_asm.h
··· 108 108 109 109 /* book3s_hv */ 110 110 111 + #define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500 112 + 111 113 /* 112 114 * Special trap used to indicate to host that this is a 113 115 * passthrough interrupt that could not be handled
+4
arch/powerpc/include/asm/kvm_book3s.h
··· 241 241 unsigned long mask); 242 242 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); 243 243 244 + extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu); 245 + extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu); 246 + extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu); 247 + 244 248 extern void kvmppc_entry_trampoline(void); 245 249 extern void kvmppc_hv_entry_trampoline(void); 246 250 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
+43
arch/powerpc/include/asm/kvm_book3s_64.h
··· 472 472 set_bit_le(i, map); 473 473 } 474 474 475 + static inline u64 sanitize_msr(u64 msr) 476 + { 477 + msr &= ~MSR_HV; 478 + msr |= MSR_ME; 479 + return msr; 480 + } 481 + 482 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 483 + static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu) 484 + { 485 + vcpu->arch.cr = vcpu->arch.cr_tm; 486 + vcpu->arch.xer = vcpu->arch.xer_tm; 487 + vcpu->arch.lr = vcpu->arch.lr_tm; 488 + vcpu->arch.ctr = vcpu->arch.ctr_tm; 489 + vcpu->arch.amr = vcpu->arch.amr_tm; 490 + vcpu->arch.ppr = vcpu->arch.ppr_tm; 491 + vcpu->arch.dscr = vcpu->arch.dscr_tm; 492 + vcpu->arch.tar = vcpu->arch.tar_tm; 493 + memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm, 494 + sizeof(vcpu->arch.gpr)); 495 + vcpu->arch.fp = vcpu->arch.fp_tm; 496 + vcpu->arch.vr = vcpu->arch.vr_tm; 497 + vcpu->arch.vrsave = vcpu->arch.vrsave_tm; 498 + } 499 + 500 + static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) 501 + { 502 + vcpu->arch.cr_tm = vcpu->arch.cr; 503 + vcpu->arch.xer_tm = vcpu->arch.xer; 504 + vcpu->arch.lr_tm = vcpu->arch.lr; 505 + vcpu->arch.ctr_tm = vcpu->arch.ctr; 506 + vcpu->arch.amr_tm = vcpu->arch.amr; 507 + vcpu->arch.ppr_tm = vcpu->arch.ppr; 508 + vcpu->arch.dscr_tm = vcpu->arch.dscr; 509 + vcpu->arch.tar_tm = vcpu->arch.tar; 510 + memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr, 511 + sizeof(vcpu->arch.gpr)); 512 + vcpu->arch.fp_tm = vcpu->arch.fp; 513 + vcpu->arch.vr_tm = vcpu->arch.vr; 514 + vcpu->arch.vrsave_tm = vcpu->arch.vrsave; 515 + } 516 + #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 517 + 475 518 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 476 519 477 520 #endif /* __ASM_KVM_BOOK3S_64_H__ */
+1
arch/powerpc/include/asm/kvm_book3s_asm.h
··· 119 119 u8 host_ipi; 120 120 u8 ptid; /* thread number within subcore when split */ 121 121 u8 tid; /* thread number within whole core */ 122 + u8 fake_suspend; 122 123 struct kvm_vcpu *kvm_vcpu; 123 124 struct kvmppc_vcore *kvm_vcore; 124 125 void __iomem *xics_phys;
+1
arch/powerpc/include/asm/kvm_host.h
··· 610 610 u64 tfhar; 611 611 u64 texasr; 612 612 u64 tfiar; 613 + u64 orig_texasr; 613 614 614 615 u32 cr_tm; 615 616 u64 xer_tm;
+4 -4
arch/powerpc/include/asm/kvm_ppc.h
··· 436 436 extern void kvm_cma_reserve(void) __init; 437 437 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) 438 438 { 439 - paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr; 439 + paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr; 440 440 } 441 441 442 442 static inline void kvmppc_set_xive_tima(int cpu, 443 443 unsigned long phys_addr, 444 444 void __iomem *virt_addr) 445 445 { 446 - paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr; 447 - paca[cpu].kvm_hstate.xive_tima_virt = virt_addr; 446 + paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr; 447 + paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr; 448 448 } 449 449 450 450 static inline u32 kvmppc_get_xics_latch(void) ··· 458 458 459 459 static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) 460 460 { 461 - paca[cpu].kvm_hstate.host_ipi = host_ipi; 461 + paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi; 462 462 } 463 463 464 464 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
+16 -13
arch/powerpc/include/asm/lppaca.h
··· 34 34 #include <linux/threads.h> 35 35 #include <asm/types.h> 36 36 #include <asm/mmu.h> 37 + #include <asm/firmware.h> 37 38 38 39 /* 39 - * We only have to have statically allocated lppaca structs on 40 - * legacy iSeries, which supports at most 64 cpus. 41 - */ 42 - #define NR_LPPACAS 1 43 - 44 - /* 45 - * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k 46 - * alignment is sufficient to prevent this 40 + * The lppaca is the "virtual processor area" registered with the hypervisor, 41 + * H_REGISTER_VPA etc. 42 + * 43 + * According to PAPR, the structure is 640 bytes long, must be L1 cache line 44 + * aligned, and must not cross a 4kB boundary. Its size field must be at 45 + * least 640 bytes (but may be more). 46 + * 47 + * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than 48 + * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep 49 + * this structure as the canonical 640 byte size. 47 50 */ 48 51 struct lppaca { 49 52 /* cacheline 1 contains read-only data */ ··· 100 97 101 98 __be32 page_ins; /* CMO Hint - # page ins by OS */ 102 99 u8 reserved11[148]; 103 - volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ 100 + volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ 104 101 u8 reserved12[96]; 105 - } __attribute__((__aligned__(0x400))); 102 + } ____cacheline_aligned; 106 103 107 - extern struct lppaca lppaca[]; 108 - 109 - #define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) 104 + #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr) 110 105 111 106 /* 112 107 * We are using a non architected field to determine if a partition is ··· 115 114 116 115 static inline bool lppaca_shared_proc(struct lppaca *l) 117 116 { 117 + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 118 + return false; 118 119 return !!(l->__old_status & LPPACA_OLD_SHARED_PROC); 119 120 } 120 121
+21
arch/powerpc/include/asm/mmu-8xx.h
··· 186 186 #define M_APG2 0x00000040 187 187 #define M_APG3 0x00000060 188 188 189 + #ifdef CONFIG_PPC_MM_SLICES 190 + #include <asm/nohash/32/slice.h> 191 + #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) 192 + #endif 193 + 189 194 #ifndef __ASSEMBLY__ 195 + struct slice_mask { 196 + u64 low_slices; 197 + DECLARE_BITMAP(high_slices, 0); 198 + }; 199 + 190 200 typedef struct { 191 201 unsigned int id; 192 202 unsigned int active; 193 203 unsigned long vdso_base; 204 + #ifdef CONFIG_PPC_MM_SLICES 205 + u16 user_psize; /* page size index */ 206 + unsigned char low_slices_psize[SLICE_ARRAY_SIZE]; 207 + unsigned char high_slices_psize[0]; 208 + unsigned long slb_addr_limit; 209 + struct slice_mask mask_base_psize; /* 4k or 16k */ 210 + # ifdef CONFIG_HUGETLB_PAGE 211 + struct slice_mask mask_512k; 212 + struct slice_mask mask_8m; 213 + # endif 214 + #endif 194 215 } mm_context_t; 195 216 196 217 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
+3 -3
arch/powerpc/include/asm/mmu.h
··· 111 111 /* MMU feature bit sets for various CPUs */ 112 112 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ 113 113 MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 114 - #define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 115 - #define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA 116 - #define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE 114 + #define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2 115 + #define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA 116 + #define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE 117 117 #define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA 118 118 #define MMU_FTRS_POWER7 MMU_FTRS_POWER6 119 119 #define MMU_FTRS_POWER8 MMU_FTRS_POWER6
+39
arch/powerpc/include/asm/mmu_context.h
··· 60 60 extern void hash__reserve_context_id(int id); 61 61 extern void __destroy_context(int context_id); 62 62 static inline void mmu_context_init(void) { } 63 + 64 + static inline int alloc_extended_context(struct mm_struct *mm, 65 + unsigned long ea) 66 + { 67 + int context_id; 68 + 69 + int index = ea >> MAX_EA_BITS_PER_CONTEXT; 70 + 71 + context_id = hash__alloc_context_id(); 72 + if (context_id < 0) 73 + return context_id; 74 + 75 + VM_WARN_ON(mm->context.extended_id[index]); 76 + mm->context.extended_id[index] = context_id; 77 + return context_id; 78 + } 79 + 80 + static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) 81 + { 82 + int context_id; 83 + 84 + context_id = get_ea_context(&mm->context, ea); 85 + if (!context_id) 86 + return true; 87 + return false; 88 + } 89 + 63 90 #else 64 91 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, 65 92 struct task_struct *tsk); 66 93 extern unsigned long __init_new_context(void); 67 94 extern void __destroy_context(unsigned long context_id); 68 95 extern void mmu_context_init(void); 96 + static inline int alloc_extended_context(struct mm_struct *mm, 97 + unsigned long ea) 98 + { 99 + /* non book3s_64 should never find this called */ 100 + WARN_ON(1); 101 + return -ENOMEM; 102 + } 103 + 104 + static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) 105 + { 106 + return false; 107 + } 69 108 #endif 70 109 71 110 #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+18
arch/powerpc/include/asm/nohash/32/slice.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_POWERPC_NOHASH_32_SLICE_H 3 + #define _ASM_POWERPC_NOHASH_32_SLICE_H 4 + 5 + #ifdef CONFIG_PPC_MM_SLICES 6 + 7 + #define SLICE_LOW_SHIFT 26 /* 64 slices */ 8 + #define SLICE_LOW_TOP (0x100000000ull) 9 + #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 10 + #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) 11 + 12 + #define SLICE_HIGH_SHIFT 0 13 + #define SLICE_NUM_HIGH 0ul 14 + #define GET_HIGH_SLICE_INDEX(addr) (addr & 0) 15 + 16 + #endif /* CONFIG_PPC_MM_SLICES */ 17 + 18 + #endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
+12
arch/powerpc/include/asm/nohash/64/slice.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_POWERPC_NOHASH_64_SLICE_H 3 + #define _ASM_POWERPC_NOHASH_64_SLICE_H 4 + 5 + #ifdef CONFIG_PPC_64K_PAGES 6 + #define get_slice_psize(mm, addr) MMU_PAGE_64K 7 + #else /* CONFIG_PPC_64K_PAGES */ 8 + #define get_slice_psize(mm, addr) MMU_PAGE_4K 9 + #endif /* !CONFIG_PPC_64K_PAGES */ 10 + #define slice_set_user_psize(mm, psize) do { BUG(); } while (0) 11 + 12 + #endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
+3 -1
arch/powerpc/include/asm/opal-api.h
··· 204 204 #define OPAL_NPU_SPA_SETUP 159 205 205 #define OPAL_NPU_SPA_CLEAR_CACHE 160 206 206 #define OPAL_NPU_TL_SET 161 207 - #define OPAL_LAST 161 207 + #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 208 + #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 209 + #define OPAL_LAST 165 208 210 209 211 /* Device tree flags */ 210 212
+3 -1
arch/powerpc/include/asm/opal.h
··· 204 204 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); 205 205 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag); 206 206 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number); 207 + int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr); 208 + int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr); 207 209 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg, 208 210 uint64_t msg_len); 209 211 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg, ··· 325 323 extern unsigned long opal_get_boot_time(void); 326 324 extern void opal_nvram_init(void); 327 325 extern void opal_flash_update_init(void); 328 - extern void opal_flash_term_callback(void); 326 + extern void opal_flash_update_print_message(void); 329 327 extern int opal_elog_init(void); 330 328 extern void opal_platform_dump_init(void); 331 329 extern void opal_sys_param_init(void);
+20 -7
arch/powerpc/include/asm/paca.h
··· 32 32 #include <asm/accounting.h> 33 33 #include <asm/hmi.h> 34 34 #include <asm/cpuidle.h> 35 + #include <asm/atomic.h> 35 36 36 37 register struct paca_struct *local_paca asm("r13"); 37 38 ··· 47 46 #define get_paca() local_paca 48 47 #endif 49 48 49 + #ifdef CONFIG_PPC_PSERIES 50 50 #define get_lppaca() (get_paca()->lppaca_ptr) 51 + #endif 52 + 51 53 #define get_slb_shadow() (get_paca()->slb_shadow_ptr) 52 54 53 55 struct task_struct; ··· 62 58 * processor. 63 59 */ 64 60 struct paca_struct { 65 - #ifdef CONFIG_PPC_BOOK3S 61 + #ifdef CONFIG_PPC_PSERIES 66 62 /* 67 63 * Because hw_cpu_id, unlike other paca fields, is accessed 68 64 * routinely from other CPUs (from the IRQ code), we stick to ··· 71 67 */ 72 68 73 69 struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */ 74 - #endif /* CONFIG_PPC_BOOK3S */ 70 + #endif /* CONFIG_PPC_PSERIES */ 71 + 75 72 /* 76 73 * MAGIC: the spinlock functions in arch/powerpc/lib/locks.c 77 74 * load lock_token and paca_index with a single lwz ··· 146 141 #ifdef CONFIG_PPC_BOOK3S 147 142 mm_context_id_t mm_ctx_id; 148 143 #ifdef CONFIG_PPC_MM_SLICES 149 - u64 mm_ctx_low_slices_psize; 144 + unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; 150 145 unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; 151 146 unsigned long mm_ctx_slb_addr_limit; 152 147 #else ··· 165 160 u64 saved_msr; /* MSR saved here by enter_rtas */ 166 161 u16 trap_save; /* Used when bad stack is encountered */ 167 162 u8 irq_soft_mask; /* mask for irq soft masking */ 163 + u8 soft_enabled; /* irq soft-enable flag */ 168 164 u8 irq_happened; /* irq happened while soft-disabled */ 169 165 u8 io_sync; /* writel() needs spin_unlock sync */ 170 166 u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ 171 167 u8 nap_state_lost; /* NV GPR values lost in power7_idle */ 168 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 169 + u8 pmcregs_in_use; /* pseries puts this in lppaca */ 170 + #endif 172 171 u64 sprg_vdso; /* Saved 
user-visible sprg */ 173 172 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 174 173 u64 tm_scratch; /* TM scratch area for reclaim */ ··· 186 177 u8 thread_mask; 187 178 /* Mask to denote subcore sibling threads */ 188 179 u8 subcore_sibling_mask; 180 + /* Flag to request this thread not to stop */ 181 + atomic_t dont_stop; 189 182 /* 190 183 * Pointer to an array which contains pointer 191 184 * to the sibling threads' paca. ··· 252 241 void *rfi_flush_fallback_area; 253 242 u64 l1d_flush_size; 254 243 #endif 255 - }; 244 + } ____cacheline_aligned; 256 245 257 246 extern void copy_mm_to_paca(struct mm_struct *mm); 258 - extern struct paca_struct *paca; 247 + extern struct paca_struct **paca_ptrs; 259 248 extern void initialise_paca(struct paca_struct *new_paca, int cpu); 260 249 extern void setup_paca(struct paca_struct *new_paca); 261 - extern void allocate_pacas(void); 250 + extern void allocate_paca_ptrs(void); 251 + extern void allocate_paca(int cpu); 262 252 extern void free_unused_pacas(void); 263 253 264 254 #else /* CONFIG_PPC64 */ 265 255 266 - static inline void allocate_pacas(void) { }; 256 + static inline void allocate_paca_ptrs(void) { }; 257 + static inline void allocate_paca(int cpu) { }; 267 258 static inline void free_unused_pacas(void) { }; 268 259 269 260 #endif /* CONFIG_PPC64 */
+10 -1
arch/powerpc/include/asm/page.h
··· 126 126 127 127 #ifdef CONFIG_FLATMEM 128 128 #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT)) 129 - #define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr) 129 + #ifndef __ASSEMBLY__ 130 + extern unsigned long max_mapnr; 131 + static inline bool pfn_valid(unsigned long pfn) 132 + { 133 + unsigned long min_pfn = ARCH_PFN_OFFSET; 134 + 135 + return pfn >= min_pfn && pfn < max_mapnr; 136 + } 137 + #endif 130 138 #endif 131 139 132 140 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) ··· 352 344 353 345 #include <asm-generic/memory_model.h> 354 346 #endif /* __ASSEMBLY__ */ 347 + #include <asm/slice.h> 355 348 356 349 #endif /* _ASM_POWERPC_PAGE_H */
-59
arch/powerpc/include/asm/page_64.h
··· 86 86 87 87 #endif /* __ASSEMBLY__ */ 88 88 89 - #ifdef CONFIG_PPC_MM_SLICES 90 - 91 - #define SLICE_LOW_SHIFT 28 92 - #define SLICE_HIGH_SHIFT 40 93 - 94 - #define SLICE_LOW_TOP (0x100000000ul) 95 - #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) 96 - #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) 97 - 98 - #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) 99 - #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) 100 - 101 - #ifndef __ASSEMBLY__ 102 - struct mm_struct; 103 - 104 - extern unsigned long slice_get_unmapped_area(unsigned long addr, 105 - unsigned long len, 106 - unsigned long flags, 107 - unsigned int psize, 108 - int topdown); 109 - 110 - extern unsigned int get_slice_psize(struct mm_struct *mm, 111 - unsigned long addr); 112 - 113 - extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize); 114 - extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, 115 - unsigned long len, unsigned int psize); 116 - 117 - #endif /* __ASSEMBLY__ */ 118 - #else 119 - #define slice_init() 120 - #ifdef CONFIG_PPC_BOOK3S_64 121 - #define get_slice_psize(mm, addr) ((mm)->context.user_psize) 122 - #define slice_set_user_psize(mm, psize) \ 123 - do { \ 124 - (mm)->context.user_psize = (psize); \ 125 - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ 126 - } while (0) 127 - #else /* !CONFIG_PPC_BOOK3S_64 */ 128 - #ifdef CONFIG_PPC_64K_PAGES 129 - #define get_slice_psize(mm, addr) MMU_PAGE_64K 130 - #else /* CONFIG_PPC_64K_PAGES */ 131 - #define get_slice_psize(mm, addr) MMU_PAGE_4K 132 - #endif /* !CONFIG_PPC_64K_PAGES */ 133 - #define slice_set_user_psize(mm, psize) do { BUG(); } while(0) 134 - #endif /* CONFIG_PPC_BOOK3S_64 */ 135 - 136 - #define slice_set_range_psize(mm, start, len, psize) \ 137 - slice_set_user_psize((mm), (psize)) 138 - #endif /* CONFIG_PPC_MM_SLICES */ 139 - 140 - #ifdef CONFIG_HUGETLB_PAGE 141 - 142 - #ifdef CONFIG_PPC_MM_SLICES 143 - #define 
HAVE_ARCH_HUGETLB_UNMAPPED_AREA 144 - #endif 145 - 146 - #endif /* !CONFIG_HUGETLB_PAGE */ 147 - 148 89 #define VM_DATA_DEFAULT_FLAGS \ 149 90 (is_32bit_task() ? \ 150 91 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
+2
arch/powerpc/include/asm/perf_event_server.h
··· 53 53 [PERF_COUNT_HW_CACHE_OP_MAX] 54 54 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 55 55 56 + int n_blacklist_ev; 57 + int *blacklist_ev; 56 58 /* BHRB entries in the PMU */ 57 59 int bhrb_nr; 58 60 };
+13 -11
arch/powerpc/include/asm/plpar_wrappers.h
··· 2 2 #ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H 3 3 #define _ASM_POWERPC_PLPAR_WRAPPERS_H 4 4 5 + #ifdef CONFIG_PPC_PSERIES 6 + 5 7 #include <linux/string.h> 6 8 #include <linux/irqflags.h> 7 9 8 10 #include <asm/hvcall.h> 9 11 #include <asm/paca.h> 10 12 #include <asm/page.h> 11 - 12 - /* Get state of physical CPU from query_cpu_stopped */ 13 - int smp_query_cpu_stopped(unsigned int pcpu); 14 - #define QCSS_STOPPED 0 15 - #define QCSS_STOPPING 1 16 - #define QCSS_NOT_STOPPED 2 17 - #define QCSS_HARDWARE_ERROR -1 18 - #define QCSS_HARDWARE_BUSY -2 19 13 20 14 static inline long poll_pending(void) 21 15 { ··· 305 311 return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0); 306 312 } 307 313 308 - static inline long plapr_set_ciabr(unsigned long ciabr) 314 + static inline long plpar_set_ciabr(unsigned long ciabr) 309 315 { 310 316 return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); 311 317 } 312 318 313 - static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) 319 + static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) 314 320 { 315 321 return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); 316 322 } 317 323 318 - static inline long plapr_signal_sys_reset(long cpu) 324 + static inline long plpar_signal_sys_reset(long cpu) 319 325 { 320 326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); 321 327 } ··· 333 339 334 340 return rc; 335 341 } 342 + 343 + #else /* !CONFIG_PPC_PSERIES */ 344 + 345 + static inline long plpar_set_ciabr(unsigned long ciabr) 346 + { 347 + return 0; 348 + } 349 + #endif /* CONFIG_PPC_PSERIES */ 336 350 337 351 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
+12 -1
arch/powerpc/include/asm/pmc.h
··· 31 31 32 32 #ifdef CONFIG_PPC_BOOK3S_64 33 33 #include <asm/lppaca.h> 34 + #include <asm/firmware.h> 34 35 35 36 static inline void ppc_set_pmu_inuse(int inuse) 36 37 { 37 - get_lppaca()->pmcregs_in_use = inuse; 38 + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) 39 + if (firmware_has_feature(FW_FEATURE_LPAR)) { 40 + #ifdef CONFIG_PPC_PSERIES 41 + get_lppaca()->pmcregs_in_use = inuse; 42 + #endif 43 + } else { 44 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 45 + get_paca()->pmcregs_in_use = inuse; 46 + #endif 47 + } 48 + #endif 38 49 } 39 50 40 51 extern void power4_enable_pmcs(void);
+6
arch/powerpc/include/asm/pnv-pci.h
··· 29 29 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, 30 30 u64 desc); 31 31 32 + extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind); 33 + extern int pnv_pci_disable_tunnel(struct pci_dev *dev); 34 + extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, 35 + int enable); 36 + extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, 37 + u32 *pid, u32 *tid); 32 38 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); 33 39 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, 34 40 unsigned int virq);
+1
arch/powerpc/include/asm/powernv.h
··· 40 40 } 41 41 42 42 static inline void pnv_tm_init(void) { } 43 + static inline void pnv_power9_force_smt4(void) { } 43 44 #endif 44 45 45 46 #endif /* _ASM_POWERNV_H */
+10
arch/powerpc/include/asm/ppc-opcode.h
··· 232 232 #define PPC_INST_MSGSYNC 0x7c0006ec 233 233 #define PPC_INST_MSGSNDP 0x7c00011c 234 234 #define PPC_INST_MSGCLRP 0x7c00015c 235 + #define PPC_INST_MTMSRD 0x7c000164 235 236 #define PPC_INST_MTTMR 0x7c0003dc 236 237 #define PPC_INST_NOP 0x60000000 237 238 #define PPC_INST_PASTE 0x7c20070d ··· 240 239 #define PPC_INST_POPCNTB_MASK 0xfc0007fe 241 240 #define PPC_INST_POPCNTD 0x7c0003f4 242 241 #define PPC_INST_POPCNTW 0x7c0002f4 242 + #define PPC_INST_RFEBB 0x4c000124 243 243 #define PPC_INST_RFCI 0x4c000066 244 244 #define PPC_INST_RFDI 0x4c00004e 245 + #define PPC_INST_RFID 0x4c000024 245 246 #define PPC_INST_RFMCI 0x4c00004c 246 247 #define PPC_INST_MFSPR 0x7c0002a6 247 248 #define PPC_INST_MFSPR_DSCR 0x7c1102a6 ··· 274 271 #define PPC_INST_TLBSRX_DOT 0x7c0006a5 275 272 #define PPC_INST_VPMSUMW 0x10000488 276 273 #define PPC_INST_VPMSUMD 0x100004c8 274 + #define PPC_INST_VPERMXOR 0x1000002d 277 275 #define PPC_INST_XXLOR 0xf0000490 278 276 #define PPC_INST_XXSWAPD 0xf0000250 279 277 #define PPC_INST_XVCPSGNDP 0xf0000780 280 278 #define PPC_INST_TRECHKPT 0x7c0007dd 281 279 #define PPC_INST_TRECLAIM 0x7c00075d 282 280 #define PPC_INST_TABORT 0x7c00071d 281 + #define PPC_INST_TSR 0x7c0005dd 283 282 284 283 #define PPC_INST_NAP 0x4c000364 285 284 #define PPC_INST_SLEEP 0x4c0003a4 ··· 521 516 VSX_XX3((t), a, a)) 522 517 #define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ 523 518 VSX_XX3((t), (a), (b)))) 519 + 520 + #define VPERMXOR(vrt, vra, vrb, vrc) \ 521 + stringify_in_c(.long (PPC_INST_VPERMXOR | \ 522 + ___PPC_RT(vrt) | ___PPC_RA(vra) | \ 523 + ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6))) 524 524 525 525 #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) 526 526 #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+4 -7
arch/powerpc/include/asm/ppc_asm.h
··· 439 439 440 440 /* The following stops all load and store data streams associated with stream 441 441 * ID (ie. streams created explicitly). The embedded and server mnemonics for 442 - * dcbt are different so we use machine "power4" here explicitly. 442 + * dcbt are different so this must only be used for server. 443 443 */ 444 - #define DCBT_STOP_ALL_STREAM_IDS(scratch) \ 445 - .machine push ; \ 446 - .machine "power4" ; \ 447 - lis scratch,0x60000000@h; \ 448 - dcbt 0,scratch,0b01010; \ 449 - .machine pop 444 + #define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch) \ 445 + lis scratch,0x60000000@h; \ 446 + dcbt 0,scratch,0b01010 450 447 451 448 /* 452 449 * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
+15 -1
arch/powerpc/include/asm/processor.h
··· 109 109 #define TASK_SIZE_64TB (0x0000400000000000UL) 110 110 #define TASK_SIZE_128TB (0x0000800000000000UL) 111 111 #define TASK_SIZE_512TB (0x0002000000000000UL) 112 + #define TASK_SIZE_1PB (0x0004000000000000UL) 113 + #define TASK_SIZE_2PB (0x0008000000000000UL) 114 + /* 115 + * With 52 bits in the address we can support 116 + * upto 4PB of range. 117 + */ 118 + #define TASK_SIZE_4PB (0x0010000000000000UL) 112 119 113 120 /* 114 121 * For now 512TB is only supported with book3s and 64K linux page size. ··· 124 117 /* 125 118 * Max value currently used: 126 119 */ 127 - #define TASK_SIZE_USER64 TASK_SIZE_512TB 120 + #define TASK_SIZE_USER64 TASK_SIZE_4PB 128 121 #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB 122 + #define TASK_CONTEXT_SIZE TASK_SIZE_512TB 129 123 #else 130 124 #define TASK_SIZE_USER64 TASK_SIZE_64TB 131 125 #define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB 126 + /* 127 + * We don't need to allocate extended context ids for 4K page size, because 128 + * we limit the max effective address on this config to 64TB. 129 + */ 130 + #define TASK_CONTEXT_SIZE TASK_SIZE_64TB 132 131 #endif 133 132 134 133 /* ··· 518 505 extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ 519 506 extern void power7_idle_type(unsigned long type); 520 507 extern unsigned long power9_idle_stop(unsigned long psscr_val); 508 + extern unsigned long power9_offline_stop(unsigned long psscr_val); 521 509 extern void power9_idle_type(unsigned long stop_psscr_val, 522 510 unsigned long stop_psscr_mask); 523 511
+7
arch/powerpc/include/asm/reg.h
··· 156 156 #define PSSCR_SD 0x00400000 /* Status Disable */ 157 157 #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ 158 158 #define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ 159 + #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ 160 + #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ 159 161 160 162 /* Floating Point Status and Control Register (FPSCR) Fields */ 161 163 #define FPSCR_FX 0x80000000 /* FPU exception summary */ ··· 239 237 #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ 240 238 #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ 241 239 #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ 240 + #define TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */ 241 + #define TEXASR_SUSP __MASK(63-32) /* tx failed in suspended state */ 242 + #define TEXASR_HV __MASK(63-34) /* MSR[HV] when failure occurred */ 243 + #define TEXASR_PR __MASK(63-35) /* MSR[PR] when failure occurred */ 242 244 #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ 245 + #define TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */ 243 246 #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ 244 247 #define SPRN_TIDR 144 /* Thread ID register */ 245 248 #define SPRN_CTRLF 0x088
+74
arch/powerpc/include/asm/security_features.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + /* 3 + * Security related feature bit definitions. 4 + * 5 + * Copyright 2018, Michael Ellerman, IBM Corporation. 6 + */ 7 + 8 + #ifndef _ASM_POWERPC_SECURITY_FEATURES_H 9 + #define _ASM_POWERPC_SECURITY_FEATURES_H 10 + 11 + 12 + extern unsigned long powerpc_security_features; 13 + extern bool rfi_flush; 14 + 15 + static inline void security_ftr_set(unsigned long feature) 16 + { 17 + powerpc_security_features |= feature; 18 + } 19 + 20 + static inline void security_ftr_clear(unsigned long feature) 21 + { 22 + powerpc_security_features &= ~feature; 23 + } 24 + 25 + static inline bool security_ftr_enabled(unsigned long feature) 26 + { 27 + return !!(powerpc_security_features & feature); 28 + } 29 + 30 + 31 + // Features indicating support for Spectre/Meltdown mitigations 32 + 33 + // The L1-D cache can be flushed with ori r30,r30,0 34 + #define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull 35 + 36 + // The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2) 37 + #define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull 38 + 39 + // ori r31,r31,0 acts as a speculation barrier 40 + #define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull 41 + 42 + // Speculation past bctr is disabled 43 + #define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull 44 + 45 + // Entries in L1-D are private to a SMT thread 46 + #define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull 47 + 48 + // Indirect branch prediction cache disabled 49 + #define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull 50 + 51 + 52 + // Features indicating need for Spectre/Meltdown mitigations 53 + 54 + // The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest) 55 + #define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull 56 + 57 + // The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace) 58 + #define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull 59 + 60 + // A speculation barrier should be used for bounds checks 
(Spectre variant 1) 61 + #define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull 62 + 63 + // Firmware configuration indicates user favours security over performance 64 + #define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull 65 + 66 + 67 + // Features enabled by default 68 + #define SEC_FTR_DEFAULT \ 69 + (SEC_FTR_L1D_FLUSH_HV | \ 70 + SEC_FTR_L1D_FLUSH_PR | \ 71 + SEC_FTR_BNDS_CHK_SPEC_BAR | \ 72 + SEC_FTR_FAVOUR_SECURITY) 73 + 74 + #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
+2 -1
arch/powerpc/include/asm/setup.h
··· 23 23 #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) 24 24 25 25 void check_for_initrd(void); 26 + void mem_topology_setup(void); 26 27 void initmem_init(void); 27 28 void setup_panic(void); 28 29 #define ARCH_PANIC_TIMEOUT 180 ··· 50 49 L1D_FLUSH_MTTRIG = 0x8, 51 50 }; 52 51 53 - void __init setup_rfi_flush(enum l1d_flush_type, bool enable); 52 + void setup_rfi_flush(enum l1d_flush_type, bool enable); 54 53 void do_rfi_flush_fixups(enum l1d_flush_type types); 55 54 56 55 #endif /* !__ASSEMBLY__ */
+40
arch/powerpc/include/asm/slice.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_POWERPC_SLICE_H 3 + #define _ASM_POWERPC_SLICE_H 4 + 5 + #ifdef CONFIG_PPC_BOOK3S_64 6 + #include <asm/book3s/64/slice.h> 7 + #elif defined(CONFIG_PPC64) 8 + #include <asm/nohash/64/slice.h> 9 + #elif defined(CONFIG_PPC_MMU_NOHASH) 10 + #include <asm/nohash/32/slice.h> 11 + #endif 12 + 13 + #ifdef CONFIG_PPC_MM_SLICES 14 + 15 + #ifdef CONFIG_HUGETLB_PAGE 16 + #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA 17 + #endif 18 + #define HAVE_ARCH_UNMAPPED_AREA 19 + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 20 + 21 + #ifndef __ASSEMBLY__ 22 + 23 + struct mm_struct; 24 + 25 + unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, 26 + unsigned long flags, unsigned int psize, 27 + int topdown); 28 + 29 + unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr); 30 + 31 + void slice_set_range_psize(struct mm_struct *mm, unsigned long start, 32 + unsigned long len, unsigned int psize); 33 + 34 + void slice_init_new_context_exec(struct mm_struct *mm); 35 + 36 + #endif /* __ASSEMBLY__ */ 37 + 38 + #endif /* CONFIG_PPC_MM_SLICES */ 39 + 40 + #endif /* _ASM_POWERPC_SLICE_H */
+3 -2
arch/powerpc/include/asm/smp.h
··· 31 31 32 32 extern int boot_cpuid; 33 33 extern int spinning_secondaries; 34 + extern u32 *cpu_to_phys_id; 34 35 35 36 extern void cpu_die(void); 36 37 extern int cpu_to_chip_id(int cpu); ··· 171 170 #ifdef CONFIG_PPC64 172 171 static inline int get_hard_smp_processor_id(int cpu) 173 172 { 174 - return paca[cpu].hw_cpu_id; 173 + return paca_ptrs[cpu]->hw_cpu_id; 175 174 } 176 175 177 176 static inline void set_hard_smp_processor_id(int cpu, int phys) 178 177 { 179 - paca[cpu].hw_cpu_id = phys; 178 + paca_ptrs[cpu]->hw_cpu_id = phys; 180 179 } 181 180 #else 182 181 /* 32-bit */
+1 -1
arch/powerpc/include/asm/sparsemem.h
··· 17 17 #endif /* CONFIG_SPARSEMEM */ 18 18 19 19 #ifdef CONFIG_MEMORY_HOTPLUG 20 - extern int create_section_mapping(unsigned long start, unsigned long end); 20 + extern int create_section_mapping(unsigned long start, unsigned long end, int nid); 21 21 extern int remove_section_mapping(unsigned long start, unsigned long end); 22 22 23 23 #ifdef CONFIG_PPC_BOOK3S_64
+2
arch/powerpc/include/asm/spinlock.h
··· 56 56 #define vcpu_is_preempted vcpu_is_preempted 57 57 static inline bool vcpu_is_preempted(int cpu) 58 58 { 59 + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 60 + return false; 59 61 return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); 60 62 } 61 63 #endif
-1
arch/powerpc/include/asm/switch_to.h
··· 35 35 msr_check_and_clear(MSR_FP); 36 36 } 37 37 #else 38 - static inline void __giveup_fpu(struct task_struct *t) { } 39 38 static inline void save_fpu(struct task_struct *t) { } 40 39 static inline void flush_fp_to_thread(struct task_struct *t) { } 41 40 #endif
-4
arch/powerpc/include/asm/synch.h
··· 6 6 #include <linux/stringify.h> 7 7 #include <asm/feature-fixups.h> 8 8 9 - #if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC) 10 - #define __SUBARCH_HAS_LWSYNC 11 - #endif 12 - 13 9 #ifndef __ASSEMBLY__ 14 10 extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; 15 11 extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
+1
arch/powerpc/include/asm/thread_info.h
··· 70 70 return (struct thread_info *)val; 71 71 } 72 72 73 + extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); 73 74 #endif /* __ASSEMBLY__ */ 74 75 75 76 /*
+3 -1
arch/powerpc/include/asm/time.h
··· 31 31 extern void tick_broadcast_ipi_handler(void); 32 32 33 33 extern void generic_calibrate_decr(void); 34 + extern void hdec_interrupt(struct pt_regs *regs); 34 35 35 36 /* Some sane defaults: 125 MHz timebase, 1GHz processor */ 36 37 extern unsigned long ppc_proc_freq; ··· 47 46 /* Accessor functions for the timebase (RTC on 601) registers. */ 48 47 /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */ 49 48 #ifdef CONFIG_6xx 50 - #define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB)) 49 + #define __USE_RTC() (cpu_has_feature(CPU_FTR_USE_RTC)) 51 50 #else 52 51 #define __USE_RTC() 0 53 52 #endif ··· 205 204 DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); 206 205 207 206 extern void secondary_cpu_time_init(void); 207 + extern void __init time_init(void); 208 208 209 209 DECLARE_PER_CPU(u64, decrementers_next_tb); 210 210
+7 -3
arch/powerpc/include/asm/uaccess.h
··· 47 47 48 48 #else 49 49 50 - #define __access_ok(addr, size, segment) \ 51 - (((addr) <= (segment).seg) && \ 52 - (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr))))) 50 + static inline int __access_ok(unsigned long addr, unsigned long size, 51 + mm_segment_t seg) 52 + { 53 + if (addr > seg.seg) 54 + return 0; 55 + return (size == 0 || size - 1 <= seg.seg - addr); 56 + } 53 57 54 58 #endif 55 59
+1 -1
arch/powerpc/kernel/Makefile
··· 42 42 obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o 43 43 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 44 44 obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o 45 - obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o 45 + obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o 46 46 obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o 47 47 obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o 48 48 obj-$(CONFIG_PPC64) += vdso64/
+8
arch/powerpc/kernel/asm-offsets.c
··· 221 221 OFFSET(PACA_EXMC, paca_struct, exmc); 222 222 OFFSET(PACA_EXSLB, paca_struct, exslb); 223 223 OFFSET(PACA_EXNMI, paca_struct, exnmi); 224 + #ifdef CONFIG_PPC_PSERIES 224 225 OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); 226 + #endif 225 227 OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); 226 228 OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); 227 229 OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); 228 230 OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); 229 231 OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); 232 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 233 + OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); 234 + #endif 230 235 OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); 231 236 OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); 232 237 OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); ··· 573 568 OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar); 574 569 OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar); 575 570 OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr); 571 + OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr); 576 572 OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm); 577 573 OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr); 578 574 OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr); ··· 656 650 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); 657 651 HSTATE_FIELD(HSTATE_PTID, ptid); 658 652 HSTATE_FIELD(HSTATE_TID, tid); 653 + HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); 659 654 HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); 660 655 HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); 661 656 HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); ··· 766 759 OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); 767 760 OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); 768 761 OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); 762 + OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); 769 763 #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) 770 764 STOP_SPR(STOP_PID, pid); 771 765 STOP_SPR(STOP_LDBAR, ldbar);
+1 -1
arch/powerpc/kernel/cpu_setup_6xx.S
··· 226 226 beq 1f 227 227 END_FTR_SECTION_IFSET(CPU_FTR_L3CR) 228 228 lwz r6,CPU_SPEC_FEATURES(r4) 229 - andi. r0,r6,CPU_FTR_L3_DISABLE_NAP 229 + andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h 230 230 beq 1f 231 231 li r7,CPU_FTR_CAN_NAP 232 232 andc r6,r6,r7
+1 -1
arch/powerpc/kernel/cpu_setup_fsl_booke.S
··· 162 162 * the feature on the primary core, avoid doing it on the 163 163 * secondary core. 164 164 */ 165 - andis. r6, r3, CPU_FTR_EMB_HV@h 165 + andi. r6, r3, CPU_FTR_EMB_HV 166 166 beq 2f 167 167 rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV 168 168 stw r3, CPU_SPEC_FEATURES(r4)
+24 -35
arch/powerpc/kernel/cputable.c
··· 133 133 134 134 static struct cpu_spec __initdata cpu_specs[] = { 135 135 #ifdef CONFIG_PPC_BOOK3S_64 136 - { /* Power4 */ 137 - .pvr_mask = 0xffff0000, 138 - .pvr_value = 0x00350000, 139 - .cpu_name = "POWER4 (gp)", 140 - .cpu_features = CPU_FTRS_POWER4, 141 - .cpu_user_features = COMMON_USER_POWER4, 142 - .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, 143 - .icache_bsize = 128, 144 - .dcache_bsize = 128, 145 - .num_pmcs = 8, 146 - .pmc_type = PPC_PMC_IBM, 147 - .oprofile_cpu_type = "ppc64/power4", 148 - .oprofile_type = PPC_OPROFILE_POWER4, 149 - .platform = "power4", 150 - }, 151 - { /* Power4+ */ 152 - .pvr_mask = 0xffff0000, 153 - .pvr_value = 0x00380000, 154 - .cpu_name = "POWER4+ (gq)", 155 - .cpu_features = CPU_FTRS_POWER4, 156 - .cpu_user_features = COMMON_USER_POWER4, 157 - .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, 158 - .icache_bsize = 128, 159 - .dcache_bsize = 128, 160 - .num_pmcs = 8, 161 - .pmc_type = PPC_PMC_IBM, 162 - .oprofile_cpu_type = "ppc64/power4", 163 - .oprofile_type = PPC_OPROFILE_POWER4, 164 - .platform = "power4", 165 - }, 166 136 { /* PPC970 */ 167 137 .pvr_mask = 0xffff0000, 168 138 .pvr_value = 0x00390000, ··· 523 553 .machine_check_early = __machine_check_early_realmode_p9, 524 554 .platform = "power9", 525 555 }, 526 - { /* Power9 DD 2.1 or later (see DD2.0 above) */ 556 + { /* Power9 DD 2.1 */ 557 + .pvr_mask = 0xffffefff, 558 + .pvr_value = 0x004e0201, 559 + .cpu_name = "POWER9 (raw)", 560 + .cpu_features = CPU_FTRS_POWER9_DD2_1, 561 + .cpu_user_features = COMMON_USER_POWER9, 562 + .cpu_user_features2 = COMMON_USER2_POWER9, 563 + .mmu_features = MMU_FTRS_POWER9, 564 + .icache_bsize = 128, 565 + .dcache_bsize = 128, 566 + .num_pmcs = 6, 567 + .pmc_type = PPC_PMC_IBM, 568 + .oprofile_cpu_type = "ppc64/power9", 569 + .oprofile_type = PPC_OPROFILE_INVALID, 570 + .cpu_setup = __setup_cpu_power9, 571 + .cpu_restore = __restore_cpu_power9, 572 + .machine_check_early = __machine_check_early_realmode_p9, 573 + 
.platform = "power9", 574 + }, 575 + { /* Power9 DD2.2 or later */ 527 576 .pvr_mask = 0xffff0000, 528 577 .pvr_value = 0x004e0000, 529 578 .cpu_name = "POWER9 (raw)", 530 - .cpu_features = CPU_FTRS_POWER9_DD2_1, 579 + .cpu_features = CPU_FTRS_POWER9_DD2_2, 531 580 .cpu_user_features = COMMON_USER_POWER9, 532 581 .cpu_user_features2 = COMMON_USER2_POWER9, 533 582 .mmu_features = MMU_FTRS_POWER9, ··· 598 609 { /* default match */ 599 610 .pvr_mask = 0x00000000, 600 611 .pvr_value = 0x00000000, 601 - .cpu_name = "POWER4 (compatible)", 612 + .cpu_name = "POWER5 (compatible)", 602 613 .cpu_features = CPU_FTRS_COMPATIBLE, 603 614 .cpu_user_features = COMMON_USER_PPC64, 604 - .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, 615 + .mmu_features = MMU_FTRS_POWER, 605 616 .icache_bsize = 128, 606 617 .dcache_bsize = 128, 607 618 .num_pmcs = 6, 608 619 .pmc_type = PPC_PMC_IBM, 609 - .platform = "power4", 620 + .platform = "power5", 610 621 } 611 622 #endif /* CONFIG_PPC_BOOK3S_64 */ 612 623
+1 -1
arch/powerpc/kernel/crash.c
··· 238 238 if (i == cpu) 239 239 continue; 240 240 241 - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { 241 + while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { 242 242 barrier(); 243 243 if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) 244 244 break;
+30 -6
arch/powerpc/kernel/dt_cpu_ftrs.c
··· 54 54 }; 55 55 56 56 #define CPU_FTRS_BASE \ 57 - (CPU_FTR_USE_TB | \ 58 - CPU_FTR_LWSYNC | \ 57 + (CPU_FTR_LWSYNC | \ 59 58 CPU_FTR_FPU_UNAVAILABLE |\ 60 59 CPU_FTR_NODSISRALIGN |\ 61 60 CPU_FTR_NOEXECUTE |\ ··· 83 84 84 85 static struct { 85 86 u64 lpcr; 87 + u64 lpcr_clear; 86 88 u64 hfscr; 87 89 u64 fscr; 88 90 } system_registers; ··· 92 92 93 93 static void __restore_cpu_cpufeatures(void) 94 94 { 95 + u64 lpcr; 96 + 95 97 /* 96 98 * LPCR is restored by the power on engine already. It can be changed 97 99 * after early init e.g., by radix enable, and we have no unified API ··· 106 104 * The best we can do to accommodate secondary boot and idle restore 107 105 * for now is "or" LPCR with existing. 108 106 */ 109 - 110 - mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR)); 107 + lpcr = mfspr(SPRN_LPCR); 108 + lpcr |= system_registers.lpcr; 109 + lpcr &= ~system_registers.lpcr_clear; 110 + mtspr(SPRN_LPCR, lpcr); 111 111 if (hv_mode) { 112 112 mtspr(SPRN_LPID, 0); 113 113 mtspr(SPRN_HFSCR, system_registers.hfscr); ··· 329 325 { 330 326 u64 lpcr; 331 327 328 + system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); 332 329 lpcr = mfspr(SPRN_LPCR); 333 - lpcr &= ~LPCR_ISL; 330 + lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); 334 331 mtspr(SPRN_LPCR, lpcr); 335 332 336 333 cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; ··· 595 590 {"virtual-page-class-key-protection", feat_enable, 0}, 596 591 {"transactional-memory", feat_enable_tm, CPU_FTR_TM}, 597 592 {"transactional-memory-v3", feat_enable_tm, 0}, 593 + {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, 594 + {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, 598 595 {"idle-nap", feat_enable_idle_nap, 0}, 599 596 {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, 600 597 {"idle-stop", feat_enable_idle_stop, 0}, ··· 714 707 */ 715 708 if ((version & 0xffffff00) == 0x004e0100) 716 709 cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; 710 + else if 
((version & 0xffffefff) == 0x004e0200) 711 + ; /* DD2.0 has no feature flag */ 717 712 else if ((version & 0xffffefff) == 0x004e0201) 718 713 cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; 714 + else if ((version & 0xffffefff) == 0x004e0202) { 715 + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST; 716 + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG; 717 + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; 718 + } else /* DD2.1 and up have DD2_1 */ 719 + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; 719 720 720 - if ((version & 0xffff0000) == 0x004e0000) 721 + if ((version & 0xffff0000) == 0x004e0000) { 722 + cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); 721 723 cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; 724 + } 725 + 726 + /* 727 + * PKEY was not in the initial base or feature node 728 + * specification, but it should become optional in the next 729 + * cpu feature version sequence. 730 + */ 731 + cur_cpu_spec->cpu_features |= CPU_FTR_PKEY; 722 732 } 723 733 724 734 static void __init cpufeatures_setup_finished(void)
+6 -13
arch/powerpc/kernel/eeh.c
··· 394 394 /* Check PHB state */ 395 395 ret = eeh_ops->get_state(phb_pe, NULL); 396 396 if ((ret < 0) || 397 - (ret == EEH_STATE_NOT_SUPPORT) || 398 - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 399 - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 397 + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 400 398 ret = 0; 401 399 goto out; 402 400 } ··· 431 433 int eeh_dev_check_failure(struct eeh_dev *edev) 432 434 { 433 435 int ret; 434 - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 435 436 unsigned long flags; 436 437 struct device_node *dn; 437 438 struct pci_dev *dev; ··· 522 525 * state, PE is in good state. 523 526 */ 524 527 if ((ret < 0) || 525 - (ret == EEH_STATE_NOT_SUPPORT) || 526 - ((ret & active_flags) == active_flags)) { 528 + (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 527 529 eeh_stats.false_positives++; 528 530 pe->false_positives++; 529 531 rc = 0; ··· 542 546 543 547 /* Frozen parent PE ? */ 544 548 ret = eeh_ops->get_state(parent_pe, NULL); 545 - if (ret > 0 && 546 - (ret & active_flags) != active_flags) 549 + if (ret > 0 && !eeh_state_active(ret)) 547 550 pe = parent_pe; 548 551 549 552 /* Next parent level */ ··· 883 888 */ 884 889 int eeh_pe_reset_full(struct eeh_pe *pe) 885 890 { 886 - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 887 891 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 888 892 int type = EEH_RESET_HOT; 889 893 unsigned int freset = 0; ··· 913 919 914 920 /* Wait until the PE is in a functioning state */ 915 921 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 916 - if ((state & active_flags) == active_flags) 922 + if (eeh_state_active(state)) 917 923 break; 918 924 919 925 if (state < 0) { ··· 1346 1352 struct eeh_dev *edev, *tmp; 1347 1353 struct pci_dev *pdev; 1348 1354 struct pci_device_id *id; 1349 - int flags, ret; 1355 + int ret; 1350 1356 1351 1357 /* Check PE state */ 1352 - flags = (EEH_STATE_MMIO_ACTIVE | 
EEH_STATE_DMA_ACTIVE); 1353 1358 ret = eeh_ops->get_state(pe, NULL); 1354 1359 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1355 1360 return 0; 1356 1361 1357 1362 /* Unfrozen PE, nothing to do */ 1358 - if ((ret & flags) == flags) 1363 + if (eeh_state_active(ret)) 1359 1364 return 0; 1360 1365 1361 1366 /* Frozen PE, check if it needs PE level reset */
+1 -2
arch/powerpc/kernel/eeh_cache.c
··· 84 84 * @addr: mmio (PIO) phys address or i/o port number 85 85 * 86 86 * Given an mmio phys address, or a port number, find a pci device 87 - * that implements this address. Be sure to pci_dev_put the device 88 - * when finished. I/O port numbers are assumed to be offset 87 + * that implements this address. I/O port numbers are assumed to be offset 89 88 * from zero (that is, they do *not* have pci_io_addr added in). 90 89 * It is safe to call this function within an interrupt. 91 90 */
+95 -110
arch/powerpc/kernel/eeh_driver.c
··· 207 207 208 208 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 209 209 return NULL; 210 + 211 + device_lock(&dev->dev); 210 212 dev->error_state = pci_channel_io_frozen; 211 213 212 214 driver = eeh_pcid_get(dev); 213 - if (!driver) return NULL; 215 + if (!driver) goto out_no_dev; 214 216 215 217 eeh_disable_irq(dev); 216 218 217 219 if (!driver->err_handler || 218 - !driver->err_handler->error_detected) { 219 - eeh_pcid_put(dev); 220 - return NULL; 221 - } 220 + !driver->err_handler->error_detected) 221 + goto out; 222 222 223 223 rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); 224 224 ··· 227 227 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 228 228 229 229 edev->in_error = true; 230 - eeh_pcid_put(dev); 231 230 pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); 231 + 232 + out: 233 + eeh_pcid_put(dev); 234 + out_no_dev: 235 + device_unlock(&dev->dev); 232 236 return NULL; 233 237 } 234 238 ··· 255 251 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 256 252 return NULL; 257 253 254 + device_lock(&dev->dev); 258 255 driver = eeh_pcid_get(dev); 259 - if (!driver) return NULL; 256 + if (!driver) goto out_no_dev; 260 257 261 258 if (!driver->err_handler || 262 259 !driver->err_handler->mmio_enabled || 263 - (edev->mode & EEH_DEV_NO_HANDLER)) { 264 - eeh_pcid_put(dev); 265 - return NULL; 266 - } 260 + (edev->mode & EEH_DEV_NO_HANDLER)) 261 + goto out; 267 262 268 263 rc = driver->err_handler->mmio_enabled(dev); 269 264 ··· 270 267 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 271 268 if (*res == PCI_ERS_RESULT_NONE) *res = rc; 272 269 270 + out: 273 271 eeh_pcid_put(dev); 272 + out_no_dev: 273 + device_unlock(&dev->dev); 274 274 return NULL; 275 275 } 276 276 ··· 296 290 297 291 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 298 292 return NULL; 293 + 294 + device_lock(&dev->dev); 299 295 dev->error_state = pci_channel_io_normal; 300 296 301 297 driver = eeh_pcid_get(dev); 302 - if (!driver) return NULL; 298 
+ if (!driver) goto out_no_dev; 303 299 304 300 eeh_enable_irq(dev); 305 301 306 302 if (!driver->err_handler || 307 303 !driver->err_handler->slot_reset || 308 304 (edev->mode & EEH_DEV_NO_HANDLER) || 309 - (!edev->in_error)) { 310 - eeh_pcid_put(dev); 311 - return NULL; 312 - } 305 + (!edev->in_error)) 306 + goto out; 313 307 314 308 rc = driver->err_handler->slot_reset(dev); 315 309 if ((*res == PCI_ERS_RESULT_NONE) || ··· 317 311 if (*res == PCI_ERS_RESULT_DISCONNECT && 318 312 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; 319 313 314 + out: 320 315 eeh_pcid_put(dev); 316 + out_no_dev: 317 + device_unlock(&dev->dev); 321 318 return NULL; 322 319 } 323 320 ··· 371 362 372 363 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 373 364 return NULL; 365 + 366 + device_lock(&dev->dev); 374 367 dev->error_state = pci_channel_io_normal; 375 368 376 369 driver = eeh_pcid_get(dev); 377 - if (!driver) return NULL; 370 + if (!driver) goto out_no_dev; 378 371 379 372 was_in_error = edev->in_error; 380 373 edev->in_error = false; ··· 386 375 !driver->err_handler->resume || 387 376 (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { 388 377 edev->mode &= ~EEH_DEV_NO_HANDLER; 389 - eeh_pcid_put(dev); 390 - return NULL; 378 + goto out; 391 379 } 392 380 393 381 driver->err_handler->resume(dev); 394 382 395 - eeh_pcid_put(dev); 396 383 pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); 384 + out: 385 + eeh_pcid_put(dev); 397 386 #ifdef CONFIG_PCI_IOV 398 387 if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) 399 388 eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); 400 389 #endif 390 + out_no_dev: 391 + device_unlock(&dev->dev); 401 392 return NULL; 402 393 } 403 394 ··· 419 406 420 407 if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) 421 408 return NULL; 409 + 410 + device_lock(&dev->dev); 422 411 dev->error_state = pci_channel_io_perm_failure; 423 412 424 413 driver = eeh_pcid_get(dev); 425 - if (!driver) return NULL; 414 + if (!driver) goto out_no_dev; 426 
415 427 416 eeh_disable_irq(dev); 428 417 429 418 if (!driver->err_handler || 430 - !driver->err_handler->error_detected) { 431 - eeh_pcid_put(dev); 432 - return NULL; 433 - } 419 + !driver->err_handler->error_detected) 420 + goto out; 434 421 435 422 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); 436 423 437 - eeh_pcid_put(dev); 438 424 pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); 425 + out: 426 + eeh_pcid_put(dev); 427 + out_no_dev: 428 + device_unlock(&dev->dev); 439 429 return NULL; 440 430 } 441 431 ··· 635 619 636 620 /** 637 621 * eeh_reset_device - Perform actual reset of a pci slot 622 + * @driver_eeh_aware: Does the device's driver provide EEH support? 638 623 * @pe: EEH PE 639 624 * @bus: PCI bus corresponding to the isolcated slot 625 + * @rmv_data: Optional, list to record removed devices 640 626 * 641 627 * This routine must be called to do reset on the indicated PE. 642 628 * During the reset, udev might be invoked because those affected 643 629 * PCI devices will be removed and then added. 644 630 */ 645 631 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, 646 - struct eeh_rmv_data *rmv_data) 632 + struct eeh_rmv_data *rmv_data, 633 + bool driver_eeh_aware) 647 634 { 648 - struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); 649 635 time64_t tstamp; 650 636 int cnt, rc; 651 637 struct eeh_dev *edev; ··· 663 645 * into pci_hp_add_devices(). 
664 646 */ 665 647 eeh_pe_state_mark(pe, EEH_PE_KEEP); 666 - if (bus) { 667 - if (pe->type & EEH_PE_VF) { 668 - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 669 - } else { 670 - pci_lock_rescan_remove(); 671 - pci_hp_remove_devices(bus); 672 - pci_unlock_rescan_remove(); 673 - } 674 - } else if (frozen_bus) { 648 + if (driver_eeh_aware || (pe->type & EEH_PE_VF)) { 675 649 eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); 650 + } else { 651 + pci_lock_rescan_remove(); 652 + pci_hp_remove_devices(bus); 653 + pci_unlock_rescan_remove(); 676 654 } 677 655 678 656 /* ··· 703 689 * the device up before the scripts have taken it down, 704 690 * potentially weird things happen. 705 691 */ 706 - if (bus) { 707 - pr_info("EEH: Sleep 5s ahead of complete hotplug\n"); 692 + if (!driver_eeh_aware || rmv_data->removed) { 693 + pr_info("EEH: Sleep 5s ahead of %s hotplug\n", 694 + (driver_eeh_aware ? "partial" : "complete")); 708 695 ssleep(5); 709 696 710 697 /* ··· 718 703 if (pe->type & EEH_PE_VF) { 719 704 eeh_add_virt_device(edev, NULL); 720 705 } else { 721 - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 706 + if (!driver_eeh_aware) 707 + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 722 708 pci_hp_add_devices(bus); 723 709 } 724 - } else if (frozen_bus && rmv_data->removed) { 725 - pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); 726 - ssleep(5); 727 - 728 - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); 729 - eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); 730 - if (pe->type & EEH_PE_VF) 731 - eeh_add_virt_device(edev, NULL); 732 - else 733 - pci_hp_add_devices(frozen_bus); 734 710 } 735 711 eeh_pe_state_clear(pe, EEH_PE_KEEP); 736 712 ··· 739 733 740 734 /** 741 735 * eeh_handle_normal_event - Handle EEH events on a specific PE 742 - * @pe: EEH PE 736 + * @pe: EEH PE - which should not be used after we return, as it may 737 + * have been invalidated. 743 738 * 744 739 * Attempts to recover the given PE. 
If recovery fails or the PE has failed 745 740 * too many times, remove the PE. 746 741 * 747 - * Returns true if @pe should no longer be used, else false. 742 + * While PHB detects address or data parity errors on particular PCI 743 + * slot, the associated PE will be frozen. Besides, DMA's occurring 744 + * to wild addresses (which usually happen due to bugs in device 745 + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, 746 + * #PERR or other misc PCI-related errors also can trigger EEH errors. 747 + * 748 + * Recovery process consists of unplugging the device driver (which 749 + * generated hotplug events to userspace), then issuing a PCI #RST to 750 + * the device, then reconfiguring the PCI config space for all bridges 751 + * & devices under this slot, and then finally restarting the device 752 + * drivers (which cause a second set of hotplug events to go out to 753 + * userspace). 748 754 */ 749 - static bool eeh_handle_normal_event(struct eeh_pe *pe) 755 + void eeh_handle_normal_event(struct eeh_pe *pe) 750 756 { 751 - struct pci_bus *frozen_bus; 757 + struct pci_bus *bus; 752 758 struct eeh_dev *edev, *tmp; 753 759 int rc = 0; 754 760 enum pci_ers_result result = PCI_ERS_RESULT_NONE; 755 761 struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; 756 762 757 - frozen_bus = eeh_pe_bus_get(pe); 758 - if (!frozen_bus) { 763 + bus = eeh_pe_bus_get(pe); 764 + if (!bus) { 759 765 pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", 760 766 __func__, pe->phb->global_number, pe->addr); 761 - return false; 767 + return; 762 768 } 769 + 770 + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 763 771 764 772 eeh_pe_update_time_stamp(pe); 765 773 pe->freeze_count++; ··· 826 806 */ 827 807 if (result == PCI_ERS_RESULT_NONE) { 828 808 pr_info("EEH: Reset with hotplug activity\n"); 829 - rc = eeh_reset_device(pe, frozen_bus, NULL); 809 + rc = eeh_reset_device(pe, bus, NULL, false); 830 810 if (rc) { 831 811 pr_warn("%s: Unable to reset, 
err=%d\n", 832 812 __func__, rc); ··· 878 858 /* If any device called out for a reset, then reset the slot */ 879 859 if (result == PCI_ERS_RESULT_NEED_RESET) { 880 860 pr_info("EEH: Reset without hotplug activity\n"); 881 - rc = eeh_reset_device(pe, NULL, &rmv_data); 861 + rc = eeh_reset_device(pe, bus, &rmv_data, true); 882 862 if (rc) { 883 863 pr_warn("%s: Cannot reset, err=%d\n", 884 864 __func__, rc); ··· 911 891 pr_info("EEH: Notify device driver to resume\n"); 912 892 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); 913 893 914 - return false; 894 + goto final; 915 895 916 896 hard_fail: 917 897 /* ··· 936 916 * all removed devices correctly to avoid access 937 917 * the their PCI config any more. 938 918 */ 939 - if (frozen_bus) { 940 - if (pe->type & EEH_PE_VF) { 941 - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 942 - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 943 - } else { 944 - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 945 - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 919 + if (pe->type & EEH_PE_VF) { 920 + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); 921 + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 922 + } else { 923 + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); 924 + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); 946 925 947 - pci_lock_rescan_remove(); 948 - pci_hp_remove_devices(frozen_bus); 949 - pci_unlock_rescan_remove(); 950 - 951 - /* The passed PE should no longer be used */ 952 - return true; 953 - } 926 + pci_lock_rescan_remove(); 927 + pci_hp_remove_devices(bus); 928 + pci_unlock_rescan_remove(); 929 + /* The passed PE should no longer be used */ 930 + return; 954 931 } 955 - return false; 932 + final: 933 + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 956 934 } 957 935 958 936 /** ··· 960 942 * specific PE. Iterates through possible failures and handles them as 961 943 * necessary. 
962 944 */ 963 - static void eeh_handle_special_event(void) 945 + void eeh_handle_special_event(void) 964 946 { 965 947 struct eeh_pe *pe, *phb_pe; 966 948 struct pci_bus *bus; ··· 1023 1005 */ 1024 1006 if (rc == EEH_NEXT_ERR_FROZEN_PE || 1025 1007 rc == EEH_NEXT_ERR_FENCED_PHB) { 1026 - /* 1027 - * eeh_handle_normal_event() can make the PE stale if it 1028 - * determines that the PE cannot possibly be recovered. 1029 - * Don't modify the PE state if that's the case. 1030 - */ 1031 - if (eeh_handle_normal_event(pe)) 1032 - continue; 1033 - 1034 - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 1008 + eeh_handle_normal_event(pe); 1035 1009 } else { 1036 1010 pci_lock_rescan_remove(); 1037 1011 list_for_each_entry(hose, &hose_list, list_node) { ··· 1058 1048 if (rc == EEH_NEXT_ERR_DEAD_IOC) 1059 1049 break; 1060 1050 } while (rc != EEH_NEXT_ERR_NONE); 1061 - } 1062 - 1063 - /** 1064 - * eeh_handle_event - Reset a PCI device after hard lockup. 1065 - * @pe: EEH PE 1066 - * 1067 - * While PHB detects address or data parity errors on particular PCI 1068 - * slot, the associated PE will be frozen. Besides, DMA's occurring 1069 - * to wild addresses (which usually happen due to bugs in device 1070 - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, 1071 - * #PERR or other misc PCI-related errors also can trigger EEH errors. 1072 - * 1073 - * Recovery process consists of unplugging the device driver (which 1074 - * generated hotplug events to userspace), then issuing a PCI #RST to 1075 - * the device, then reconfiguring the PCI config space for all bridges 1076 - * & devices under this slot, and then finally restarting the device 1077 - * drivers (which cause a second set of hotplug events to go out to 1078 - * userspace). 1079 - */ 1080 - void eeh_handle_event(struct eeh_pe *pe) 1081 - { 1082 - if (pe) 1083 - eeh_handle_normal_event(pe); 1084 - else 1085 - eeh_handle_special_event(); 1086 1051 }
+2 -4
arch/powerpc/kernel/eeh_event.c
··· 73 73 /* We might have event without binding PE */ 74 74 pe = event->pe; 75 75 if (pe) { 76 - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); 77 76 if (pe->type & EEH_PE_PHB) 78 77 pr_info("EEH: Detected error on PHB#%x\n", 79 78 pe->phb->global_number); ··· 80 81 pr_info("EEH: Detected PCI bus error on " 81 82 "PHB#%x-PE#%x\n", 82 83 pe->phb->global_number, pe->addr); 83 - eeh_handle_event(pe); 84 - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); 84 + eeh_handle_normal_event(pe); 85 85 } else { 86 - eeh_handle_event(NULL); 86 + eeh_handle_special_event(); 87 87 } 88 88 89 89 kfree(event);
+1 -1
arch/powerpc/kernel/entry_64.S
··· 545 545 /* Cancel all explict user streams as they will have no use after context 546 546 * switch and will stop the HW from creating streams itself 547 547 */ 548 - DCBT_STOP_ALL_STREAM_IDS(r6) 548 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) 549 549 #endif 550 550 551 551 addi r6,r4,-THREAD /* Convert THREAD to 'current' */
+69 -17
arch/powerpc/kernel/exceptions-64s.S
··· 139 139 b pnv_powersave_wakeup 140 140 #endif 141 141 142 + /* 143 + * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does 144 + * the right thing. We do not want to reconcile because that goes 145 + * through irq tracing which we don't want in NMI. 146 + * 147 + * Save PACAIRQHAPPENED because some code will do a hard disable 148 + * (e.g., xmon). So we want to restore this back to where it was 149 + * when we return. DAR is unused in the stack, so save it there. 150 + */ 151 + #define ADD_RECONCILE_NMI \ 152 + li r10,IRQS_ALL_DISABLED; \ 153 + stb r10,PACAIRQSOFTMASK(r13); \ 154 + lbz r10,PACAIRQHAPPENED(r13); \ 155 + std r10,_DAR(r1) 156 + 142 157 EXC_COMMON_BEGIN(system_reset_common) 143 158 /* 144 159 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able ··· 172 157 subi r1,r1,INT_FRAME_SIZE 173 158 EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, 174 159 system_reset, system_reset_exception, 175 - ADD_NVGPRS;ADD_RECONCILE) 160 + ADD_NVGPRS;ADD_RECONCILE_NMI) 161 + 162 + /* This (and MCE) can be simplified with mtmsrd L=1 */ 163 + /* Clear MSR_RI before setting SRR0 and SRR1. */ 164 + li r0,MSR_RI 165 + mfmsr r9 166 + andc r9,r9,r0 167 + mtmsrd r9,1 176 168 177 169 /* 178 - * The stack is no longer in use, decrement in_nmi. 170 + * MSR_RI is clear, now we can decrement paca->in_nmi. 179 171 */ 180 172 lhz r10,PACA_IN_NMI(r13) 181 173 subi r10,r10,1 182 174 sth r10,PACA_IN_NMI(r13) 183 175 184 - b ret_from_except 176 + /* 177 + * Restore soft mask settings. 178 + */ 179 + ld r10,_DAR(r1) 180 + stb r10,PACAIRQHAPPENED(r13) 181 + ld r10,SOFTE(r1) 182 + stb r10,PACAIRQSOFTMASK(r13) 183 + 184 + /* 185 + * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP. 186 + * Should share common bits... 
187 + */ 188 + 189 + /* Move original SRR0 and SRR1 into the respective regs */ 190 + ld r9,_MSR(r1) 191 + mtspr SPRN_SRR1,r9 192 + ld r3,_NIP(r1) 193 + mtspr SPRN_SRR0,r3 194 + ld r9,_CTR(r1) 195 + mtctr r9 196 + ld r9,_XER(r1) 197 + mtxer r9 198 + ld r9,_LINK(r1) 199 + mtlr r9 200 + REST_GPR(0, r1) 201 + REST_8GPRS(2, r1) 202 + REST_GPR(10, r1) 203 + ld r11,_CCR(r1) 204 + mtcr r11 205 + REST_GPR(11, r1) 206 + REST_2GPRS(12, r1) 207 + /* restore original r1. */ 208 + ld r1,GPR1(r1) 209 + RFI_TO_USER_OR_KERNEL 185 210 186 211 #ifdef CONFIG_PPC_PSERIES 187 212 /* ··· 676 621 lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ 677 622 mtlr r10 678 623 679 - beq- 8f /* if bad address, make full stack frame */ 624 + /* 625 + * Large address, check whether we have to allocate new contexts. 626 + */ 627 + beq- 8f 680 628 681 629 bne- cr5,2f /* if unrecoverable exception, oops */ 682 630 ··· 687 629 688 630 bne cr4,1f /* returning to kernel */ 689 631 690 - .machine push 691 - .machine "power4" 692 632 mtcrf 0x80,r9 693 633 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ 694 634 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 695 635 mtcrf 0x02,r9 /* I/D indication is in cr6 */ 696 636 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 697 - .machine pop 698 637 699 638 RESTORE_CTR(r9, PACA_EXSLB) 700 639 RESTORE_PPR_PACA(PACA_EXSLB, r9) ··· 704 649 RFI_TO_USER 705 650 b . 
/* prevent speculative execution */ 706 651 1: 707 - .machine push 708 - .machine "power4" 709 652 mtcrf 0x80,r9 710 653 mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ 711 654 mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ 712 655 mtcrf 0x02,r9 /* I/D indication is in cr6 */ 713 656 mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ 714 - .machine pop 715 657 716 658 RESTORE_CTR(r9, PACA_EXSLB) 717 659 RESTORE_PPR_PACA(PACA_EXSLB, r9) ··· 737 685 mr r3,r12 738 686 mfspr r11,SPRN_SRR0 739 687 mfspr r12,SPRN_SRR1 740 - LOAD_HANDLER(r10,bad_addr_slb) 688 + LOAD_HANDLER(r10, large_addr_slb) 741 689 mtspr SPRN_SRR0,r10 742 690 ld r10,PACAKMSR(r13) 743 691 mtspr SPRN_SRR1,r10 ··· 752 700 bl unrecoverable_exception 753 701 b 1b 754 702 755 - EXC_COMMON_BEGIN(bad_addr_slb) 703 + EXC_COMMON_BEGIN(large_addr_slb) 756 704 EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) 757 705 RECONCILE_IRQ_STATE(r10, r11) 758 706 ld r3, PACA_EXSLB+EX_DAR(r13) ··· 762 710 std r10, _TRAP(r1) 763 711 2: bl save_nvgprs 764 712 addi r3, r1, STACK_FRAME_OVERHEAD 765 - bl slb_miss_bad_addr 713 + bl slb_miss_large_addr 766 714 b ret_from_except 767 715 768 716 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) ··· 1325 1273 bne+ denorm_assist 1326 1274 #endif 1327 1275 1328 - KVMTEST_PR(0x1500) 1276 + KVMTEST_HV(0x1500) 1329 1277 EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) 1330 1278 EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) 1331 1279 ··· 1337 1285 EXC_VIRT_NONE(0x5500, 0x100) 1338 1286 #endif 1339 1287 1340 - TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) 1288 + TRAMP_KVM_HV(PACA_EXGEN, 0x1500) 1341 1289 1342 1290 #ifdef CONFIG_PPC_DENORMALISATION 1343 1291 TRAMP_REAL_BEGIN(denorm_assist) ··· 1518 1466 ld r11,PACA_L1D_FLUSH_SIZE(r13) 1519 1467 srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ 1520 1468 mtctr r11 1521 - DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ 1469 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ 1522 1470 1523 1471 /* order ld/st prior to dcbt 
stop all streams with flushing */ 1524 1472 sync ··· 1558 1506 ld r11,PACA_L1D_FLUSH_SIZE(r13) 1559 1507 srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ 1560 1508 mtctr r11 1561 - DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ 1509 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ 1562 1510 1563 1511 /* order ld/st prior to dcbt stop all streams with flushing */ 1564 1512 sync
+10 -9
arch/powerpc/kernel/head_64.S
··· 392 392 * physical cpu id in r24, we need to search the pacas to find 393 393 * which logical id maps to our physical one. 394 394 */ 395 - LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ 396 - ld r13,0(r13) /* Get base vaddr of paca array */ 397 395 #ifndef CONFIG_SMP 398 - addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ 399 396 b kexec_wait /* wait for next kernel if !SMP */ 400 397 #else 398 + LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */ 399 + ld r8,0(r8) /* Get base vaddr of array */ 401 400 LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ 402 401 lwz r7,0(r7) /* also the max paca allocated */ 403 402 li r5,0 /* logical cpu id */ 404 - 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ 403 + 1: 404 + sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ 405 + ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */ 406 + lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ 405 407 cmpw r6,r24 /* Compare to our id */ 406 408 beq 2f 407 - addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ 408 409 addi r5,r5,1 409 410 cmpw r5,r7 /* Check if more pacas exist */ 410 411 blt 1b ··· 757 756 mtmsrd r3 /* RI on */ 758 757 759 758 /* Set up a paca value for this processor. */ 760 - LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ 761 - ld r4,0(r4) /* Get base vaddr of paca array */ 762 - mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ 763 - add r13,r13,r4 /* for this processor. */ 759 + LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */ 760 + ld r4,0(r4) /* Get base vaddr of paca_ptrs array */ 761 + sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */ 762 + ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */ 764 763 SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ 765 764 766 765 /* Mark interrupts soft and hard disabled (they might be enabled
+3
arch/powerpc/kernel/hw_breakpoint.c
··· 33 33 #include <asm/hw_breakpoint.h> 34 34 #include <asm/processor.h> 35 35 #include <asm/sstep.h> 36 + #include <asm/debug.h> 36 37 #include <linux/uaccess.h> 37 38 38 39 /* ··· 172 171 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the 173 172 * 'symbolsize' should satisfy the check below. 174 173 */ 174 + if (!ppc_breakpoint_available()) 175 + return -ENODEV; 175 176 length_max = 8; /* DABR */ 176 177 if (cpu_has_feature(CPU_FTR_DAWR)) { 177 178 length_max = 512 ; /* 64 doublewords */
+44 -6
arch/powerpc/kernel/idle_book3s.S
··· 325 325 * r3 - PSSCR value corresponding to the requested stop state. 326 326 */ 327 327 power_enter_stop: 328 - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 329 - /* Tell KVM we're entering idle */ 330 - li r4,KVM_HWTHREAD_IN_IDLE 331 - /* DO THIS IN REAL MODE! See comment above. */ 332 - stb r4,HSTATE_HWTHREAD_STATE(r13) 333 - #endif 334 328 /* 335 329 * Check if we are executing the lite variant with ESL=EC=0 336 330 */ ··· 333 339 bne .Lhandle_esl_ec_set 334 340 PPC_STOP 335 341 li r3,0 /* Since we didn't lose state, return 0 */ 342 + std r3, PACA_REQ_PSSCR(r13) 336 343 337 344 /* 338 345 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so ··· 422 427 /* 423 428 * Entered with MSR[EE]=0 and no soft-masked interrupts pending. 424 429 * r3 contains desired PSSCR register value. 430 + * 431 + * Offline (CPU unplug) case also must notify KVM that the CPU is 432 + * idle. 425 433 */ 434 + _GLOBAL(power9_offline_stop) 435 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 436 + /* 437 + * Tell KVM we're entering idle. 438 + * This does not have to be done in real mode because the P9 MMU 439 + * is independent per-thread. Some steppings share radix/hash mode 440 + * between threads, but in that case KVM has a barrier sync in real 441 + * mode before and after switching between radix and hash. 
442 + */ 443 + li r4,KVM_HWTHREAD_IN_IDLE 444 + stb r4,HSTATE_HWTHREAD_STATE(r13) 445 + #endif 446 + /* fall through */ 447 + 426 448 _GLOBAL(power9_idle_stop) 427 449 std r3, PACA_REQ_PSSCR(r13) 450 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 451 + BEGIN_FTR_SECTION 452 + sync 453 + lwz r5, PACA_DONT_STOP(r13) 454 + cmpwi r5, 0 455 + bne 1f 456 + END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) 457 + #endif 428 458 mtspr SPRN_PSSCR,r3 429 459 LOAD_REG_ADDR(r4,power_enter_stop) 430 460 b pnv_powersave_common 431 461 /* No return */ 462 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 463 + 1: 464 + /* 465 + * We get here when TM / thread reconfiguration bug workaround 466 + * code wants to get the CPU into SMT4 mode, and therefore 467 + * we are being asked not to stop. 468 + */ 469 + li r3, 0 470 + std r3, PACA_REQ_PSSCR(r13) 471 + blr /* return 0 for wakeup cause / SRR1 value */ 472 + #endif 432 473 433 474 /* 434 475 * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1, ··· 551 520 mr r3,r12 552 521 553 522 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 523 + lbz r0,HSTATE_HWTHREAD_STATE(r13) 524 + cmpwi r0,KVM_HWTHREAD_IN_KERNEL 525 + beq 1f 554 526 li r0,KVM_HWTHREAD_IN_KERNEL 555 527 stb r0,HSTATE_HWTHREAD_STATE(r13) 556 528 /* Order setting hwthread_state vs. testing hwthread_req */ ··· 618 584 mfspr r5, SPRN_PSSCR 619 585 rldicl r5,r5,4,60 620 586 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71) 587 + li r0, 0 /* clear requested_psscr to say we're awake */ 588 + std r0, PACA_REQ_PSSCR(r13) 621 589 cmpd cr4,r5,r4 622 590 bge cr4,pnv_wakeup_tb_loss /* returns to caller */ 623 591 ··· 870 834 mtspr SPRN_PTCR,r4 871 835 ld r4,_RPR(r1) 872 836 mtspr SPRN_RPR,r4 837 + ld r4,_AMOR(r1) 838 + mtspr SPRN_AMOR,r4 873 839 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 874 840 875 841 ld r4,_TSCR(r1)
+40
arch/powerpc/kernel/iomap.c
··· 45 45 { 46 46 return readq(addr); 47 47 } 48 + u64 ioread64_lo_hi(void __iomem *addr) 49 + { 50 + return readq(addr); 51 + } 52 + u64 ioread64_hi_lo(void __iomem *addr) 53 + { 54 + return readq(addr); 55 + } 48 56 u64 ioread64be(void __iomem *addr) 49 57 { 50 58 return readq_be(addr); 51 59 } 60 + u64 ioread64be_lo_hi(void __iomem *addr) 61 + { 62 + return readq_be(addr); 63 + } 64 + u64 ioread64be_hi_lo(void __iomem *addr) 65 + { 66 + return readq_be(addr); 67 + } 52 68 EXPORT_SYMBOL(ioread64); 69 + EXPORT_SYMBOL(ioread64_lo_hi); 70 + EXPORT_SYMBOL(ioread64_hi_lo); 53 71 EXPORT_SYMBOL(ioread64be); 72 + EXPORT_SYMBOL(ioread64be_lo_hi); 73 + EXPORT_SYMBOL(ioread64be_hi_lo); 54 74 #endif /* __powerpc64__ */ 55 75 56 76 void iowrite8(u8 val, void __iomem *addr) ··· 103 83 { 104 84 writeq(val, addr); 105 85 } 86 + void iowrite64_lo_hi(u64 val, void __iomem *addr) 87 + { 88 + writeq(val, addr); 89 + } 90 + void iowrite64_hi_lo(u64 val, void __iomem *addr) 91 + { 92 + writeq(val, addr); 93 + } 106 94 void iowrite64be(u64 val, void __iomem *addr) 107 95 { 108 96 writeq_be(val, addr); 109 97 } 98 + void iowrite64be_lo_hi(u64 val, void __iomem *addr) 99 + { 100 + writeq_be(val, addr); 101 + } 102 + void iowrite64be_hi_lo(u64 val, void __iomem *addr) 103 + { 104 + writeq_be(val, addr); 105 + } 110 106 EXPORT_SYMBOL(iowrite64); 107 + EXPORT_SYMBOL(iowrite64_lo_hi); 108 + EXPORT_SYMBOL(iowrite64_hi_lo); 111 109 EXPORT_SYMBOL(iowrite64be); 110 + EXPORT_SYMBOL(iowrite64be_lo_hi); 111 + EXPORT_SYMBOL(iowrite64be_hi_lo); 112 112 #endif /* __powerpc64__ */ 113 113 114 114 /*
+17 -13
arch/powerpc/kernel/kprobes.c
··· 455 455 } 456 456 457 457 kretprobe_assert(ri, orig_ret_address, trampoline_address); 458 - regs->nip = orig_ret_address; 458 + 459 459 /* 460 - * Make LR point to the orig_ret_address. 461 - * When the 'nop' inside the kretprobe_trampoline 462 - * is optimized, we can do a 'blr' after executing the 463 - * detour buffer code. 460 + * We get here through one of two paths: 461 + * 1. by taking a trap -> kprobe_handler() -> here 462 + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here 463 + * 464 + * When going back through (1), we need regs->nip to be setup properly 465 + * as it is used to determine the return address from the trap. 466 + * For (2), since nip is not honoured with optprobes, we instead setup 467 + * the link register properly so that the subsequent 'blr' in 468 + * kretprobe_trampoline jumps back to the right instruction. 469 + * 470 + * For nip, we should set the address to the previous instruction since 471 + * we end up emulating it in kprobe_handler(), which increments the nip 472 + * again. 464 473 */ 474 + regs->nip = orig_ret_address - 4; 465 475 regs->link = orig_ret_address; 466 476 467 - reset_current_kprobe(); 468 477 kretprobe_hash_unlock(current, &flags); 469 - preempt_enable_no_resched(); 470 478 471 479 hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { 472 480 hlist_del(&ri->hlist); 473 481 kfree(ri); 474 482 } 475 - /* 476 - * By returning a non-zero value, we are telling 477 - * kprobe_handler() that we don't want the post_handler 478 - * to run (and have re-enabled preemption) 479 - */ 480 - return 1; 483 + 484 + return 0; 481 485 } 482 486 NOKPROBE_SYMBOL(trampoline_probe_handler); 483 487
+22 -15
arch/powerpc/kernel/machine_kexec_64.c
··· 168 168 * are correctly onlined. If somehow we start a CPU on boot with RTAS 169 169 * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in 170 170 * time, the boot CPU will timeout. If it does eventually execute 171 - * stuff, the secondary will start up (paca[].cpu_start was written) and 172 - * get into a peculiar state. If the platform supports 173 - * smp_ops->take_timebase(), the secondary CPU will probably be spinning 174 - * in there. If not (i.e. pseries), the secondary will continue on and 175 - * try to online itself/idle/etc. If it survives that, we need to find 176 - * these possible-but-not-online-but-should-be CPUs and chaperone them 177 - * into kexec_smp_wait(). 171 + * stuff, the secondary will start up (paca_ptrs[]->cpu_start was 172 + * written) and get into a peculiar state. 173 + * If the platform supports smp_ops->take_timebase(), the secondary CPU 174 + * will probably be spinning in there. If not (i.e. pseries), the 175 + * secondary will continue on and try to online itself/idle/etc. If it 176 + * survives that, we need to find these 177 + * possible-but-not-online-but-should-be CPUs and chaperone them into 178 + * kexec_smp_wait(). 178 179 */ 179 180 for_each_online_cpu(i) { 180 181 if (i == my_cpu) 181 182 continue; 182 183 183 - while (paca[i].kexec_state < wait_state) { 184 + while (paca_ptrs[i]->kexec_state < wait_state) { 184 185 barrier(); 185 186 if (i != notified) { 186 187 printk(KERN_INFO "kexec: waiting for cpu %d " 187 188 "(physical %d) to enter %i state\n", 188 - i, paca[i].hw_cpu_id, wait_state); 189 + i, paca_ptrs[i]->hw_cpu_id, wait_state); 189 190 notified = i; 190 191 } 191 192 } ··· 323 322 kexec_stack.thread_info.cpu = current_thread_info()->cpu; 324 323 325 324 /* We need a static PACA, too; copy this CPU's PACA over and switch to 326 - * it. Also poison per_cpu_offset to catch anyone using non-static 327 - * data. 325 + * it. 
Also poison per_cpu_offset and NULL lppaca to catch anyone using 326 + * non-static data. 328 327 */ 329 328 memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); 330 329 kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; 331 - paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - 332 - kexec_paca.paca_index; 330 + #ifdef CONFIG_PPC_PSERIES 331 + kexec_paca.lppaca_ptr = NULL; 332 + #endif 333 + paca_ptrs[kexec_paca.paca_index] = &kexec_paca; 334 + 333 335 setup_paca(&kexec_paca); 334 336 335 - /* XXX: If anyone does 'dynamic lppacas' this will also need to be 336 - * switched to a static version! 337 + /* 338 + * The lppaca should be unregistered at this point so the HV won't 339 + * touch it. In the case of a crash, none of the lppacas are 340 + * unregistered so there is not much we can do about it here. 337 341 */ 342 + 338 343 /* 339 344 * On Book3S, the copy must happen with the MMU off if we are either 340 345 * using Radix page tables or we are not in an LPAR since we can
+1 -1
arch/powerpc/kernel/machine_kexec_file_64.c
··· 43 43 44 44 /* We don't support crash kernels yet. */ 45 45 if (image->type == KEXEC_TYPE_CRASH) 46 - return -ENOTSUPP; 46 + return -EOPNOTSUPP; 47 47 48 48 for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { 49 49 fops = kexec_file_loaders[i];
-38
arch/powerpc/kernel/misc_64.S
··· 144 144 blr 145 145 EXPORT_SYMBOL(flush_dcache_range) 146 146 147 - /* 148 - * Like above, but works on non-mapped physical addresses. 149 - * Use only for non-LPAR setups ! It also assumes real mode 150 - * is cacheable. Used for flushing out the DART before using 151 - * it as uncacheable memory 152 - * 153 - * flush_dcache_phys_range(unsigned long start, unsigned long stop) 154 - * 155 - * flush all bytes from start to stop-1 inclusive 156 - */ 157 - _GLOBAL(flush_dcache_phys_range) 158 - ld r10,PPC64_CACHES@toc(r2) 159 - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ 160 - addi r5,r7,-1 161 - andc r6,r3,r5 /* round low to line bdy */ 162 - subf r8,r6,r4 /* compute length */ 163 - add r8,r8,r5 /* ensure we get enough */ 164 - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ 165 - srw. r8,r8,r9 /* compute line count */ 166 - beqlr /* nothing to do? */ 167 - mfmsr r5 /* Disable MMU Data Relocation */ 168 - ori r0,r5,MSR_DR 169 - xori r0,r0,MSR_DR 170 - sync 171 - mtmsr r0 172 - sync 173 - isync 174 - mtctr r8 175 - 0: dcbst 0,r6 176 - add r6,r6,r7 177 - bdnz 0b 178 - sync 179 - isync 180 - mtmsr r5 /* Re-enable MMU Data Relocation */ 181 - sync 182 - isync 183 - blr 184 - 185 147 _GLOBAL(flush_inval_dcache_range) 186 148 ld r10,PPC64_CACHES@toc(r2) 187 149 lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
+3 -6
arch/powerpc/kernel/nvram_64.c
··· 207 207 208 208 tmp_index = part->index; 209 209 210 - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), 211 - &tmp_index); 210 + rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index); 212 211 if (rc <= 0) { 213 212 pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); 214 213 return rc; ··· 243 244 tmp_index = part->index; 244 245 245 246 if (part->os_partition) { 246 - rc = ppc_md.nvram_read((char *)&info, 247 - sizeof(struct err_log_info), 248 - &tmp_index); 247 + rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index); 249 248 if (rc <= 0) { 250 249 pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); 251 250 return rc; ··· 1170 1173 "detected: 0-length partition\n"); 1171 1174 goto out; 1172 1175 } 1173 - tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); 1176 + tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL); 1174 1177 err = -ENOMEM; 1175 1178 if (!tmp_part) { 1176 1179 printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
+123 -119
arch/powerpc/kernel/paca.c
··· 20 20 21 21 #include "setup.h" 22 22 23 - #ifdef CONFIG_PPC_BOOK3S 23 + #ifndef CONFIG_SMP 24 + #define boot_cpuid 0 25 + #endif 26 + 27 + static void *__init alloc_paca_data(unsigned long size, unsigned long align, 28 + unsigned long limit, int cpu) 29 + { 30 + unsigned long pa; 31 + int nid; 32 + 33 + /* 34 + * boot_cpuid paca is allocated very early before cpu_to_node is up. 35 + * Set bottom-up mode, because the boot CPU should be on node-0, 36 + * which will put its paca in the right place. 37 + */ 38 + if (cpu == boot_cpuid) { 39 + nid = -1; 40 + memblock_set_bottom_up(true); 41 + } else { 42 + nid = early_cpu_to_node(cpu); 43 + } 44 + 45 + pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE); 46 + if (!pa) { 47 + pa = memblock_alloc_base(size, align, limit); 48 + if (!pa) 49 + panic("cannot allocate paca data"); 50 + } 51 + 52 + if (cpu == boot_cpuid) 53 + memblock_set_bottom_up(false); 54 + 55 + return __va(pa); 56 + } 57 + 58 + #ifdef CONFIG_PPC_PSERIES 24 59 25 60 /* 26 - * The structure which the hypervisor knows about - this structure 27 - * should not cross a page boundary. The vpa_init/register_vpa call 28 - * is now known to fail if the lppaca structure crosses a page 29 - * boundary. The lppaca is also used on POWER5 pSeries boxes. 30 - * The lppaca is 640 bytes long, and cannot readily 31 - * change since the hypervisor knows its layout, so a 1kB alignment 32 - * will suffice to ensure that it doesn't cross a page boundary. 61 + * See asm/lppaca.h for more detail. 62 + * 63 + * lppaca structures must must be 1kB in size, L1 cache line aligned, 64 + * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy 65 + * these requirements. 33 66 */ 34 - struct lppaca lppaca[] = { 35 - [0 ... 
(NR_LPPACAS-1)] = { 67 + static inline void init_lppaca(struct lppaca *lppaca) 68 + { 69 + BUILD_BUG_ON(sizeof(struct lppaca) != 640); 70 + 71 + *lppaca = (struct lppaca) { 36 72 .desc = cpu_to_be32(0xd397d781), /* "LpPa" */ 37 - .size = cpu_to_be16(sizeof(struct lppaca)), 73 + .size = cpu_to_be16(0x400), 38 74 .fpregs_in_use = 1, 39 75 .slb_count = cpu_to_be16(64), 40 76 .vmxregs_in_use = 0, 41 - .page_ins = 0, 42 - }, 77 + .page_ins = 0, }; 43 78 }; 44 79 45 - static struct lppaca *extra_lppacas; 46 - static long __initdata lppaca_size; 47 - 48 - static void __init allocate_lppacas(int nr_cpus, unsigned long limit) 49 - { 50 - if (nr_cpus <= NR_LPPACAS) 51 - return; 52 - 53 - lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * 54 - (nr_cpus - NR_LPPACAS)); 55 - extra_lppacas = __va(memblock_alloc_base(lppaca_size, 56 - PAGE_SIZE, limit)); 57 - } 58 - 59 - static struct lppaca * __init new_lppaca(int cpu) 80 + static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) 60 81 { 61 82 struct lppaca *lp; 83 + size_t size = 0x400; 62 84 63 - if (cpu < NR_LPPACAS) 64 - return &lppaca[cpu]; 85 + BUILD_BUG_ON(size < sizeof(struct lppaca)); 65 86 66 - lp = extra_lppacas + (cpu - NR_LPPACAS); 67 - *lp = lppaca[0]; 87 + if (early_cpu_has_feature(CPU_FTR_HVMODE)) 88 + return NULL; 89 + 90 + lp = alloc_paca_data(size, 0x400, limit, cpu); 91 + init_lppaca(lp); 68 92 69 93 return lp; 70 94 } 71 - 72 - static void __init free_lppacas(void) 73 - { 74 - long new_size = 0, nr; 75 - 76 - if (!lppaca_size) 77 - return; 78 - nr = num_possible_cpus() - NR_LPPACAS; 79 - if (nr > 0) 80 - new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); 81 - if (new_size >= lppaca_size) 82 - return; 83 - 84 - memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); 85 - lppaca_size = new_size; 86 - } 87 - 88 - #else 89 - 90 - static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } 91 - static inline void free_lppacas(void) { } 92 - 93 95 #endif /* 
CONFIG_PPC_BOOK3S */ 94 96 95 97 #ifdef CONFIG_PPC_BOOK3S_64 96 98 97 99 /* 98 - * 3 persistent SLBs are registered here. The buffer will be zero 100 + * 3 persistent SLBs are allocated here. The buffer will be zero 99 101 * initially, hence will all be invaild until we actually write them. 100 102 * 101 103 * If you make the number of persistent SLB entries dynamic, please also 102 104 * update PR KVM to flush and restore them accordingly. 103 105 */ 104 - static struct slb_shadow * __initdata slb_shadow; 105 - 106 - static void __init allocate_slb_shadows(int nr_cpus, int limit) 107 - { 108 - int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus); 109 - 110 - if (early_radix_enabled()) 111 - return; 112 - 113 - slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit)); 114 - memset(slb_shadow, 0, size); 115 - } 116 - 117 - static struct slb_shadow * __init init_slb_shadow(int cpu) 106 + static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) 118 107 { 119 108 struct slb_shadow *s; 120 109 121 - if (early_radix_enabled()) 122 - return NULL; 110 + if (cpu != boot_cpuid) { 111 + /* 112 + * Boot CPU comes here before early_radix_enabled 113 + * is parsed (e.g., for disable_radix). So allocate 114 + * always and this will be fixed up in free_unused_pacas. 115 + */ 116 + if (early_radix_enabled()) 117 + return NULL; 118 + } 123 119 124 - s = &slb_shadow[cpu]; 125 - 126 - /* 127 - * When we come through here to initialise boot_paca, the slb_shadow 128 - * buffers are not allocated yet. That's OK, we'll get one later in 129 - * boot, but make sure we don't corrupt memory at 0. 
130 - */ 131 - if (!slb_shadow) 132 - return NULL; 120 + s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu); 121 + memset(s, 0, sizeof(*s)); 133 122 134 123 s->persistent = cpu_to_be32(SLB_NUM_BOLTED); 135 124 s->buffer_length = cpu_to_be32(sizeof(*s)); 136 125 137 126 return s; 138 127 } 139 - 140 - #else /* !CONFIG_PPC_BOOK3S_64 */ 141 - 142 - static void __init allocate_slb_shadows(int nr_cpus, int limit) { } 143 128 144 129 #endif /* CONFIG_PPC_BOOK3S_64 */ 145 130 ··· 137 152 * processors. The processor VPD array needs one entry per physical 138 153 * processor (not thread). 139 154 */ 140 - struct paca_struct *paca; 141 - EXPORT_SYMBOL(paca); 155 + struct paca_struct **paca_ptrs __read_mostly; 156 + EXPORT_SYMBOL(paca_ptrs); 142 157 143 158 void __init initialise_paca(struct paca_struct *new_paca, int cpu) 144 159 { 145 - #ifdef CONFIG_PPC_BOOK3S 146 - new_paca->lppaca_ptr = new_lppaca(cpu); 147 - #else 160 + #ifdef CONFIG_PPC_PSERIES 161 + new_paca->lppaca_ptr = NULL; 162 + #endif 163 + #ifdef CONFIG_PPC_BOOK3E 148 164 new_paca->kernel_pgd = swapper_pg_dir; 149 165 #endif 150 166 new_paca->lock_token = 0x8000; ··· 159 173 new_paca->__current = &init_task; 160 174 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; 161 175 #ifdef CONFIG_PPC_BOOK3S_64 162 - new_paca->slb_shadow_ptr = init_slb_shadow(cpu); 176 + new_paca->slb_shadow_ptr = NULL; 163 177 #endif 164 178 165 179 #ifdef CONFIG_PPC_BOOK3E ··· 189 203 190 204 } 191 205 192 - static int __initdata paca_size; 206 + static int __initdata paca_nr_cpu_ids; 207 + static int __initdata paca_ptrs_size; 208 + static int __initdata paca_struct_size; 193 209 194 - void __init allocate_pacas(void) 210 + void __init allocate_paca_ptrs(void) 211 + { 212 + paca_nr_cpu_ids = nr_cpu_ids; 213 + 214 + paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; 215 + paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0)); 216 + memset(paca_ptrs, 0x88, paca_ptrs_size); 217 + } 218 + 219 + void __init allocate_paca(int 
cpu) 195 220 { 196 221 u64 limit; 197 - int cpu; 222 + struct paca_struct *paca; 223 + 224 + BUG_ON(cpu >= paca_nr_cpu_ids); 198 225 199 226 #ifdef CONFIG_PPC_BOOK3S_64 200 227 /* ··· 219 220 limit = ppc64_rma_size; 220 221 #endif 221 222 222 - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); 223 + paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES, 224 + limit, cpu); 225 + paca_ptrs[cpu] = paca; 226 + memset(paca, 0, sizeof(struct paca_struct)); 223 227 224 - paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); 225 - memset(paca, 0, paca_size); 226 - 227 - printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n", 228 - paca_size, nr_cpu_ids, paca); 229 - 230 - allocate_lppacas(nr_cpu_ids, limit); 231 - 232 - allocate_slb_shadows(nr_cpu_ids, limit); 233 - 234 - /* Can't use for_each_*_cpu, as they aren't functional yet */ 235 - for (cpu = 0; cpu < nr_cpu_ids; cpu++) 236 - initialise_paca(&paca[cpu], cpu); 228 + initialise_paca(paca, cpu); 229 + #ifdef CONFIG_PPC_PSERIES 230 + paca->lppaca_ptr = new_lppaca(cpu, limit); 231 + #endif 232 + #ifdef CONFIG_PPC_BOOK3S_64 233 + paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); 234 + #endif 235 + paca_struct_size += sizeof(struct paca_struct); 237 236 } 238 237 239 238 void __init free_unused_pacas(void) 240 239 { 241 - int new_size; 240 + int new_ptrs_size; 242 241 243 - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); 242 + new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; 243 + if (new_ptrs_size < paca_ptrs_size) 244 + memblock_free(__pa(paca_ptrs) + new_ptrs_size, 245 + paca_ptrs_size - new_ptrs_size); 244 246 245 - if (new_size >= paca_size) 246 - return; 247 + paca_nr_cpu_ids = nr_cpu_ids; 248 + paca_ptrs_size = new_ptrs_size; 247 249 248 - memblock_free(__pa(paca) + new_size, paca_size - new_size); 250 + #ifdef CONFIG_PPC_BOOK3S_64 251 + if (early_radix_enabled()) { 252 + /* Ugly fixup, see new_slb_shadow() */ 253 + 
memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), 254 + sizeof(struct slb_shadow)); 255 + paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL; 256 + } 257 + #endif 249 258 250 - printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", 251 - paca_size - new_size); 252 - 253 - paca_size = new_size; 254 - 255 - free_lppacas(); 259 + printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n", 260 + paca_ptrs_size + paca_struct_size, nr_cpu_ids); 256 261 } 257 262 258 263 void copy_mm_to_paca(struct mm_struct *mm) ··· 268 265 #ifdef CONFIG_PPC_MM_SLICES 269 266 VM_BUG_ON(!mm->context.slb_addr_limit); 270 267 get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; 271 - get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; 268 + memcpy(&get_paca()->mm_ctx_low_slices_psize, 269 + &context->low_slices_psize, sizeof(context->low_slices_psize)); 272 270 memcpy(&get_paca()->mm_ctx_high_slices_psize, 273 271 &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); 274 272 #else /* CONFIG_PPC_MM_SLICES */
+22 -4
arch/powerpc/kernel/process.c
··· 173 173 EXPORT_SYMBOL(__msr_check_and_clear); 174 174 175 175 #ifdef CONFIG_PPC_FPU 176 - void __giveup_fpu(struct task_struct *tsk) 176 + static void __giveup_fpu(struct task_struct *tsk) 177 177 { 178 178 unsigned long msr; 179 179 ··· 556 556 regs->msr = msr; 557 557 } 558 558 559 - void save_all(struct task_struct *tsk) 559 + static void save_all(struct task_struct *tsk) 560 560 { 561 561 unsigned long usermsr; 562 562 ··· 718 718 { 719 719 thread->hw_brk.address = 0; 720 720 thread->hw_brk.type = 0; 721 - set_breakpoint(&thread->hw_brk); 721 + if (ppc_breakpoint_available()) 722 + set_breakpoint(&thread->hw_brk); 722 723 } 723 724 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ 724 725 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ ··· 816 815 memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk)); 817 816 818 817 if (cpu_has_feature(CPU_FTR_DAWR)) 818 + // Power8 or later 819 819 set_dawr(brk); 820 - else 820 + else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) 821 + // Power7 or earlier 821 822 set_dabr(brk); 823 + else 824 + // Shouldn't happen due to higher level checks 825 + WARN_ON_ONCE(1); 822 826 } 823 827 824 828 void set_breakpoint(struct arch_hw_breakpoint *brk) ··· 832 826 __set_breakpoint(brk); 833 827 preempt_enable(); 834 828 } 829 + 830 + /* Check if we have DAWR or DABR hardware */ 831 + bool ppc_breakpoint_available(void) 832 + { 833 + if (cpu_has_feature(CPU_FTR_DAWR)) 834 + return true; /* POWER8 DAWR */ 835 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) 836 + return false; /* POWER9 with DAWR disabled */ 837 + /* DABR: Everything but POWER8 and POWER9 */ 838 + return true; 839 + } 840 + EXPORT_SYMBOL_GPL(ppc_breakpoint_available); 835 841 836 842 #ifdef CONFIG_PPC64 837 843 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
+15 -4
arch/powerpc/kernel/prom.c
··· 291 291 292 292 static void __init check_cpu_feature_properties(unsigned long node) 293 293 { 294 - unsigned long i; 294 + int i; 295 295 struct feature_property *fp = feature_properties; 296 296 const __be32 *prop; 297 297 298 - for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) { 298 + for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { 299 299 prop = of_get_flat_dt_prop(node, fp->name, NULL); 300 300 if (prop && be32_to_cpup(prop) >= fp->min_value) { 301 301 cur_cpu_spec->cpu_features |= fp->cpu_feature; ··· 365 365 DBG("boot cpu: logical %d physical %d\n", found, 366 366 be32_to_cpu(intserv[found_thread])); 367 367 boot_cpuid = found; 368 - set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); 369 368 370 369 /* 371 370 * PAPR defines "logical" PVR values for cpus that ··· 402 403 cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; 403 404 else if (!dt_cpu_ftrs_in_use()) 404 405 cur_cpu_spec->cpu_features |= CPU_FTR_SMT; 406 + allocate_paca(boot_cpuid); 405 407 #endif 408 + set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); 406 409 407 410 return 0; 408 411 } ··· 745 744 * FIXME .. and the initrd too? */ 746 745 move_device_tree(); 747 746 748 - allocate_pacas(); 747 + allocate_paca_ptrs(); 749 748 750 749 DBG("Scanning CPUs ...\n"); 751 750 ··· 875 874 876 875 bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 877 876 { 877 + #ifdef CONFIG_SMP 878 + /* 879 + * Early firmware scanning must use this rather than 880 + * get_hard_smp_processor_id because we don't have pacas allocated 881 + * until memory topology is discovered. 882 + */ 883 + if (cpu_to_phys_id != NULL) 884 + return (int)phys_id == cpu_to_phys_id[cpu]; 885 + #endif 886 + 878 887 return (int)phys_id == get_hard_smp_processor_id(cpu); 879 888 }
+16 -13
arch/powerpc/kernel/prom_init.c
··· 171 171 static unsigned long __initdata prom_tce_alloc_end; 172 172 #endif 173 173 174 - static bool __initdata prom_radix_disable; 174 + static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); 175 175 176 176 struct platform_support { 177 177 bool hash_mmu; ··· 641 641 642 642 opt = strstr(prom_cmd_line, "disable_radix"); 643 643 if (opt) { 644 - prom_debug("Radix disabled from cmdline\n"); 645 - prom_radix_disable = true; 644 + opt += 13; 645 + if (*opt && *opt == '=') { 646 + bool val; 647 + 648 + if (kstrtobool(++opt, &val)) 649 + prom_radix_disable = false; 650 + else 651 + prom_radix_disable = val; 652 + } else 653 + prom_radix_disable = true; 646 654 } 655 + if (prom_radix_disable) 656 + prom_debug("Radix disabled from cmdline\n"); 647 657 } 648 658 649 659 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) ··· 1120 1110 } 1121 1111 } 1122 1112 1123 - if (supported.radix_mmu && supported.radix_gtse) { 1113 + if (supported.radix_mmu && supported.radix_gtse && 1114 + IS_ENABLED(CONFIG_PPC_RADIX_MMU)) { 1124 1115 /* Radix preferred - but we require GTSE for now */ 1125 1116 prom_debug("Asking for radix with GTSE\n"); 1126 1117 ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); ··· 1820 1809 * size to 4 MB. This is enough to map 2GB of PCI DMA space. 1821 1810 * By doing this, we avoid the pitfalls of trying to DMA to 1822 1811 * MMIO space and the DMA alias hole. 1823 - * 1824 - * On POWER4, firmware sets the TCE region by assuming 1825 - * each TCE table is 8MB. Using this memory for anything 1826 - * else will impact performance, so we always allocate 8MB. 1827 - * Anton 1828 1812 */ 1829 - if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p)) 1830 - minsize = 8UL << 20; 1831 - else 1832 - minsize = 4UL << 20; 1813 + minsize = 4UL << 20; 1833 1814 1834 1815 /* Align to the greater of the align or size */ 1835 1816 align = max(minalign, minsize);
+1 -1
arch/powerpc/kernel/prom_init_check.sh
··· 19 19 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush 20 20 _end enter_prom memcpy memset reloc_offset __secondary_hold 21 21 __secondary_hold_acknowledge __secondary_hold_spinloop __start 22 - strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 22 + strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 23 23 reloc_got2 kernstart_addr memstart_addr linux_banner _stext 24 24 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." 25 25
+14 -2
arch/powerpc/kernel/ptrace.c
··· 41 41 #include <asm/switch_to.h> 42 42 #include <asm/tm.h> 43 43 #include <asm/asm-prototypes.h> 44 + #include <asm/debug.h> 44 45 45 46 #define CREATE_TRACE_POINTS 46 47 #include <trace/events/syscalls.h> ··· 2379 2378 struct perf_event_attr attr; 2380 2379 #endif /* CONFIG_HAVE_HW_BREAKPOINT */ 2381 2380 #ifndef CONFIG_PPC_ADV_DEBUG_REGS 2381 + bool set_bp = true; 2382 2382 struct arch_hw_breakpoint hw_brk; 2383 2383 #endif 2384 2384 ··· 2413 2411 hw_brk.address = data & (~HW_BRK_TYPE_DABR); 2414 2412 hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL; 2415 2413 hw_brk.len = 8; 2414 + set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR); 2416 2415 #ifdef CONFIG_HAVE_HW_BREAKPOINT 2417 2416 bp = thread->ptrace_bps[0]; 2418 - if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) { 2417 + if (!set_bp) { 2419 2418 if (bp) { 2420 2419 unregister_hw_breakpoint(bp); 2421 2420 thread->ptrace_bps[0] = NULL; ··· 2453 2450 return PTR_ERR(bp); 2454 2451 } 2455 2452 2453 + #else /* !CONFIG_HAVE_HW_BREAKPOINT */ 2454 + if (set_bp && (!ppc_breakpoint_available())) 2455 + return -ENODEV; 2456 2456 #endif /* CONFIG_HAVE_HW_BREAKPOINT */ 2457 2457 task->thread.hw_brk = hw_brk; 2458 2458 #else /* CONFIG_PPC_ADV_DEBUG_REGS */ ··· 2910 2904 if (child->thread.hw_brk.address) 2911 2905 return -ENOSPC; 2912 2906 2907 + if (!ppc_breakpoint_available()) 2908 + return -ENODEV; 2909 + 2913 2910 child->thread.hw_brk = brk; 2914 2911 2915 2912 return 1; ··· 3061 3052 #endif 3062 3053 #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ 3063 3054 dbginfo.num_instruction_bps = 0; 3064 - dbginfo.num_data_bps = 1; 3055 + if (ppc_breakpoint_available()) 3056 + dbginfo.num_data_bps = 1; 3057 + else 3058 + dbginfo.num_data_bps = 0; 3065 3059 dbginfo.num_condition_regs = 0; 3066 3060 #ifdef CONFIG_PPC64 3067 3061 dbginfo.data_bp_alignment = 8;
+88
arch/powerpc/kernel/security.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + // 3 + // Security related flags and so on. 4 + // 5 + // Copyright 2018, Michael Ellerman, IBM Corporation. 6 + 7 + #include <linux/kernel.h> 8 + #include <linux/device.h> 9 + #include <linux/seq_buf.h> 10 + 11 + #include <asm/security_features.h> 12 + 13 + 14 + unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; 15 + 16 + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) 17 + { 18 + bool thread_priv; 19 + 20 + thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); 21 + 22 + if (rfi_flush || thread_priv) { 23 + struct seq_buf s; 24 + seq_buf_init(&s, buf, PAGE_SIZE - 1); 25 + 26 + seq_buf_printf(&s, "Mitigation: "); 27 + 28 + if (rfi_flush) 29 + seq_buf_printf(&s, "RFI Flush"); 30 + 31 + if (rfi_flush && thread_priv) 32 + seq_buf_printf(&s, ", "); 33 + 34 + if (thread_priv) 35 + seq_buf_printf(&s, "L1D private per thread"); 36 + 37 + seq_buf_printf(&s, "\n"); 38 + 39 + return s.len; 40 + } 41 + 42 + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && 43 + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) 44 + return sprintf(buf, "Not affected\n"); 45 + 46 + return sprintf(buf, "Vulnerable\n"); 47 + } 48 + 49 + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) 50 + { 51 + if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) 52 + return sprintf(buf, "Not affected\n"); 53 + 54 + return sprintf(buf, "Vulnerable\n"); 55 + } 56 + 57 + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) 58 + { 59 + bool bcs, ccd, ori; 60 + struct seq_buf s; 61 + 62 + seq_buf_init(&s, buf, PAGE_SIZE - 1); 63 + 64 + bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); 65 + ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); 66 + ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); 67 + 68 + if (bcs || ccd) { 69 + seq_buf_printf(&s, "Mitigation: "); 70 + 71 + if (bcs) 72 + seq_buf_printf(&s, 
"Indirect branch serialisation (kernel only)"); 73 + 74 + if (bcs && ccd) 75 + seq_buf_printf(&s, ", "); 76 + 77 + if (ccd) 78 + seq_buf_printf(&s, "Indirect branch cache disabled"); 79 + } else 80 + seq_buf_printf(&s, "Vulnerable"); 81 + 82 + if (ori) 83 + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); 84 + 85 + seq_buf_printf(&s, "\n"); 86 + 87 + return s.len; 88 + }
+34 -3
arch/powerpc/kernel/setup-common.c
··· 437 437 } 438 438 439 439 440 + u32 *cpu_to_phys_id = NULL; 441 + 440 442 /** 441 443 * setup_cpu_maps - initialize the following cpu maps: 442 444 * cpu_possible_mask ··· 465 463 466 464 DBG("smp_setup_cpu_maps()\n"); 467 465 466 + cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32), 467 + __alignof__(u32))); 468 + memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); 469 + 468 470 for_each_node_by_type(dn, "cpu") { 469 471 const __be32 *intserv; 470 472 __be32 cpu_be; ··· 486 480 intserv = of_get_property(dn, "reg", &len); 487 481 if (!intserv) { 488 482 cpu_be = cpu_to_be32(cpu); 483 + /* XXX: what is this? uninitialized?? */ 489 484 intserv = &cpu_be; /* assume logical == phys */ 490 485 len = 4; 491 486 } ··· 506 499 "enable-method", "spin-table"); 507 500 508 501 set_cpu_present(cpu, avail); 509 - set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); 510 502 set_cpu_possible(cpu, true); 503 + cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]); 511 504 cpu++; 512 505 } 513 506 ··· 842 835 pr_info("-----------------------------------------------------\n"); 843 836 } 844 837 838 + #ifdef CONFIG_SMP 839 + static void smp_setup_pacas(void) 840 + { 841 + int cpu; 842 + 843 + for_each_possible_cpu(cpu) { 844 + if (cpu == smp_processor_id()) 845 + continue; 846 + allocate_paca(cpu); 847 + set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]); 848 + } 849 + 850 + memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32)); 851 + cpu_to_phys_id = NULL; 852 + } 853 + #endif 854 + 845 855 /* 846 856 * Called into from start_kernel this initializes memblock, which is used 847 857 * to manage page allocation until mem_init is called. ··· 912 888 /* Check the SMT related command line arguments (ppc64). */ 913 889 check_smt_enabled(); 914 890 915 - /* On BookE, setup per-core TLB data structures. 
*/ 916 - setup_tlb_core_data(); 891 + /* Parse memory topology */ 892 + mem_topology_setup(); 917 893 918 894 /* 919 895 * Release secondary cpus out of their spinloops at 0x60 now that ··· 923 899 * so smp_release_cpus() does nothing for them. 924 900 */ 925 901 #ifdef CONFIG_SMP 902 + smp_setup_pacas(); 903 + 904 + /* On BookE, setup per-core TLB data structures. */ 905 + setup_tlb_core_data(); 906 + 926 907 smp_release_cpus(); 927 908 #endif 928 909 ··· 948 919 #ifdef CONFIG_PPC64 949 920 if (!radix_enabled()) 950 921 init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; 922 + #elif defined(CONFIG_PPC_8xx) 923 + init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; 951 924 #else 952 925 #error "context.addr_limit not initialized." 953 926 #endif
+3 -6
arch/powerpc/kernel/setup.h
··· 46 46 #endif 47 47 48 48 #ifdef CONFIG_PPC64 49 - void record_spr_defaults(void); 50 - #else 51 - static inline void record_spr_defaults(void) { }; 52 - #endif 53 - 54 - #ifdef CONFIG_PPC64 55 49 u64 ppc64_bolted_size(void); 50 + 51 + /* Default SPR values from firmware/kexec */ 52 + extern unsigned long spr_default_dscr; 56 53 #endif 57 54 58 55 /*
+4 -4
arch/powerpc/kernel/setup_32.c
··· 39 39 #include <asm/udbg.h> 40 40 #include <asm/code-patching.h> 41 41 #include <asm/cpu_has_feature.h> 42 + #include <asm/asm-prototypes.h> 42 43 43 44 #define DBG(fmt...) 44 45 ··· 122 121 } 123 122 124 123 /* Checks "l2cr=xxxx" command-line option */ 125 - int __init ppc_setup_l2cr(char *str) 124 + static int __init ppc_setup_l2cr(char *str) 126 125 { 127 126 if (cpu_has_feature(CPU_FTR_L2CR)) { 128 127 unsigned long val = simple_strtoul(str, NULL, 0); ··· 135 134 __setup("l2cr=", ppc_setup_l2cr); 136 135 137 136 /* Checks "l3cr=xxxx" command-line option */ 138 - int __init ppc_setup_l3cr(char *str) 137 + static int __init ppc_setup_l3cr(char *str) 139 138 { 140 139 if (cpu_has_feature(CPU_FTR_L3CR)) { 141 140 unsigned long val = simple_strtoul(str, NULL, 0); ··· 181 180 182 181 #endif /* CONFIG_NVRAM */ 183 182 184 - int __init ppc_init(void) 183 + static int __init ppc_init(void) 185 184 { 186 185 /* clear the progress line */ 187 186 if (ppc_md.progress) ··· 193 192 } 194 193 return 0; 195 194 } 196 - 197 195 arch_initcall(ppc_init); 198 196 199 197 void __init irqstack_early_init(void)
+69 -44
arch/powerpc/kernel/setup_64.c
··· 110 110 if (cpu_first_thread_sibling(boot_cpuid) == first) 111 111 first = boot_cpuid; 112 112 113 - paca[cpu].tcd_ptr = &paca[first].tcd; 113 + paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd; 114 114 115 115 /* 116 116 * If we have threads, we need either tlbsrx. ··· 254 254 get_paca()->kernel_msr = MSR_KERNEL; 255 255 } 256 256 257 + unsigned long spr_default_dscr = 0; 258 + 259 + void __init record_spr_defaults(void) 260 + { 261 + if (early_cpu_has_feature(CPU_FTR_DSCR)) 262 + spr_default_dscr = mfspr(SPRN_DSCR); 263 + } 264 + 257 265 /* 258 266 * Early initialization entry point. This is called by head.S 259 267 * with MMU translation disabled. We rely on the "feature" of ··· 312 304 early_init_devtree(__va(dt_ptr)); 313 305 314 306 /* Now we know the logical id of our boot cpu, setup the paca. */ 315 - setup_paca(&paca[boot_cpuid]); 307 + if (boot_cpuid != 0) { 308 + /* Poison paca_ptrs[0] again if it's not the boot cpu */ 309 + memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); 310 + } 311 + setup_paca(paca_ptrs[boot_cpuid]); 316 312 fixup_boot_paca(); 317 313 318 314 /* ··· 611 599 #endif 612 600 } 613 601 602 + static void *__init alloc_stack(unsigned long limit, int cpu) 603 + { 604 + unsigned long pa; 605 + 606 + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, 607 + early_cpu_to_node(cpu), MEMBLOCK_NONE); 608 + if (!pa) { 609 + pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); 610 + if (!pa) 611 + panic("cannot allocate stacks"); 612 + } 613 + 614 + return __va(pa); 615 + } 616 + 614 617 void __init irqstack_early_init(void) 615 618 { 616 619 u64 limit = ppc64_bolted_size(); ··· 637 610 * accessed in realmode. 
638 611 */ 639 612 for_each_possible_cpu(i) { 640 - softirq_ctx[i] = (struct thread_info *) 641 - __va(memblock_alloc_base(THREAD_SIZE, 642 - THREAD_SIZE, limit)); 643 - hardirq_ctx[i] = (struct thread_info *) 644 - __va(memblock_alloc_base(THREAD_SIZE, 645 - THREAD_SIZE, limit)); 613 + softirq_ctx[i] = alloc_stack(limit, i); 614 + hardirq_ctx[i] = alloc_stack(limit, i); 646 615 } 647 616 } 648 617 ··· 646 623 void __init exc_lvl_early_init(void) 647 624 { 648 625 unsigned int i; 649 - unsigned long sp; 650 626 651 627 for_each_possible_cpu(i) { 652 - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 653 - critirq_ctx[i] = (struct thread_info *)__va(sp); 654 - paca[i].crit_kstack = __va(sp + THREAD_SIZE); 628 + void *sp; 655 629 656 - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 657 - dbgirq_ctx[i] = (struct thread_info *)__va(sp); 658 - paca[i].dbg_kstack = __va(sp + THREAD_SIZE); 630 + sp = alloc_stack(ULONG_MAX, i); 631 + critirq_ctx[i] = sp; 632 + paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE; 659 633 660 - sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); 661 - mcheckirq_ctx[i] = (struct thread_info *)__va(sp); 662 - paca[i].mc_kstack = __va(sp + THREAD_SIZE); 634 + sp = alloc_stack(ULONG_MAX, i); 635 + dbgirq_ctx[i] = sp; 636 + paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE; 637 + 638 + sp = alloc_stack(ULONG_MAX, i); 639 + mcheckirq_ctx[i] = sp; 640 + paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE; 663 641 } 664 642 665 643 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) ··· 714 690 715 691 for_each_possible_cpu(i) { 716 692 struct thread_info *ti; 717 - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 693 + 694 + ti = alloc_stack(limit, i); 718 695 memset(ti, 0, THREAD_SIZE); 719 696 emerg_stack_init_thread_info(ti, i); 720 - paca[i].emergency_sp = (void *)ti + THREAD_SIZE; 697 + paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE; 721 698 722 699 #ifdef CONFIG_PPC_BOOK3S_64 723 700 /* emergency stack for NMI exception handling. 
*/ 724 - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 701 + ti = alloc_stack(limit, i); 725 702 memset(ti, 0, THREAD_SIZE); 726 703 emerg_stack_init_thread_info(ti, i); 727 - paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; 704 + paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE; 728 705 729 706 /* emergency stack for machine check exception handling. */ 730 - ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); 707 + ti = alloc_stack(limit, i); 731 708 memset(ti, 0, THREAD_SIZE); 732 709 emerg_stack_init_thread_info(ti, i); 733 - paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; 710 + paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE; 734 711 #endif 735 712 } 736 713 } ··· 787 762 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 788 763 for_each_possible_cpu(cpu) { 789 764 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; 790 - paca[cpu].data_offset = __per_cpu_offset[cpu]; 765 + paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu]; 791 766 } 792 767 } 793 768 #endif ··· 871 846 872 847 void rfi_flush_enable(bool enable) 873 848 { 874 - if (rfi_flush == enable) 875 - return; 876 - 877 849 if (enable) { 878 850 do_rfi_flush_fixups(enabled_flush_types); 879 851 on_each_cpu(do_nothing, NULL, 1); ··· 885 863 u64 l1d_size, limit; 886 864 int cpu; 887 865 866 + /* Only allocate the fallback flush area once (at boot time). 
*/ 867 + if (l1d_flush_fallback_area) 868 + return; 869 + 888 870 l1d_size = ppc64_caches.l1d.size; 889 871 limit = min(ppc64_bolted_size(), ppc64_rma_size); 890 872 ··· 901 875 memset(l1d_flush_fallback_area, 0, l1d_size * 2); 902 876 903 877 for_each_possible_cpu(cpu) { 904 - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; 905 - paca[cpu].l1d_flush_size = l1d_size; 878 + struct paca_struct *paca = paca_ptrs[cpu]; 879 + paca->rfi_flush_fallback_area = l1d_flush_fallback_area; 880 + paca->l1d_flush_size = l1d_size; 906 881 } 907 882 } 908 883 909 - void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) 884 + void setup_rfi_flush(enum l1d_flush_type types, bool enable) 910 885 { 911 886 if (types & L1D_FLUSH_FALLBACK) { 912 - pr_info("rfi-flush: Using fallback displacement flush\n"); 887 + pr_info("rfi-flush: fallback displacement flush available\n"); 913 888 init_fallback_flush(); 914 889 } 915 890 916 891 if (types & L1D_FLUSH_ORI) 917 - pr_info("rfi-flush: Using ori type flush\n"); 892 + pr_info("rfi-flush: ori type flush available\n"); 918 893 919 894 if (types & L1D_FLUSH_MTTRIG) 920 - pr_info("rfi-flush: Using mttrig type flush\n"); 895 + pr_info("rfi-flush: mttrig type flush available\n"); 921 896 922 897 enabled_flush_types = types; 923 898 ··· 929 902 #ifdef CONFIG_DEBUG_FS 930 903 static int rfi_flush_set(void *data, u64 val) 931 904 { 905 + bool enable; 906 + 932 907 if (val == 1) 933 - rfi_flush_enable(true); 908 + enable = true; 934 909 else if (val == 0) 935 - rfi_flush_enable(false); 910 + enable = false; 936 911 else 937 912 return -EINVAL; 913 + 914 + /* Only do anything if we're changing state */ 915 + if (enable != rfi_flush) 916 + rfi_flush_enable(enable); 938 917 939 918 return 0; 940 919 } ··· 960 927 } 961 928 device_initcall(rfi_flush_debugfs_init); 962 929 #endif 963 - 964 - ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) 965 - { 966 - if (rfi_flush) 967 - return sprintf(buf, 
"Mitigation: RFI Flush\n"); 968 - 969 - return sprintf(buf, "Vulnerable\n"); 970 - } 971 930 #endif /* CONFIG_PPC_BOOK3S_64 */
+5
arch/powerpc/kernel/signal.h
··· 49 49 50 50 #else /* CONFIG_PPC64 */ 51 51 52 + extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, 53 + struct pt_regs *regs); 54 + extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, 55 + struct pt_regs *regs); 56 + 52 57 static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, 53 58 struct task_struct *tsk) 54 59 {
+2 -2
arch/powerpc/kernel/signal_32.c
··· 1045 1045 struct ucontext __user *new_ctx, 1046 1046 int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) 1047 1047 { 1048 - unsigned char tmp; 1048 + unsigned char tmp __maybe_unused; 1049 1049 int ctx_has_vsx_region = 0; 1050 1050 1051 1051 #ifdef CONFIG_PPC64 ··· 1231 1231 { 1232 1232 struct sig_dbg_op op; 1233 1233 int i; 1234 - unsigned char tmp; 1234 + unsigned char tmp __maybe_unused; 1235 1235 unsigned long new_msr = regs->msr; 1236 1236 #ifdef CONFIG_PPC_ADV_DEBUG_REGS 1237 1237 unsigned long new_dbcr0 = current->thread.debug.dbcr0;
+16 -7
arch/powerpc/kernel/smp.c
··· 123 123 * cpu_start field to become non-zero After we set cpu_start, 124 124 * the processor will continue on to secondary_start 125 125 */ 126 - if (!paca[nr].cpu_start) { 127 - paca[nr].cpu_start = 1; 126 + if (!paca_ptrs[nr]->cpu_start) { 127 + paca_ptrs[nr]->cpu_start = 1; 128 128 smp_mb(); 129 129 return 0; 130 130 } ··· 565 565 } 566 566 #endif 567 567 568 + #ifdef CONFIG_NMI_IPI 569 + static void stop_this_cpu(struct pt_regs *regs) 570 + #else 568 571 static void stop_this_cpu(void *dummy) 572 + #endif 569 573 { 570 574 /* Remove this CPU */ 571 575 set_cpu_online(smp_processor_id(), false); 572 576 573 - local_irq_disable(); 577 + hard_irq_disable(); 578 + spin_begin(); 574 579 while (1) 575 - ; 580 + spin_cpu_relax(); 576 581 } 577 582 578 583 void smp_send_stop(void) 579 584 { 585 + #ifdef CONFIG_NMI_IPI 586 + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000); 587 + #else 580 588 smp_call_function(stop_this_cpu, NULL, 0); 589 + #endif 581 590 } 582 591 583 592 struct thread_info *current_set[NR_CPUS]; ··· 666 657 { 667 658 BUG_ON(smp_processor_id() != boot_cpuid); 668 659 #ifdef CONFIG_PPC64 669 - paca[boot_cpuid].__current = current; 660 + paca_ptrs[boot_cpuid]->__current = current; 670 661 #endif 671 662 set_numa_node(numa_cpu_lookup_table[boot_cpuid]); 672 663 current_set[boot_cpuid] = task_thread_info(current); ··· 757 748 struct thread_info *ti = task_thread_info(idle); 758 749 759 750 #ifdef CONFIG_PPC64 760 - paca[cpu].__current = idle; 761 - paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; 751 + paca_ptrs[cpu]->__current = idle; 752 + paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; 762 753 #endif 763 754 ti->cpu = cpu; 764 755 secondary_ti = current_set[cpu] = ti;
+10 -12
arch/powerpc/kernel/sysfs.c
··· 20 20 #include <asm/firmware.h> 21 21 22 22 #include "cacheinfo.h" 23 + #include "setup.h" 23 24 24 25 #ifdef CONFIG_PPC64 25 26 #include <asm/paca.h> ··· 589 588 590 589 static void sysfs_create_dscr_default(void) 591 590 { 592 - int err = 0; 593 - if (cpu_has_feature(CPU_FTR_DSCR)) 594 - err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); 595 - } 596 - 597 - void __init record_spr_defaults(void) 598 - { 599 - int cpu; 600 - 601 591 if (cpu_has_feature(CPU_FTR_DSCR)) { 602 - dscr_default = mfspr(SPRN_DSCR); 603 - for (cpu = 0; cpu < nr_cpu_ids; cpu++) 604 - paca[cpu].dscr_default = dscr_default; 592 + int err = 0; 593 + int cpu; 594 + 595 + dscr_default = spr_default_dscr; 596 + for_each_possible_cpu(cpu) 597 + paca_ptrs[cpu]->dscr_default = dscr_default; 598 + 599 + err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); 605 600 } 606 601 } 602 + 607 603 #endif /* CONFIG_PPC64 */ 608 604 609 605 #ifdef HAS_PPC_PMC_PA6T
+4 -1
arch/powerpc/kernel/time.c
··· 266 266 267 267 static inline u64 calculate_stolen_time(u64 stop_tb) 268 268 { 269 + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) 270 + return 0; 271 + 269 272 if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) 270 273 return scan_dispatch_log(stop_tb); 271 274 ··· 1237 1234 static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) 1238 1235 { 1239 1236 ppc_md.get_rtc_time(tm); 1240 - return rtc_valid_tm(tm); 1237 + return 0; 1241 1238 } 1242 1239 1243 1240 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
+16 -15
arch/powerpc/kernel/traps.c
··· 208 208 } 209 209 raw_local_irq_restore(flags); 210 210 211 + /* 212 + * system_reset_excption handles debugger, crash dump, panic, for 0x100 213 + */ 214 + if (TRAP(regs) == 0x100) 215 + return; 216 + 211 217 crash_fadump(regs, "die oops"); 212 218 213 219 if (kexec_should_crash(current)) ··· 278 272 { 279 273 unsigned long flags; 280 274 281 - if (debugger(regs)) 282 - return; 275 + /* 276 + * system_reset_excption handles debugger, crash dump, panic, for 0x100 277 + */ 278 + if (TRAP(regs) != 0x100) { 279 + if (debugger(regs)) 280 + return; 281 + } 283 282 284 283 flags = oops_begin(regs); 285 284 if (__die(str, regs, err)) ··· 471 460 /* single-step stuff */ 472 461 #define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC) 473 462 #define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC) 474 - 463 + #define clear_br_trace(regs) do {} while(0) 475 464 #else 476 465 /* On non-4xx, the reason for the machine check or program 477 466 exception is in the MSR. */ ··· 484 473 485 474 #define single_stepping(regs) ((regs)->msr & MSR_SE) 486 475 #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) 476 + #define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) 487 477 #endif 488 478 489 479 #if defined(CONFIG_E500) ··· 1000 988 enum ctx_state prev_state = exception_enter(); 1001 989 1002 990 clear_single_step(regs); 991 + clear_br_trace(regs); 1003 992 1004 993 if (kprobe_post_handler(regs)) 1005 994 return; ··· 1505 1492 bad_page_fault(regs, regs->dar, sig); 1506 1493 1507 1494 bail: 1508 - exception_exit(prev_state); 1509 - } 1510 - 1511 - void slb_miss_bad_addr(struct pt_regs *regs) 1512 - { 1513 - enum ctx_state prev_state = exception_enter(); 1514 - 1515 - if (user_mode(regs)) 1516 - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); 1517 - else 1518 - bad_page_fault(regs, regs->dar, SIGSEGV); 1519 - 1520 1495 exception_exit(prev_state); 1521 1496 } 1522 1497
+7 -5
arch/powerpc/kernel/vdso.c
··· 99 99 CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, 100 100 "__kernel_sync_dicache", "__kernel_sync_dicache_p5" 101 101 }, 102 + #ifdef CONFIG_PPC32 102 103 { 103 - CPU_FTR_USE_TB, 0, 104 + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 104 105 "__kernel_gettimeofday", NULL 105 106 }, 106 107 { 107 - CPU_FTR_USE_TB, 0, 108 + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 108 109 "__kernel_clock_gettime", NULL 109 110 }, 110 111 { 111 - CPU_FTR_USE_TB, 0, 112 + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 112 113 "__kernel_clock_getres", NULL 113 114 }, 114 115 { 115 - CPU_FTR_USE_TB, 0, 116 + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 116 117 "__kernel_get_tbfreq", NULL 117 118 }, 118 119 { 119 - CPU_FTR_USE_TB, 0, 120 + CPU_FTR_USE_RTC, CPU_FTR_USE_RTC, 120 121 "__kernel_time", NULL 121 122 }, 123 + #endif 122 124 }; 123 125 124 126 /*
+7
arch/powerpc/kvm/Makefile
··· 74 74 book3s_64_mmu_hv.o \ 75 75 book3s_64_mmu_radix.o 76 76 77 + kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ 78 + book3s_hv_tm.o 79 + 77 80 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 78 81 book3s_hv_rm_xics.o book3s_hv_rm_xive.o 82 + 83 + kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \ 84 + book3s_hv_tm_builtin.o 79 85 80 86 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 81 87 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ ··· 90 84 book3s_hv_rm_mmu.o \ 91 85 book3s_hv_ras.o \ 92 86 book3s_hv_builtin.o \ 87 + $(kvm-book3s_64-builtin-tm-objs-y) \ 93 88 $(kvm-book3s_64-builtin-xics-objs-y) 94 89 endif 95 90
+40 -15
arch/powerpc/kvm/book3s_hv.c
··· 49 49 #include <asm/reg.h> 50 50 #include <asm/ppc-opcode.h> 51 51 #include <asm/asm-prototypes.h> 52 + #include <asm/debug.h> 52 53 #include <asm/disassemble.h> 53 54 #include <asm/cputable.h> 54 55 #include <asm/cacheflush.h> ··· 171 170 172 171 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 173 172 if (cpu >= 0 && cpu < nr_cpu_ids) { 174 - if (paca[cpu].kvm_hstate.xics_phys) { 173 + if (paca_ptrs[cpu]->kvm_hstate.xics_phys) { 175 174 xics_wake_cpu(cpu); 176 175 return true; 177 176 } ··· 499 498 * use 640 bytes of the structure though, so we should accept 500 499 * clients that set a size of 640. 501 500 */ 502 - if (len < 640) 501 + BUILD_BUG_ON(sizeof(struct lppaca) != 640); 502 + if (len < sizeof(struct lppaca)) 503 503 break; 504 504 vpap = &tvcpu->arch.vpa; 505 505 err = 0; ··· 742 740 return H_SUCCESS; 743 741 case H_SET_MODE_RESOURCE_SET_DAWR: 744 742 if (!kvmppc_power8_compatible(vcpu)) 743 + return H_P2; 744 + if (!ppc_breakpoint_available()) 745 745 return H_P2; 746 746 if (mflags) 747 747 return H_UNSUPPORTED_FLAG_START; ··· 1210 1206 r = RESUME_GUEST; 1211 1207 } 1212 1208 break; 1209 + 1210 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1211 + case BOOK3S_INTERRUPT_HV_SOFTPATCH: 1212 + /* 1213 + * This occurs for various TM-related instructions that 1214 + * we need to emulate on POWER9 DD2.2. We have already 1215 + * handled the cases where the guest was in real-suspend 1216 + * mode and was transitioning to transactional state. 1217 + */ 1218 + r = kvmhv_p9_tm_emulation(vcpu); 1219 + break; 1220 + #endif 1221 + 1213 1222 case BOOK3S_INTERRUPT_HV_RM_HARD: 1214 1223 r = RESUME_PASSTHROUGH; 1215 1224 break; ··· 1995 1978 * turn off the HFSCR bit, which causes those instructions to trap. 
1996 1979 */ 1997 1980 vcpu->arch.hfscr = mfspr(SPRN_HFSCR); 1998 - if (!cpu_has_feature(CPU_FTR_TM)) 1981 + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) 1982 + vcpu->arch.hfscr |= HFSCR_TM; 1983 + else if (!cpu_has_feature(CPU_FTR_TM_COMP)) 1999 1984 vcpu->arch.hfscr &= ~HFSCR_TM; 2000 1985 if (cpu_has_feature(CPU_FTR_ARCH_300)) 2001 1986 vcpu->arch.hfscr &= ~HFSCR_MSGP; ··· 2159 2140 struct paca_struct *tpaca; 2160 2141 long timeout = 10000; 2161 2142 2162 - tpaca = &paca[cpu]; 2143 + tpaca = paca_ptrs[cpu]; 2163 2144 2164 2145 /* Ensure the thread won't go into the kernel if it wakes */ 2165 2146 tpaca->kvm_hstate.kvm_vcpu = NULL; ··· 2192 2173 { 2193 2174 struct paca_struct *tpaca; 2194 2175 2195 - tpaca = &paca[cpu]; 2176 + tpaca = paca_ptrs[cpu]; 2196 2177 tpaca->kvm_hstate.hwthread_req = 0; 2197 2178 tpaca->kvm_hstate.kvm_vcpu = NULL; 2198 2179 tpaca->kvm_hstate.kvm_vcore = NULL; ··· 2258 2239 vcpu->arch.thread_cpu = cpu; 2259 2240 cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); 2260 2241 } 2261 - tpaca = &paca[cpu]; 2242 + tpaca = paca_ptrs[cpu]; 2262 2243 tpaca->kvm_hstate.kvm_vcpu = vcpu; 2263 2244 tpaca->kvm_hstate.ptid = cpu - vc->pcpu; 2245 + tpaca->kvm_hstate.fake_suspend = 0; 2264 2246 /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ 2265 2247 smp_wmb(); 2266 2248 tpaca->kvm_hstate.kvm_vcore = vc; ··· 2284 2264 * for any threads that still have a non-NULL vcore ptr. 
2285 2265 */ 2286 2266 for (i = 1; i < n_threads; ++i) 2287 - if (paca[cpu + i].kvm_hstate.kvm_vcore) 2267 + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) 2288 2268 break; 2289 2269 if (i == n_threads) { 2290 2270 HMT_medium(); ··· 2294 2274 } 2295 2275 HMT_medium(); 2296 2276 for (i = 1; i < n_threads; ++i) 2297 - if (paca[cpu + i].kvm_hstate.kvm_vcore) 2277 + if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore) 2298 2278 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); 2299 2279 } 2300 2280 ··· 2826 2806 } 2827 2807 2828 2808 for (thr = 0; thr < controlled_threads; ++thr) { 2829 - paca[pcpu + thr].kvm_hstate.tid = thr; 2830 - paca[pcpu + thr].kvm_hstate.napping = 0; 2831 - paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; 2809 + struct paca_struct *paca = paca_ptrs[pcpu + thr]; 2810 + 2811 + paca->kvm_hstate.tid = thr; 2812 + paca->kvm_hstate.napping = 0; 2813 + paca->kvm_hstate.kvm_split_mode = sip; 2832 2814 } 2833 2815 2834 2816 /* Initiate micro-threading (split-core) on POWER8 if required */ ··· 2945 2923 } else if (hpt_on_radix) { 2946 2924 /* Wait for all threads to have seen final sync */ 2947 2925 for (thr = 1; thr < controlled_threads; ++thr) { 2948 - while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) { 2926 + struct paca_struct *paca = paca_ptrs[pcpu + thr]; 2927 + 2928 + while (paca->kvm_hstate.kvm_split_mode) { 2949 2929 HMT_low(); 2950 2930 barrier(); 2951 2931 } ··· 4412 4388 int node = cpu_to_node(first_cpu); 4413 4389 4414 4390 /* Ignore if it is already allocated. 
*/ 4415 - if (paca[first_cpu].sibling_subcore_state) 4391 + if (paca_ptrs[first_cpu]->sibling_subcore_state) 4416 4392 continue; 4417 4393 4418 4394 sibling_subcore_state = ··· 4427 4403 for (j = 0; j < threads_per_core; j++) { 4428 4404 int cpu = first_cpu + j; 4429 4405 4430 - paca[cpu].sibling_subcore_state = sibling_subcore_state; 4406 + paca_ptrs[cpu]->sibling_subcore_state = 4407 + sibling_subcore_state; 4431 4408 } 4432 4409 } 4433 4410 return 0; ··· 4455 4430 4456 4431 /* 4457 4432 * We need a way of accessing the XICS interrupt controller, 4458 - * either directly, via paca[cpu].kvm_hstate.xics_phys, or 4433 + * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or 4459 4434 * indirectly, via OPAL. 4460 4435 */ 4461 4436 #ifdef CONFIG_SMP
+1 -1
arch/powerpc/kvm/book3s_hv_builtin.c
··· 251 251 return; 252 252 253 253 /* Else poke the target with an IPI */ 254 - xics_phys = paca[cpu].kvm_hstate.xics_phys; 254 + xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys; 255 255 if (xics_phys) 256 256 __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); 257 257 else
+1 -2
arch/powerpc/kvm/book3s_hv_interrupts.S
··· 79 79 li r5, 0 80 80 mtspr SPRN_MMCRA, r5 81 81 isync 82 - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 83 - lbz r5, LPPACA_PMCINUSE(r3) 82 + lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */ 84 83 cmpwi r5, 0 85 84 beq 31f /* skip if not */ 86 85 mfspr r5, SPRN_MMCR1
+177 -10
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 113 113 mtspr SPRN_SPRG_VDSO_WRITE,r3 114 114 115 115 /* Reload the host's PMU registers */ 116 - ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ 117 - lbz r4, LPPACA_PMCINUSE(r3) 116 + lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ 118 117 cmpwi r4, 0 119 118 beq 23f /* skip if not */ 120 119 BEGIN_FTR_SECTION ··· 785 786 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 786 787 787 788 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 789 + /* 790 + * Branch around the call if both CPU_FTR_TM and 791 + * CPU_FTR_P9_TM_HV_ASSIST are off. 792 + */ 788 793 BEGIN_FTR_SECTION 794 + b 91f 795 + END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 789 796 /* 790 797 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 791 798 */ 792 799 bl kvmppc_restore_tm 793 - END_FTR_SECTION_IFSET(CPU_FTR_TM) 800 + 91: 794 801 #endif 795 802 796 803 /* Load guest PMU registers */ ··· 890 885 ld r6, VCPU_DAWRX(r4) 891 886 ld r7, VCPU_CIABR(r4) 892 887 ld r8, VCPU_TAR(r4) 888 + /* 889 + * Handle broken DAWR case by not writing it. This means we 890 + * can still store the DAWR register for migration. 
891 + */ 892 + BEGIN_FTR_SECTION 893 893 mtspr SPRN_DAWR, r5 894 894 mtspr SPRN_DAWRX, r6 895 + END_FTR_SECTION_IFSET(CPU_FTR_DAWR) 895 896 mtspr SPRN_CIABR, r7 896 897 mtspr SPRN_TAR, r8 897 898 ld r5, VCPU_IC(r4) ··· 925 914 mtspr SPRN_ACOP, r6 926 915 mtspr SPRN_CSIGR, r7 927 916 mtspr SPRN_TACR, r8 917 + nop 928 918 FTR_SECTION_ELSE 929 919 /* POWER9-only registers */ 930 920 ld r5, VCPU_TID(r4) 931 921 ld r6, VCPU_PSSCR(r4) 922 + lbz r8, HSTATE_FAKE_SUSPEND(r13) 932 923 oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ 924 + rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG 933 925 ld r7, VCPU_HFSCR(r4) 934 926 mtspr SPRN_TIDR, r5 935 927 mtspr SPRN_PSSCR, r6 ··· 1384 1370 std r3, VCPU_CTR(r9) 1385 1371 std r4, VCPU_XER(r9) 1386 1372 1373 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1374 + /* For softpatch interrupt, go off and do TM instruction emulation */ 1375 + cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH 1376 + beq kvmppc_tm_emul 1377 + #endif 1378 + 1387 1379 /* If this is a page table miss then see if it's theirs or ours */ 1388 1380 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1389 1381 beq kvmppc_hdsi ··· 1767 1747 bl kvmppc_save_fp 1768 1748 1769 1749 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1750 + /* 1751 + * Branch around the call if both CPU_FTR_TM and 1752 + * CPU_FTR_P9_TM_HV_ASSIST are off. 
1753 + */ 1770 1754 BEGIN_FTR_SECTION 1755 + b 91f 1756 + END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 1771 1757 /* 1772 1758 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 1773 1759 */ 1774 1760 bl kvmppc_save_tm 1775 - END_FTR_SECTION_IFSET(CPU_FTR_TM) 1761 + 91: 1776 1762 #endif 1777 1763 1778 1764 /* Increment yield count if they have a VPA */ ··· 1878 1852 ld r6, STACK_SLOT_DAWR(r1) 1879 1853 ld r7, STACK_SLOT_DAWRX(r1) 1880 1854 mtspr SPRN_CIABR, r5 1855 + /* 1856 + * If the DAWR doesn't work, it's ok to write these here as 1857 + * this value should always be zero 1858 + */ 1881 1859 mtspr SPRN_DAWR, r6 1882 1860 mtspr SPRN_DAWRX, r7 1883 1861 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ··· 2084 2054 addi r1, r1, SFS 2085 2055 mtlr r0 2086 2056 blr 2057 + 2058 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2059 + /* 2060 + * Softpatch interrupt for transactional memory emulation cases 2061 + * on POWER9 DD2.2. This is early in the guest exit path - we 2062 + * haven't saved registers or done a treclaim yet. 2063 + */ 2064 + kvmppc_tm_emul: 2065 + /* Save instruction image in HEIR */ 2066 + mfspr r3, SPRN_HEIR 2067 + stw r3, VCPU_HEIR(r9) 2068 + 2069 + /* 2070 + * The cases we want to handle here are those where the guest 2071 + * is in real suspend mode and is trying to transition to 2072 + * transactional mode. 
2073 + */ 2074 + lbz r0, HSTATE_FAKE_SUSPEND(r13) 2075 + cmpwi r0, 0 /* keep exiting guest if in fake suspend */ 2076 + bne guest_exit_cont 2077 + rldicl r3, r11, 64 - MSR_TS_S_LG, 62 2078 + cmpwi r3, 1 /* or if not in suspend state */ 2079 + bne guest_exit_cont 2080 + 2081 + /* Call C code to do the emulation */ 2082 + mr r3, r9 2083 + bl kvmhv_p9_tm_emulation_early 2084 + nop 2085 + ld r9, HSTATE_KVM_VCPU(r13) 2086 + li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH 2087 + cmpwi r3, 0 2088 + beq guest_exit_cont /* continue exiting if not handled */ 2089 + ld r10, VCPU_PC(r9) 2090 + ld r11, VCPU_MSR(r9) 2091 + b fast_interrupt_c_return /* go back to guest if handled */ 2092 + #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 2087 2093 2088 2094 /* 2089 2095 * Check whether an HDSI is an HPTE not found fault or something else. ··· 2573 2507 li r3,0 2574 2508 blr 2575 2509 2510 + 2: 2511 + BEGIN_FTR_SECTION 2512 + /* POWER9 with disabled DAWR */ 2513 + li r3, H_HARDWARE 2514 + blr 2515 + END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) 2576 2516 /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2577 - 2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW 2517 + rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW 2578 2518 rlwimi r5, r4, 2, DAWRX_WT 2579 2519 clrrdi r4, r4, 3 2580 2520 std r4, VCPU_DAWR(r3) ··· 2660 2588 bl kvmppc_save_fp 2661 2589 2662 2590 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2591 + /* 2592 + * Branch around the call if both CPU_FTR_TM and 2593 + * CPU_FTR_P9_TM_HV_ASSIST are off. 
2594 + */ 2663 2595 BEGIN_FTR_SECTION 2596 + b 91f 2597 + END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 2664 2598 /* 2665 2599 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 2666 2600 */ 2667 2601 ld r9, HSTATE_KVM_VCPU(r13) 2668 2602 bl kvmppc_save_tm 2669 - END_FTR_SECTION_IFSET(CPU_FTR_TM) 2603 + 91: 2670 2604 #endif 2671 2605 2672 2606 /* ··· 2779 2701 #endif 2780 2702 2781 2703 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2704 + /* 2705 + * Branch around the call if both CPU_FTR_TM and 2706 + * CPU_FTR_P9_TM_HV_ASSIST are off. 2707 + */ 2782 2708 BEGIN_FTR_SECTION 2709 + b 91f 2710 + END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 2783 2711 /* 2784 2712 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 2785 2713 */ 2786 2714 bl kvmppc_restore_tm 2787 - END_FTR_SECTION_IFSET(CPU_FTR_TM) 2715 + 91: 2788 2716 #endif 2789 2717 2790 2718 /* load up FP state */ ··· 3117 3033 kvmppc_save_tm: 3118 3034 mflr r0 3119 3035 std r0, PPC_LR_STKOFF(r1) 3036 + stdu r1, -PPC_MIN_STKFRM(r1) 3120 3037 3121 3038 /* Turn on TM. */ 3122 3039 mfmsr r8 ··· 3132 3047 std r1, HSTATE_HOST_R1(r13) 3133 3048 li r3, TM_CAUSE_KVM_RESCHED 3134 3049 3050 + BEGIN_FTR_SECTION 3051 + lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */ 3052 + cmpwi r0, 0 3053 + beq 3f 3054 + rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */ 3055 + beq 4f 3056 + BEGIN_FTR_SECTION_NESTED(96) 3057 + bl pnv_power9_force_smt4_catch 3058 + END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) 3059 + nop 3060 + b 6f 3061 + 3: 3062 + /* Emulation of the treclaim instruction needs TEXASR before treclaim */ 3063 + mfspr r6, SPRN_TEXASR 3064 + std r6, VCPU_ORIG_TEXASR(r9) 3065 + 6: 3066 + END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) 3067 + 3135 3068 /* Clear the MSR RI since r1, r13 are all going to be foobar. 
*/ 3136 3069 li r5, 0 3137 3070 mtmsrd r5, 1 ··· 3161 3058 SET_SCRATCH0(r13) 3162 3059 GET_PACA(r13) 3163 3060 std r9, PACATMSCRATCH(r13) 3061 + 3062 + /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */ 3063 + BEGIN_FTR_SECTION 3064 + lbz r9, HSTATE_FAKE_SUSPEND(r13) 3065 + cmpwi r9, 0 3066 + beq 2f 3067 + /* 3068 + * We were in fake suspend, so we are not going to save the 3069 + * register state as the guest checkpointed state (since 3070 + * we already have it), therefore we can now use any volatile GPR. 3071 + */ 3072 + /* Reload stack pointer and TOC. */ 3073 + ld r1, HSTATE_HOST_R1(r13) 3074 + ld r2, PACATOC(r13) 3075 + /* Set MSR RI now we have r1 and r13 back. */ 3076 + li r5, MSR_RI 3077 + mtmsrd r5, 1 3078 + HMT_MEDIUM 3079 + ld r6, HSTATE_DSCR(r13) 3080 + mtspr SPRN_DSCR, r6 3081 + BEGIN_FTR_SECTION_NESTED(96) 3082 + bl pnv_power9_force_smt4_release 3083 + END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96) 3084 + nop 3085 + 3086 + 4: 3087 + mfspr r3, SPRN_PSSCR 3088 + /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */ 3089 + li r0, PSSCR_FAKE_SUSPEND 3090 + andc r3, r3, r0 3091 + mtspr SPRN_PSSCR, r3 3092 + ld r9, HSTATE_KVM_VCPU(r13) 3093 + /* Don't save TEXASR, use value from last exit in real suspend state */ 3094 + b 11f 3095 + 2: 3096 + END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) 3097 + 3164 3098 ld r9, HSTATE_KVM_VCPU(r13) 3165 3099 3166 3100 /* Get a few more GPRs free. */ ··· 3268 3128 * change these outside of a transaction, so they must always be 3269 3129 * context switched. 
3270 3130 */ 3131 + mfspr r7, SPRN_TEXASR 3132 + std r7, VCPU_TEXASR(r9) 3133 + 11: 3271 3134 mfspr r5, SPRN_TFHAR 3272 3135 mfspr r6, SPRN_TFIAR 3273 - mfspr r7, SPRN_TEXASR 3274 3136 std r5, VCPU_TFHAR(r9) 3275 3137 std r6, VCPU_TFIAR(r9) 3276 - std r7, VCPU_TEXASR(r9) 3277 3138 3139 + addi r1, r1, PPC_MIN_STKFRM 3278 3140 ld r0, PPC_LR_STKOFF(r1) 3279 3141 mtlr r0 3280 3142 blr ··· 3311 3169 mtspr SPRN_TFIAR, r6 3312 3170 mtspr SPRN_TEXASR, r7 3313 3171 3172 + li r0, 0 3173 + stb r0, HSTATE_FAKE_SUSPEND(r13) 3314 3174 ld r5, VCPU_MSR(r4) 3315 3175 rldicl. r5, r5, 64 - MSR_TS_S_LG, 62 3316 3176 beqlr /* TM not active in guest */ ··· 3325 3181 */ 3326 3182 oris r7, r7, (TEXASR_FS)@h 3327 3183 mtspr SPRN_TEXASR, r7 3184 + 3185 + /* 3186 + * If we are doing TM emulation for the guest on a POWER9 DD2, 3187 + * then we don't actually do a trechkpt -- we either set up 3188 + * fake-suspend mode, or emulate a TM rollback. 3189 + */ 3190 + BEGIN_FTR_SECTION 3191 + b .Ldo_tm_fake_load 3192 + END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST) 3328 3193 3329 3194 /* 3330 3195 * We need to load up the checkpointed state for the guest. ··· 3407 3254 /* Set the MSR RI since we have our registers back. */ 3408 3255 li r5, MSR_RI 3409 3256 mtmsrd r5, 1 3410 - 3257 + 9: 3411 3258 ld r0, PPC_LR_STKOFF(r1) 3412 3259 mtlr r0 3413 3260 blr 3261 + 3262 + .Ldo_tm_fake_load: 3263 + cmpwi r5, 1 /* check for suspended state */ 3264 + bgt 10f 3265 + stb r5, HSTATE_FAKE_SUSPEND(r13) 3266 + b 9b /* and return */ 3267 + 10: stdu r1, -PPC_MIN_STKFRM(r1) 3268 + /* guest is in transactional state, so simulate rollback */ 3269 + mr r3, r4 3270 + bl kvmhv_emulate_tm_rollback 3271 + nop 3272 + ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ 3273 + addi r1, r1, PPC_MIN_STKFRM 3274 + b 9b 3414 3275 #endif 3415 3276 3416 3277 /*
+216
arch/powerpc/kvm/book3s_hv_tm.c
··· 1 + /* 2 + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License, version 2, as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <linux/kvm_host.h> 10 + 11 + #include <asm/kvm_ppc.h> 12 + #include <asm/kvm_book3s.h> 13 + #include <asm/kvm_book3s_64.h> 14 + #include <asm/reg.h> 15 + #include <asm/ppc-opcode.h> 16 + 17 + static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause) 18 + { 19 + u64 texasr, tfiar; 20 + u64 msr = vcpu->arch.shregs.msr; 21 + 22 + tfiar = vcpu->arch.pc & ~0x3ull; 23 + texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT; 24 + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) 25 + texasr |= TEXASR_SUSP; 26 + if (msr & MSR_PR) { 27 + texasr |= TEXASR_PR; 28 + tfiar |= 1; 29 + } 30 + vcpu->arch.tfiar = tfiar; 31 + /* Preserve ROT and TL fields of existing TEXASR */ 32 + vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr; 33 + } 34 + 35 + /* 36 + * This gets called on a softpatch interrupt on POWER9 DD2.2 processors. 37 + * We expect to find a TM-related instruction to be emulated. The 38 + * instruction image is in vcpu->arch.emul_inst. If the guest was in 39 + * TM suspended or transactional state, the checkpointed state has been 40 + * reclaimed and is in the vcpu struct. The CPU is in virtual mode in 41 + * host context. 42 + */ 43 + int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) 44 + { 45 + u32 instr = vcpu->arch.emul_inst; 46 + u64 msr = vcpu->arch.shregs.msr; 47 + u64 newmsr, bescr; 48 + int ra, rs; 49 + 50 + switch (instr & 0xfc0007ff) { 51 + case PPC_INST_RFID: 52 + /* XXX do we need to check for PR=0 here? 
*/ 53 + newmsr = vcpu->arch.shregs.srr1; 54 + /* should only get here for Sx -> T1 transition */ 55 + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && 56 + MSR_TM_TRANSACTIONAL(newmsr) && 57 + (newmsr & MSR_TM))); 58 + newmsr = sanitize_msr(newmsr); 59 + vcpu->arch.shregs.msr = newmsr; 60 + vcpu->arch.cfar = vcpu->arch.pc - 4; 61 + vcpu->arch.pc = vcpu->arch.shregs.srr0; 62 + return RESUME_GUEST; 63 + 64 + case PPC_INST_RFEBB: 65 + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { 66 + /* generate an illegal instruction interrupt */ 67 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 68 + return RESUME_GUEST; 69 + } 70 + /* check EBB facility is available */ 71 + if (!(vcpu->arch.hfscr & HFSCR_EBB)) { 72 + /* generate an illegal instruction interrupt */ 73 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 74 + return RESUME_GUEST; 75 + } 76 + if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) { 77 + /* generate a facility unavailable interrupt */ 78 + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | 79 + ((u64)FSCR_EBB_LG << 56); 80 + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); 81 + return RESUME_GUEST; 82 + } 83 + bescr = vcpu->arch.bescr; 84 + /* expect to see a S->T transition requested */ 85 + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && 86 + ((bescr >> 30) & 3) == 2)); 87 + bescr &= ~BESCR_GE; 88 + if (instr & (1 << 11)) 89 + bescr |= BESCR_GE; 90 + vcpu->arch.bescr = bescr; 91 + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; 92 + vcpu->arch.shregs.msr = msr; 93 + vcpu->arch.cfar = vcpu->arch.pc - 4; 94 + vcpu->arch.pc = vcpu->arch.ebbrr; 95 + return RESUME_GUEST; 96 + 97 + case PPC_INST_MTMSRD: 98 + /* XXX do we need to check for PR=0 here? 
*/ 99 + rs = (instr >> 21) & 0x1f; 100 + newmsr = kvmppc_get_gpr(vcpu, rs); 101 + /* check this is a Sx -> T1 transition */ 102 + WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) && 103 + MSR_TM_TRANSACTIONAL(newmsr) && 104 + (newmsr & MSR_TM))); 105 + /* mtmsrd doesn't change LE */ 106 + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); 107 + newmsr = sanitize_msr(newmsr); 108 + vcpu->arch.shregs.msr = newmsr; 109 + return RESUME_GUEST; 110 + 111 + case PPC_INST_TSR: 112 + /* check for PR=1 and arch 2.06 bit set in PCR */ 113 + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { 114 + /* generate an illegal instruction interrupt */ 115 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 116 + return RESUME_GUEST; 117 + } 118 + /* check for TM disabled in the HFSCR or MSR */ 119 + if (!(vcpu->arch.hfscr & HFSCR_TM)) { 120 + /* generate an illegal instruction interrupt */ 121 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 122 + return RESUME_GUEST; 123 + } 124 + if (!(msr & MSR_TM)) { 125 + /* generate a facility unavailable interrupt */ 126 + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | 127 + ((u64)FSCR_TM_LG << 56); 128 + kvmppc_book3s_queue_irqprio(vcpu, 129 + BOOK3S_INTERRUPT_FAC_UNAVAIL); 130 + return RESUME_GUEST; 131 + } 132 + /* Set CR0 to indicate previous transactional state */ 133 + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 134 + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 135 + /* L=1 => tresume, L=0 => tsuspend */ 136 + if (instr & (1 << 21)) { 137 + if (MSR_TM_SUSPENDED(msr)) 138 + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; 139 + } else { 140 + if (MSR_TM_TRANSACTIONAL(msr)) 141 + msr = (msr & ~MSR_TS_MASK) | MSR_TS_S; 142 + } 143 + vcpu->arch.shregs.msr = msr; 144 + return RESUME_GUEST; 145 + 146 + case PPC_INST_TRECLAIM: 147 + /* check for TM disabled in the HFSCR or MSR */ 148 + if (!(vcpu->arch.hfscr & HFSCR_TM)) { 149 + /* generate an illegal instruction interrupt */ 150 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 151 + return 
RESUME_GUEST; 152 + } 153 + if (!(msr & MSR_TM)) { 154 + /* generate a facility unavailable interrupt */ 155 + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | 156 + ((u64)FSCR_TM_LG << 56); 157 + kvmppc_book3s_queue_irqprio(vcpu, 158 + BOOK3S_INTERRUPT_FAC_UNAVAIL); 159 + return RESUME_GUEST; 160 + } 161 + /* If no transaction active, generate TM bad thing */ 162 + if (!MSR_TM_ACTIVE(msr)) { 163 + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); 164 + return RESUME_GUEST; 165 + } 166 + /* If failure was not previously recorded, recompute TEXASR */ 167 + if (!(vcpu->arch.orig_texasr & TEXASR_FS)) { 168 + ra = (instr >> 16) & 0x1f; 169 + if (ra) 170 + ra = kvmppc_get_gpr(vcpu, ra) & 0xff; 171 + emulate_tx_failure(vcpu, ra); 172 + } 173 + 174 + copy_from_checkpoint(vcpu); 175 + 176 + /* Set CR0 to indicate previous transactional state */ 177 + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 178 + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 179 + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; 180 + return RESUME_GUEST; 181 + 182 + case PPC_INST_TRECHKPT: 183 + /* XXX do we need to check for PR=0 here? 
*/ 184 + /* check for TM disabled in the HFSCR or MSR */ 185 + if (!(vcpu->arch.hfscr & HFSCR_TM)) { 186 + /* generate an illegal instruction interrupt */ 187 + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 188 + return RESUME_GUEST; 189 + } 190 + if (!(msr & MSR_TM)) { 191 + /* generate a facility unavailable interrupt */ 192 + vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) | 193 + ((u64)FSCR_TM_LG << 56); 194 + kvmppc_book3s_queue_irqprio(vcpu, 195 + BOOK3S_INTERRUPT_FAC_UNAVAIL); 196 + return RESUME_GUEST; 197 + } 198 + /* If transaction active or TEXASR[FS] = 0, bad thing */ 199 + if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) { 200 + kvmppc_core_queue_program(vcpu, SRR1_PROGTM); 201 + return RESUME_GUEST; 202 + } 203 + 204 + copy_to_checkpoint(vcpu); 205 + 206 + /* Set CR0 to indicate previous transactional state */ 207 + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 208 + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); 209 + vcpu->arch.shregs.msr = msr | MSR_TS_S; 210 + return RESUME_GUEST; 211 + } 212 + 213 + /* What should we do here? We didn't recognize the instruction */ 214 + WARN_ON_ONCE(1); 215 + return RESUME_GUEST; 216 + }
+109
arch/powerpc/kvm/book3s_hv_tm_builtin.c
··· 1 + /* 2 + * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License, version 2, as 6 + * published by the Free Software Foundation. 7 + */ 8 + 9 + #include <linux/kvm_host.h> 10 + 11 + #include <asm/kvm_ppc.h> 12 + #include <asm/kvm_book3s.h> 13 + #include <asm/kvm_book3s_64.h> 14 + #include <asm/reg.h> 15 + #include <asm/ppc-opcode.h> 16 + 17 + /* 18 + * This handles the cases where the guest is in real suspend mode 19 + * and we want to get back to the guest without dooming the transaction. 20 + * The caller has checked that the guest is in real-suspend mode 21 + * (MSR[TS] = S and the fake-suspend flag is not set). 22 + */ 23 + int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) 24 + { 25 + u32 instr = vcpu->arch.emul_inst; 26 + u64 newmsr, msr, bescr; 27 + int rs; 28 + 29 + switch (instr & 0xfc0007ff) { 30 + case PPC_INST_RFID: 31 + /* XXX do we need to check for PR=0 here? 
*/ 32 + newmsr = vcpu->arch.shregs.srr1; 33 + /* should only get here for Sx -> T1 transition */ 34 + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) 35 + return 0; 36 + newmsr = sanitize_msr(newmsr); 37 + vcpu->arch.shregs.msr = newmsr; 38 + vcpu->arch.cfar = vcpu->arch.pc - 4; 39 + vcpu->arch.pc = vcpu->arch.shregs.srr0; 40 + return 1; 41 + 42 + case PPC_INST_RFEBB: 43 + /* check for PR=1 and arch 2.06 bit set in PCR */ 44 + msr = vcpu->arch.shregs.msr; 45 + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) 46 + return 0; 47 + /* check EBB facility is available */ 48 + if (!(vcpu->arch.hfscr & HFSCR_EBB) || 49 + ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB))) 50 + return 0; 51 + bescr = mfspr(SPRN_BESCR); 52 + /* expect to see a S->T transition requested */ 53 + if (((bescr >> 30) & 3) != 2) 54 + return 0; 55 + bescr &= ~BESCR_GE; 56 + if (instr & (1 << 11)) 57 + bescr |= BESCR_GE; 58 + mtspr(SPRN_BESCR, bescr); 59 + msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; 60 + vcpu->arch.shregs.msr = msr; 61 + vcpu->arch.cfar = vcpu->arch.pc - 4; 62 + vcpu->arch.pc = mfspr(SPRN_EBBRR); 63 + return 1; 64 + 65 + case PPC_INST_MTMSRD: 66 + /* XXX do we need to check for PR=0 here? 
*/ 67 + rs = (instr >> 21) & 0x1f; 68 + newmsr = kvmppc_get_gpr(vcpu, rs); 69 + msr = vcpu->arch.shregs.msr; 70 + /* check this is a Sx -> T1 transition */ 71 + if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM))) 72 + return 0; 73 + /* mtmsrd doesn't change LE */ 74 + newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE); 75 + newmsr = sanitize_msr(newmsr); 76 + vcpu->arch.shregs.msr = newmsr; 77 + return 1; 78 + 79 + case PPC_INST_TSR: 80 + /* we know the MSR has the TS field = S (0b01) here */ 81 + msr = vcpu->arch.shregs.msr; 82 + /* check for PR=1 and arch 2.06 bit set in PCR */ 83 + if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) 84 + return 0; 85 + /* check for TM disabled in the HFSCR or MSR */ 86 + if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM)) 87 + return 0; 88 + /* L=1 => tresume => set TS to T (0b10) */ 89 + if (instr & (1 << 21)) 90 + vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T; 91 + /* Set CR0 to 0b0010 */ 92 + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000; 93 + return 1; 94 + } 95 + 96 + return 0; 97 + } 98 + 99 + /* 100 + * This is called when we are returning to a guest in TM transactional 101 + * state. We roll the guest state back to the checkpointed state. 102 + */ 103 + void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu) 104 + { 105 + vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */ 106 + vcpu->arch.pc = vcpu->arch.tfhar; 107 + copy_from_checkpoint(vcpu); 108 + vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000; 109 + }
-6
arch/powerpc/kvm/emulate.c
··· 45 45 #ifdef CONFIG_PPC_BOOK3S 46 46 /* mtdec lowers the interrupt line when positive. */ 47 47 kvmppc_core_dequeue_dec(vcpu); 48 - 49 - /* POWER4+ triggers a dec interrupt if the value is < 0 */ 50 - if (vcpu->arch.dec & 0x80000000) { 51 - kvmppc_core_queue_dec(vcpu); 52 - return; 53 - } 54 48 #endif 55 49 56 50 #ifdef CONFIG_BOOKE
+4 -1
arch/powerpc/kvm/powerpc.c
··· 646 646 r = hv_enabled; 647 647 break; 648 648 #endif 649 + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 649 650 case KVM_CAP_PPC_HTM: 650 651 r = hv_enabled && 651 - (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP); 652 + (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) || 653 + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)); 652 654 break; 655 + #endif 653 656 default: 654 657 r = 0; 655 658 break;
+4 -2
arch/powerpc/lib/Makefile
··· 22 22 extra-$(CONFIG_PPC64) += crtsavres.o 23 23 endif 24 24 25 + obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ 26 + memcpy_power7.o 27 + 25 28 obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ 26 - copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \ 27 - memcpy_64.o memcmp_64.o pmem.o 29 + string_64.o memcpy_64.o memcmp_64.o pmem.o 28 30 29 31 obj64-$(CONFIG_SMP) += locks.o 30 32 obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
+2
arch/powerpc/lib/copypage_64.S
··· 21 21 BEGIN_FTR_SECTION 22 22 lis r5,PAGE_SIZE@h 23 23 FTR_SECTION_ELSE 24 + #ifdef CONFIG_PPC_BOOK3S_64 24 25 b copypage_power7 26 + #endif 25 27 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 26 28 ori r5,r5,PAGE_SIZE@l 27 29 BEGIN_FTR_SECTION
-3
arch/powerpc/lib/copypage_power7.S
··· 42 42 lis r8,0x8000 /* GO=1 */ 43 43 clrldi r8,r8,32 44 44 45 - .machine push 46 - .machine "power4" 47 45 /* setup read stream 0 */ 48 46 dcbt 0,r4,0b01000 /* addr from */ 49 47 dcbt 0,r7,0b01010 /* length and depth from */ ··· 50 52 dcbtst 0,r10,0b01010 /* length and depth to */ 51 53 eieio 52 54 dcbt 0,r8,0b01010 /* all streams GO */ 53 - .machine pop 54 55 55 56 #ifdef CONFIG_ALTIVEC 56 57 mflr r0
+2
arch/powerpc/lib/copyuser_64.S
··· 20 20 21 21 .align 7 22 22 _GLOBAL_TOC(__copy_tofrom_user) 23 + #ifdef CONFIG_PPC_BOOK3S_64 23 24 BEGIN_FTR_SECTION 24 25 nop 25 26 FTR_SECTION_ELSE 26 27 b __copy_tofrom_user_power7 27 28 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 29 + #endif 28 30 _GLOBAL(__copy_tofrom_user_base) 29 31 /* first check for a whole page copy on a page boundary */ 30 32 cmpldi cr1,r5,16
-3
arch/powerpc/lib/copyuser_power7.S
··· 312 312 lis r8,0x8000 /* GO=1 */ 313 313 clrldi r8,r8,32 314 314 315 - .machine push 316 - .machine "power4" 317 315 /* setup read stream 0 */ 318 316 dcbt 0,r6,0b01000 /* addr from */ 319 317 dcbt 0,r7,0b01010 /* length and depth from */ ··· 320 322 dcbtst 0,r10,0b01010 /* length and depth to */ 321 323 eieio 322 324 dcbt 0,r8,0b01010 /* all streams GO */ 323 - .machine pop 324 325 325 326 beq cr1,.Lunwind_stack_nonvmx_copy 326 327
+8 -1
arch/powerpc/lib/feature-fixups.c
··· 153 153 patch_instruction(dest + 2, instrs[2]); 154 154 } 155 155 156 - printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); 156 + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, 157 + (types == L1D_FLUSH_NONE) ? "no" : 158 + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : 159 + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) 160 + ? "ori+mttrig type" 161 + : "ori type" : 162 + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" 163 + : "unknown"); 157 164 } 158 165 #endif /* CONFIG_PPC_BOOK3S_64 */ 159 166
+2
arch/powerpc/lib/memcpy_64.S
··· 19 19 std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */ 20 20 #endif 21 21 FTR_SECTION_ELSE 22 + #ifdef CONFIG_PPC_BOOK3S_64 22 23 #ifndef SELFTEST 23 24 b memcpy_power7 25 + #endif 24 26 #endif 25 27 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 26 28 #ifdef __LITTLE_ENDIAN__
-3
arch/powerpc/lib/memcpy_power7.S
··· 259 259 lis r8,0x8000 /* GO=1 */ 260 260 clrldi r8,r8,32 261 261 262 - .machine push 263 - .machine "power4" 264 262 dcbt 0,r6,0b01000 265 263 dcbt 0,r7,0b01010 266 264 dcbtst 0,r9,0b01000 267 265 dcbtst 0,r10,0b01010 268 266 eieio 269 267 dcbt 0,r8,0b01010 /* GO */ 270 - .machine pop 271 268 272 269 beq cr1,.Lunwind_stack_nonvmx_copy 273 270
+2 -2
arch/powerpc/lib/sstep.c
··· 280 280 * Copy from userspace to a buffer, using the largest possible 281 281 * aligned accesses, up to sizeof(long). 282 282 */ 283 - static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb, 283 + static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, 284 284 struct pt_regs *regs) 285 285 { 286 286 int err = 0; ··· 385 385 * Copy from a buffer to userspace, using the largest possible 386 386 * aligned accesses, up to sizeof(long). 387 387 */ 388 - static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb, 388 + static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, 389 389 struct pt_regs *regs) 390 390 { 391 391 int err = 0;
+1 -1
arch/powerpc/mm/8xx_mmu.c
··· 192 192 mtspr(SPRN_M_TW, __pa(pgd) - offset); 193 193 194 194 /* Update context */ 195 - mtspr(SPRN_M_CASID, id); 195 + mtspr(SPRN_M_CASID, id - 1); 196 196 /* sync */ 197 197 mb(); 198 198 }
+1 -1
arch/powerpc/mm/copro_fault.c
··· 112 112 return 1; 113 113 psize = get_slice_psize(mm, ea); 114 114 ssize = user_segment_size(ea); 115 - vsid = get_vsid(mm->context.id, ea, ssize); 115 + vsid = get_user_vsid(&mm->context, ea, ssize); 116 116 vsidkey = SLB_VSID_USER; 117 117 break; 118 118 case VMALLOC_REGION_ID:
+12 -16
arch/powerpc/mm/fault.c
··· 297 297 298 298 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) 299 299 return true; 300 - 300 + /* 301 + * We should ideally do the vma pkey access check here. But in the 302 + * fault path, handle_mm_fault() also does the same check. To avoid 303 + * these multiple checks, we skip it here and handle access error due 304 + * to pkeys later. 305 + */ 301 306 return false; 302 307 } 303 308 ··· 523 518 524 519 #ifdef CONFIG_PPC_MEM_KEYS 525 520 /* 526 - * if the HPTE is not hashed, hardware will not detect 527 - * a key fault. Lets check if we failed because of a 528 - * software detected key fault. 521 + * we skipped checking for access error due to key earlier. 522 + * Check that using handle_mm_fault error return. 529 523 */ 530 524 if (unlikely(fault & VM_FAULT_SIGSEGV) && 531 - !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, 532 - is_exec, 0)) { 533 - /* 534 - * The PGD-PDT...PMD-PTE tree may not have been fully setup. 535 - * Hence we cannot walk the tree to locate the PTE, to locate 536 - * the key. Hence let's use vma_pkey() to get the key; instead 537 - * of get_mm_addr_key(). 538 - */ 525 + !arch_vma_access_permitted(vma, is_write, is_exec, 0)) { 526 + 539 527 int pkey = vma_pkey(vma); 540 528 541 - if (likely(pkey)) { 542 - up_read(&mm->mmap_sem); 543 - return bad_key_fault_exception(regs, address, pkey); 544 - } 529 + up_read(&mm->mmap_sem); 530 + return bad_key_fault_exception(regs, address, pkey); 545 531 } 546 532 #endif /* CONFIG_PPC_MEM_KEYS */ 547 533
-15
arch/powerpc/mm/hash_native_64.c
··· 866 866 local_irq_restore(flags); 867 867 } 868 868 869 - static int native_register_proc_table(unsigned long base, unsigned long page_size, 870 - unsigned long table_size) 871 - { 872 - unsigned long patb1 = base << 25; /* VSID */ 873 - 874 - patb1 |= (page_size << 5); /* sllp */ 875 - patb1 |= table_size; 876 - 877 - partition_tb->patb1 = cpu_to_be64(patb1); 878 - return 0; 879 - } 880 - 881 869 void __init hpte_init_native(void) 882 870 { 883 871 mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; ··· 877 889 mmu_hash_ops.hpte_clear_all = native_hpte_clear; 878 890 mmu_hash_ops.flush_hash_range = native_flush_hash_range; 879 891 mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; 880 - 881 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 882 - register_process_table = native_register_proc_table; 883 892 }
+20 -14
arch/powerpc/mm/hash_utils_64.c
··· 132 132 * is provided by the firmware. 133 133 */ 134 134 135 - /* Pre-POWER4 CPUs (4k pages only) 135 + /* 136 + * Fallback (4k pages only) 136 137 */ 137 - static struct mmu_psize_def mmu_psize_defaults_old[] = { 138 + static struct mmu_psize_def mmu_psize_defaults[] = { 138 139 [MMU_PAGE_4K] = { 139 140 .shift = 12, 140 141 .sllp = 0, ··· 555 554 mmu_psize_set_default_penc(); 556 555 557 556 /* Default to 4K pages only */ 558 - memcpy(mmu_psize_defs, mmu_psize_defaults_old, 559 - sizeof(mmu_psize_defaults_old)); 557 + memcpy(mmu_psize_defs, mmu_psize_defaults, 558 + sizeof(mmu_psize_defaults)); 560 559 561 560 /* 562 561 * Try to find the available page sizes in the device-tree ··· 782 781 } 783 782 } 784 783 785 - int hash__create_section_mapping(unsigned long start, unsigned long end) 784 + int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) 786 785 { 787 786 int rc = htab_bolt_mapping(start, end, __pa(start), 788 787 pgprot_val(PAGE_KERNEL), mmu_linear_psize, ··· 876 875 /* Using a hypervisor which owns the htab */ 877 876 htab_address = NULL; 878 877 _SDR1 = 0; 878 + /* 879 + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall 880 + * to inform the hypervisor that we wish to use the HPT. 
881 + */ 882 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 883 + register_process_table(0, 0, 0); 879 884 #ifdef CONFIG_FA_DUMP 880 885 /* 881 886 * If firmware assisted dump is active firmware preserves ··· 1117 1110 #ifdef CONFIG_PPC_MM_SLICES 1118 1111 static unsigned int get_paca_psize(unsigned long addr) 1119 1112 { 1120 - u64 lpsizes; 1121 - unsigned char *hpsizes; 1113 + unsigned char *psizes; 1122 1114 unsigned long index, mask_index; 1123 1115 1124 1116 if (addr < SLICE_LOW_TOP) { 1125 - lpsizes = get_paca()->mm_ctx_low_slices_psize; 1117 + psizes = get_paca()->mm_ctx_low_slices_psize; 1126 1118 index = GET_LOW_SLICE_INDEX(addr); 1127 - return (lpsizes >> (index * 4)) & 0xF; 1119 + } else { 1120 + psizes = get_paca()->mm_ctx_high_slices_psize; 1121 + index = GET_HIGH_SLICE_INDEX(addr); 1128 1122 } 1129 - hpsizes = get_paca()->mm_ctx_high_slices_psize; 1130 - index = GET_HIGH_SLICE_INDEX(addr); 1131 1123 mask_index = index & 0x1; 1132 - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; 1124 + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF; 1133 1125 } 1134 1126 1135 1127 #else ··· 1268 1262 } 1269 1263 psize = get_slice_psize(mm, ea); 1270 1264 ssize = user_segment_size(ea); 1271 - vsid = get_vsid(mm->context.id, ea, ssize); 1265 + vsid = get_user_vsid(&mm->context, ea, ssize); 1272 1266 break; 1273 1267 case VMALLOC_REGION_ID: 1274 1268 vsid = get_kernel_vsid(ea, mmu_kernel_ssize); ··· 1533 1527 1534 1528 /* Get VSID */ 1535 1529 ssize = user_segment_size(ea); 1536 - vsid = get_vsid(mm->context.id, ea, ssize); 1530 + vsid = get_user_vsid(&mm->context, ea, ssize); 1537 1531 if (!vsid) 1538 1532 return; 1539 1533 /*
+19 -7
arch/powerpc/mm/hugetlbpage.c
··· 122 122 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) 123 123 #define HUGEPD_PGD_SHIFT PGDIR_SHIFT 124 124 #define HUGEPD_PUD_SHIFT PUD_SHIFT 125 - #else 126 - #define HUGEPD_PGD_SHIFT PUD_SHIFT 127 - #define HUGEPD_PUD_SHIFT PMD_SHIFT 128 125 #endif 129 126 130 127 /* ··· 550 553 struct hstate *hstate = hstate_file(file); 551 554 int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); 552 555 556 + #ifdef CONFIG_PPC_RADIX_MMU 553 557 if (radix_enabled()) 554 558 return radix__hugetlb_get_unmapped_area(file, addr, len, 555 559 pgoff, flags); 560 + #endif 556 561 return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); 557 562 } 558 563 #endif ··· 562 563 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 563 564 { 564 565 #ifdef CONFIG_PPC_MM_SLICES 565 - unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 566 566 /* With radix we don't use slice, so derive it from vma*/ 567 - if (!radix_enabled()) 567 + if (!radix_enabled()) { 568 + unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); 569 + 568 570 return 1UL << mmu_psize_to_shift(psize); 571 + } 569 572 #endif 570 573 return vma_kernel_pagesize(vma); 571 574 } ··· 664 663 665 664 shift = mmu_psize_to_shift(psize); 666 665 667 - if (add_huge_page_size(1ULL << shift) < 0) 666 + #ifdef CONFIG_PPC_BOOK3S_64 667 + if (shift > PGDIR_SHIFT) 668 668 continue; 669 - 669 + else if (shift > PUD_SHIFT) 670 + pdshift = PGDIR_SHIFT; 671 + else if (shift > PMD_SHIFT) 672 + pdshift = PUD_SHIFT; 673 + else 674 + pdshift = PMD_SHIFT; 675 + #else 670 676 if (shift < HUGEPD_PUD_SHIFT) 671 677 pdshift = PMD_SHIFT; 672 678 else if (shift < HUGEPD_PGD_SHIFT) 673 679 pdshift = PUD_SHIFT; 674 680 else 675 681 pdshift = PGDIR_SHIFT; 682 + #endif 683 + 684 + if (add_huge_page_size(1ULL << shift) < 0) 685 + continue; 676 686 /* 677 687 * if we have pdshift and shift value same, we don't 678 688 * use pgt cache for hugepd.
+1 -6
arch/powerpc/mm/init_32.c
··· 88 88 int __map_without_bats; 89 89 int __map_without_ltlbs; 90 90 91 - /* 92 - * This tells the system to allow ioremapping memory marked as reserved. 93 - */ 94 - int __allow_ioremap_reserved; 95 - 96 91 /* max amount of low RAM to map in */ 97 92 unsigned long __max_low_memory = MAX_LOW_MEM; 98 93 99 94 /* 100 95 * Check for command-line options that affect what MMU_init will do. 101 96 */ 102 - void __init MMU_setup(void) 97 + static void __init MMU_setup(void) 103 98 { 104 99 /* Check for nobats option (used in mapin_ram). */ 105 100 if (strstr(boot_command_line, "nobats")) {
+1 -7
arch/powerpc/mm/init_64.c
··· 68 68 69 69 #include "mmu_decl.h" 70 70 71 - #ifdef CONFIG_PPC_BOOK3S_64 72 - #if H_PGTABLE_RANGE > USER_VSID_RANGE 73 - #warning Limited user VSID range means pagetable space is wasted 74 - #endif 75 - #endif /* CONFIG_PPC_BOOK3S_64 */ 76 - 77 71 phys_addr_t memstart_addr = ~0; 78 72 EXPORT_SYMBOL_GPL(memstart_addr); 79 73 phys_addr_t kernstart_addr; ··· 366 372 { 367 373 bool val; 368 374 369 - if (strlen(p) == 0) 375 + if (!p) 370 376 val = true; 371 377 else if (kstrtobool(p, &val)) 372 378 return -EINVAL;
+9 -16
arch/powerpc/mm/mem.c
··· 82 82 83 83 int page_is_ram(unsigned long pfn) 84 84 { 85 - #ifndef CONFIG_PPC64 /* XXX for now */ 86 - return pfn < max_pfn; 87 - #else 88 - unsigned long paddr = (pfn << PAGE_SHIFT); 89 - struct memblock_region *reg; 90 - 91 - for_each_memblock(memory, reg) 92 - if (paddr >= reg->base && paddr < (reg->base + reg->size)) 93 - return 1; 94 - return 0; 95 - #endif 85 + return memblock_is_memory(__pfn_to_phys(pfn)); 96 86 } 97 87 98 88 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, ··· 107 117 } 108 118 #endif 109 119 110 - int __weak create_section_mapping(unsigned long start, unsigned long end) 120 + int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) 111 121 { 112 122 return -ENODEV; 113 123 } ··· 117 127 return -ENODEV; 118 128 } 119 129 120 - int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 130 + int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, 121 131 bool want_memblock) 122 132 { 123 133 unsigned long start_pfn = start >> PAGE_SHIFT; ··· 127 137 resize_hpt_for_hotplug(memblock_phys_mem_size()); 128 138 129 139 start = (unsigned long)__va(start); 130 - rc = create_section_mapping(start, start + size); 140 + rc = create_section_mapping(start, start + size, nid); 131 141 if (rc) { 132 142 pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", 133 143 start, start + size, rc); ··· 138 148 } 139 149 140 150 #ifdef CONFIG_MEMORY_HOTREMOVE 141 - int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 151 + int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) 142 152 { 143 153 unsigned long start_pfn = start >> PAGE_SHIFT; 144 154 unsigned long nr_pages = size >> PAGE_SHIFT; ··· 202 212 EXPORT_SYMBOL_GPL(walk_system_ram_range); 203 213 204 214 #ifndef CONFIG_NEED_MULTIPLE_NODES 205 - void __init initmem_init(void) 215 + void __init mem_topology_setup(void) 206 216 { 207 217 
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 208 218 min_low_pfn = MEMORY_START >> PAGE_SHIFT; ··· 214 224 * memblock_regions 215 225 */ 216 226 memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); 227 + } 217 228 229 + void __init initmem_init(void) 230 + { 218 231 /* XXX need to clip this if using highmem? */ 219 232 sparse_memory_present_with_active_regions(0); 220 233 sparse_init();
+15 -9
arch/powerpc/mm/mmu_context_book3s64.c
··· 94 94 return index; 95 95 96 96 /* 97 - * In the case of exec, use the default limit, 98 - * otherwise inherit it from the mm we are duplicating. 99 - */ 100 - if (!mm->context.slb_addr_limit) 101 - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; 102 - 103 - /* 104 97 * The old code would re-promote on fork, we don't do that when using 105 98 * slices as it could cause problem promoting slices that have been 106 99 * forced down to 4K. ··· 108 115 * check against 0 is OK. 109 116 */ 110 117 if (mm->context.id == 0) 111 - slice_set_user_psize(mm, mmu_virtual_psize); 118 + slice_init_new_context_exec(mm); 112 119 113 120 subpage_prot_init_new_context(mm); 114 121 ··· 179 186 } 180 187 EXPORT_SYMBOL_GPL(__destroy_context); 181 188 189 + static void destroy_contexts(mm_context_t *ctx) 190 + { 191 + int index, context_id; 192 + 193 + spin_lock(&mmu_context_lock); 194 + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { 195 + context_id = ctx->extended_id[index]; 196 + if (context_id) 197 + ida_remove(&mmu_context_ida, context_id); 198 + } 199 + spin_unlock(&mmu_context_lock); 200 + } 201 + 182 202 #ifdef CONFIG_PPC_64K_PAGES 183 203 static void destroy_pagetable_page(struct mm_struct *mm) 184 204 { ··· 230 224 else 231 225 subpage_prot_free(mm); 232 226 destroy_pagetable_page(mm); 233 - __destroy_context(mm->context.id); 227 + destroy_contexts(&mm->context); 234 228 mm->context.id = MMU_NO_CONTEXT; 235 229 } 236 230
+13 -2
arch/powerpc/mm/mmu_context_nohash.c
··· 331 331 { 332 332 pr_hard("initing context for mm @%p\n", mm); 333 333 334 + #ifdef CONFIG_PPC_MM_SLICES 335 + /* 336 + * We have MMU_NO_CONTEXT set to be ~0. Hence check 337 + * explicitly against context.id == 0. This ensures that we properly 338 + * initialize context slice details for newly allocated mm's (which will 339 + * have id == 0) and don't alter context slice inherited via fork (which 340 + * will have id != 0). 341 + */ 342 + if (mm->context.id == 0) 343 + slice_init_new_context_exec(mm); 344 + #endif 334 345 mm->context.id = MMU_NO_CONTEXT; 335 346 mm->context.active = 0; 336 347 return 0; ··· 439 428 * -- BenH 440 429 */ 441 430 if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { 442 - first_context = 0; 443 - last_context = 15; 431 + first_context = 1; 432 + last_context = 16; 444 433 no_selective_tlbil = true; 445 434 } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { 446 435 first_context = 1;
-1
arch/powerpc/mm/mmu_decl.h
··· 98 98 unsigned int size, pgprot_t prot); 99 99 100 100 extern int __map_without_bats; 101 - extern int __allow_ioremap_reserved; 102 101 extern unsigned int rtas_data, rtas_size; 103 102 104 103 struct hash_pte;
+21 -15
arch/powerpc/mm/numa.c
··· 831 831 of_node_put(rtas); 832 832 } 833 833 834 - void __init initmem_init(void) 834 + void __init mem_topology_setup(void) 835 835 { 836 - int nid, cpu; 837 - 838 - max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 839 - max_pfn = max_low_pfn; 836 + int cpu; 840 837 841 838 if (parse_numa_properties()) 842 839 setup_nonnuma(); 843 - 844 - memblock_dump_all(); 845 840 846 841 /* 847 842 * Modify the set of possible NUMA nodes to reflect information ··· 848 853 849 854 find_possible_nodes(); 850 855 856 + setup_node_to_cpumask_map(); 857 + 858 + reset_numa_cpu_lookup_table(); 859 + 860 + for_each_present_cpu(cpu) 861 + numa_setup_cpu(cpu); 862 + } 863 + 864 + void __init initmem_init(void) 865 + { 866 + int nid; 867 + 868 + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 869 + max_pfn = max_low_pfn; 870 + 871 + memblock_dump_all(); 872 + 851 873 for_each_online_node(nid) { 852 874 unsigned long start_pfn, end_pfn; 853 875 ··· 875 863 876 864 sparse_init(); 877 865 878 - setup_node_to_cpumask_map(); 879 - 880 - reset_numa_cpu_lookup_table(); 881 - 882 866 /* 883 867 * We need the numa_cpu_lookup_table to be accurate for all CPUs, 884 868 * even before we online them, so that we can use cpu_to_{node,mem} ··· 884 876 */ 885 877 cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", 886 878 ppc_numa_cpu_prepare, ppc_numa_cpu_dead); 887 - for_each_present_cpu(cpu) 888 - numa_setup_cpu(cpu); 889 879 } 890 880 891 881 static int __init early_numa(char *p) ··· 1111 1105 for_each_possible_cpu(cpu) { 1112 1106 int i; 1113 1107 u8 *counts = vphn_cpu_change_counts[cpu]; 1114 - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; 1108 + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; 1115 1109 1116 1110 for (i = 0; i < distance_ref_points_depth; i++) 1117 1111 counts[i] = hypervisor_counts[i]; ··· 1137 1131 for_each_possible_cpu(cpu) { 1138 1132 int i, changed = 0; 1139 1133 u8 *counts = vphn_cpu_change_counts[cpu]; 
1140 - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; 1134 + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; 1141 1135 1142 1136 for (i = 0; i < distance_ref_points_depth; i++) { 1143 1137 if (hypervisor_counts[i] != counts[i]) {
+4 -4
arch/powerpc/mm/pgtable-book3s64.c
··· 155 155 } 156 156 157 157 #ifdef CONFIG_MEMORY_HOTPLUG 158 - int create_section_mapping(unsigned long start, unsigned long end) 158 + int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid) 159 159 { 160 160 if (radix_enabled()) 161 - return radix__create_section_mapping(start, end); 161 + return radix__create_section_mapping(start, end, nid); 162 162 163 - return hash__create_section_mapping(start, end); 163 + return hash__create_section_mapping(start, end, nid); 164 164 } 165 165 166 - int remove_section_mapping(unsigned long start, unsigned long end) 166 + int __meminit remove_section_mapping(unsigned long start, unsigned long end) 167 167 { 168 168 if (radix_enabled()) 169 169 return radix__remove_section_mapping(start, end);
+5 -1
arch/powerpc/mm/pgtable-hash64.c
··· 24 24 #define CREATE_TRACE_POINTS 25 25 #include <trace/events/thp.h> 26 26 27 + #if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) 28 + #warning Limited user VSID range means pagetable space is wasted 29 + #endif 30 + 27 31 #ifdef CONFIG_SPARSEMEM_VMEMMAP 28 32 /* 29 33 * vmemmap is the starting address of the virtual address space where ··· 324 320 325 321 if (!is_kernel_addr(addr)) { 326 322 ssize = user_segment_size(addr); 327 - vsid = get_vsid(mm->context.id, addr, ssize); 323 + vsid = get_user_vsid(&mm->context, addr, ssize); 328 324 WARN_ON(vsid == 0); 329 325 } else { 330 326 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+144 -74
arch/powerpc/mm/pgtable-radix.c
··· 48 48 return 0; 49 49 } 50 50 51 - static __ref void *early_alloc_pgtable(unsigned long size) 51 + static __ref void *early_alloc_pgtable(unsigned long size, int nid, 52 + unsigned long region_start, unsigned long region_end) 52 53 { 54 + unsigned long pa = 0; 53 55 void *pt; 54 56 55 - pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE)); 57 + if (region_start || region_end) /* has region hint */ 58 + pa = memblock_alloc_range(size, size, region_start, region_end, 59 + MEMBLOCK_NONE); 60 + else if (nid != -1) /* has node hint */ 61 + pa = memblock_alloc_base_nid(size, size, 62 + MEMBLOCK_ALLOC_ANYWHERE, 63 + nid, MEMBLOCK_NONE); 64 + 65 + if (!pa) 66 + pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); 67 + 68 + BUG_ON(!pa); 69 + 70 + pt = __va(pa); 56 71 memset(pt, 0, size); 57 72 58 73 return pt; 59 74 } 60 75 61 - int radix__map_kernel_page(unsigned long ea, unsigned long pa, 76 + static int early_map_kernel_page(unsigned long ea, unsigned long pa, 62 77 pgprot_t flags, 63 - unsigned int map_page_size) 78 + unsigned int map_page_size, 79 + int nid, 80 + unsigned long region_start, unsigned long region_end) 64 81 { 82 + unsigned long pfn = pa >> PAGE_SHIFT; 83 + pgd_t *pgdp; 84 + pud_t *pudp; 85 + pmd_t *pmdp; 86 + pte_t *ptep; 87 + 88 + pgdp = pgd_offset_k(ea); 89 + if (pgd_none(*pgdp)) { 90 + pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, 91 + region_start, region_end); 92 + pgd_populate(&init_mm, pgdp, pudp); 93 + } 94 + pudp = pud_offset(pgdp, ea); 95 + if (map_page_size == PUD_SIZE) { 96 + ptep = (pte_t *)pudp; 97 + goto set_the_pte; 98 + } 99 + if (pud_none(*pudp)) { 100 + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, 101 + region_start, region_end); 102 + pud_populate(&init_mm, pudp, pmdp); 103 + } 104 + pmdp = pmd_offset(pudp, ea); 105 + if (map_page_size == PMD_SIZE) { 106 + ptep = pmdp_ptep(pmdp); 107 + goto set_the_pte; 108 + } 109 + if (!pmd_present(*pmdp)) { 110 + ptep = early_alloc_pgtable(PAGE_SIZE, nid, 111 + 
region_start, region_end); 112 + pmd_populate_kernel(&init_mm, pmdp, ptep); 113 + } 114 + ptep = pte_offset_kernel(pmdp, ea); 115 + 116 + set_the_pte: 117 + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); 118 + smp_wmb(); 119 + return 0; 120 + } 121 + 122 + /* 123 + * nid, region_start, and region_end are hints to try to place the page 124 + * table memory in the same node or region. 125 + */ 126 + static int __map_kernel_page(unsigned long ea, unsigned long pa, 127 + pgprot_t flags, 128 + unsigned int map_page_size, 129 + int nid, 130 + unsigned long region_start, unsigned long region_end) 131 + { 132 + unsigned long pfn = pa >> PAGE_SHIFT; 65 133 pgd_t *pgdp; 66 134 pud_t *pudp; 67 135 pmd_t *pmdp; ··· 138 70 * Make sure task size is correct as per the max adddr 139 71 */ 140 72 BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); 141 - if (slab_is_available()) { 142 - pgdp = pgd_offset_k(ea); 143 - pudp = pud_alloc(&init_mm, pgdp, ea); 144 - if (!pudp) 145 - return -ENOMEM; 146 - if (map_page_size == PUD_SIZE) { 147 - ptep = (pte_t *)pudp; 148 - goto set_the_pte; 149 - } 150 - pmdp = pmd_alloc(&init_mm, pudp, ea); 151 - if (!pmdp) 152 - return -ENOMEM; 153 - if (map_page_size == PMD_SIZE) { 154 - ptep = pmdp_ptep(pmdp); 155 - goto set_the_pte; 156 - } 157 - ptep = pte_alloc_kernel(pmdp, ea); 158 - if (!ptep) 159 - return -ENOMEM; 160 - } else { 161 - pgdp = pgd_offset_k(ea); 162 - if (pgd_none(*pgdp)) { 163 - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); 164 - BUG_ON(pudp == NULL); 165 - pgd_populate(&init_mm, pgdp, pudp); 166 - } 167 - pudp = pud_offset(pgdp, ea); 168 - if (map_page_size == PUD_SIZE) { 169 - ptep = (pte_t *)pudp; 170 - goto set_the_pte; 171 - } 172 - if (pud_none(*pudp)) { 173 - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); 174 - BUG_ON(pmdp == NULL); 175 - pud_populate(&init_mm, pudp, pmdp); 176 - } 177 - pmdp = pmd_offset(pudp, ea); 178 - if (map_page_size == PMD_SIZE) { 179 - ptep = pmdp_ptep(pmdp); 180 - goto set_the_pte; 181 - } 182 - if 
(!pmd_present(*pmdp)) { 183 - ptep = early_alloc_pgtable(PAGE_SIZE); 184 - BUG_ON(ptep == NULL); 185 - pmd_populate_kernel(&init_mm, pmdp, ptep); 186 - } 187 - ptep = pte_offset_kernel(pmdp, ea); 73 + 74 + if (unlikely(!slab_is_available())) 75 + return early_map_kernel_page(ea, pa, flags, map_page_size, 76 + nid, region_start, region_end); 77 + 78 + /* 79 + * Should make page table allocation functions be able to take a 80 + * node, so we can place kernel page tables on the right nodes after 81 + * boot. 82 + */ 83 + pgdp = pgd_offset_k(ea); 84 + pudp = pud_alloc(&init_mm, pgdp, ea); 85 + if (!pudp) 86 + return -ENOMEM; 87 + if (map_page_size == PUD_SIZE) { 88 + ptep = (pte_t *)pudp; 89 + goto set_the_pte; 188 90 } 91 + pmdp = pmd_alloc(&init_mm, pudp, ea); 92 + if (!pmdp) 93 + return -ENOMEM; 94 + if (map_page_size == PMD_SIZE) { 95 + ptep = pmdp_ptep(pmdp); 96 + goto set_the_pte; 97 + } 98 + ptep = pte_alloc_kernel(pmdp, ea); 99 + if (!ptep) 100 + return -ENOMEM; 189 101 190 102 set_the_pte: 191 - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags)); 103 + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); 192 104 smp_wmb(); 193 105 return 0; 106 + } 107 + 108 + int radix__map_kernel_page(unsigned long ea, unsigned long pa, 109 + pgprot_t flags, 110 + unsigned int map_page_size) 111 + { 112 + return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0); 194 113 } 195 114 196 115 #ifdef CONFIG_STRICT_KERNEL_RWX ··· 266 211 } 267 212 268 213 static int __meminit create_physical_mapping(unsigned long start, 269 - unsigned long end) 214 + unsigned long end, 215 + int nid) 270 216 { 271 217 unsigned long vaddr, addr, mapping_size = 0; 272 218 pgprot_t prot; ··· 323 267 else 324 268 prot = PAGE_KERNEL; 325 269 326 - rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); 270 + rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); 327 271 if (rc) 328 272 return rc; 329 273 } ··· 332 276 return 0; 333 277 } 334 278 335 - 
static void __init radix_init_pgtable(void) 279 + void __init radix_init_pgtable(void) 336 280 { 337 281 unsigned long rts_field; 338 282 struct memblock_region *reg; ··· 342 286 /* 343 287 * Create the linear mapping, using standard page size for now 344 288 */ 345 - for_each_memblock(memory, reg) 289 + for_each_memblock(memory, reg) { 290 + /* 291 + * The memblock allocator is up at this point, so the 292 + * page tables will be allocated within the range. No 293 + * need or a node (which we don't have yet). 294 + */ 346 295 WARN_ON(create_physical_mapping(reg->base, 347 - reg->base + reg->size)); 296 + reg->base + reg->size, 297 + -1)); 298 + } 348 299 349 300 /* Find out how many PID bits are supported */ 350 301 if (cpu_has_feature(CPU_FTR_HVMODE)) { ··· 380 317 * host. 381 318 */ 382 319 BUG_ON(PRTB_SIZE_SHIFT > 36); 383 - process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); 320 + process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0); 384 321 /* 385 322 * Fill in the process table. 
386 323 */ ··· 638 575 #ifdef CONFIG_PCI 639 576 pci_io_base = ISA_IO_BASE; 640 577 #endif 641 - 642 - /* 643 - * For now radix also use the same frag size 644 - */ 645 - __pte_frag_nr = H_PTE_FRAG_NR; 646 - __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; 578 + __pte_frag_nr = RADIX_PTE_FRAG_NR; 579 + __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT; 647 580 648 581 if (!firmware_has_feature(FW_FEATURE_LPAR)) { 649 582 radix_init_native(); ··· 754 695 unsigned long aligned_end; 755 696 }; 756 697 757 - static int stop_machine_change_mapping(void *data) 698 + static int __meminit stop_machine_change_mapping(void *data) 758 699 { 759 700 struct change_mapping_params *params = 760 701 (struct change_mapping_params *)data; ··· 764 705 765 706 spin_unlock(&init_mm.page_table_lock); 766 707 pte_clear(&init_mm, params->aligned_start, params->pte); 767 - create_physical_mapping(params->aligned_start, params->start); 768 - create_physical_mapping(params->end, params->aligned_end); 708 + create_physical_mapping(params->aligned_start, params->start, -1); 709 + create_physical_mapping(params->end, params->aligned_end, -1); 769 710 spin_lock(&init_mm.page_table_lock); 770 711 return 0; 771 712 } ··· 801 742 /* 802 743 * clear the pte and potentially split the mapping helper 803 744 */ 804 - static void split_kernel_mapping(unsigned long addr, unsigned long end, 745 + static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, 805 746 unsigned long size, pte_t *pte) 806 747 { 807 748 unsigned long mask = ~(size - 1); ··· 894 835 } 895 836 } 896 837 897 - static void remove_pagetable(unsigned long start, unsigned long end) 838 + static void __meminit remove_pagetable(unsigned long start, unsigned long end) 898 839 { 899 840 unsigned long addr, next; 900 841 pud_t *pud_base; ··· 922 863 radix__flush_tlb_kernel_range(start, end); 923 864 } 924 865 925 - int __ref radix__create_section_mapping(unsigned long start, unsigned long end) 866 + int __meminit 
radix__create_section_mapping(unsigned long start, unsigned long end, int nid) 926 867 { 927 - return create_physical_mapping(start, end); 868 + return create_physical_mapping(start, end, nid); 928 869 } 929 870 930 - int radix__remove_section_mapping(unsigned long start, unsigned long end) 871 + int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) 931 872 { 932 873 remove_pagetable(start, end); 933 874 return 0; ··· 935 876 #endif /* CONFIG_MEMORY_HOTPLUG */ 936 877 937 878 #ifdef CONFIG_SPARSEMEM_VMEMMAP 879 + static int __map_kernel_page_nid(unsigned long ea, unsigned long pa, 880 + pgprot_t flags, unsigned int map_page_size, 881 + int nid) 882 + { 883 + return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0); 884 + } 885 + 938 886 int __meminit radix__vmemmap_create_mapping(unsigned long start, 939 887 unsigned long page_size, 940 888 unsigned long phys) 941 889 { 942 890 /* Create a PTE encoding */ 943 891 unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW; 892 + int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); 893 + int ret; 944 894 945 - BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size)); 895 + ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); 896 + BUG_ON(ret); 897 + 946 898 return 0; 947 899 } 948 900 949 901 #ifdef CONFIG_MEMORY_HOTPLUG 950 - void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) 902 + void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) 951 903 { 952 904 remove_pagetable(start, start + page_size); 953 905 }
+1 -1
arch/powerpc/mm/pgtable_32.c
··· 148 148 * mem_init() sets high_memory so only do the check after that. 149 149 */ 150 150 if (slab_is_available() && (p < virt_to_phys(high_memory)) && 151 - !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { 151 + page_is_ram(__phys_to_pfn(p))) { 152 152 printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n", 153 153 (unsigned long long)p, __builtin_return_address(0)); 154 154 return NULL;
-5
arch/powerpc/mm/pgtable_64.c
··· 57 57 58 58 #include "mmu_decl.h" 59 59 60 - #ifdef CONFIG_PPC_BOOK3S_64 61 - #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) 62 - #error TASK_SIZE_USER64 exceeds user VSID range 63 - #endif 64 - #endif 65 60 66 61 #ifdef CONFIG_PPC_BOOK3S_64 67 62 /*
+7 -10
arch/powerpc/mm/pkeys.c
··· 119 119 #else 120 120 os_reserved = 0; 121 121 #endif 122 + initial_allocation_mask = ~0x0; 123 + pkey_amr_uamor_mask = ~0x0ul; 124 + pkey_iamr_mask = ~0x0ul; 122 125 /* 123 - * Bits are in LE format. NOTE: 1, 0 are reserved. 126 + * key 0, 1 are reserved. 124 127 * key 0 is the default key, which allows read/write/execute. 125 128 * key 1 is recommended not to be used. PowerISA(3.0) page 1015, 126 129 * programming note. 127 130 */ 128 - initial_allocation_mask = ~0x0; 129 - 130 - /* register mask is in BE format */ 131 - pkey_amr_uamor_mask = ~0x0ul; 132 - pkey_iamr_mask = ~0x0ul; 133 - 134 131 for (i = 2; i < (pkeys_total - os_reserved); i++) { 135 132 initial_allocation_mask &= ~(0x1 << i); 136 133 pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); ··· 305 308 if (static_branch_likely(&pkey_disabled)) 306 309 return; 307 310 308 - write_amr(read_amr() & pkey_amr_uamor_mask); 309 - write_iamr(read_iamr() & pkey_iamr_mask); 310 - write_uamor(read_uamor() & pkey_amr_uamor_mask); 311 + thread->amr = read_amr() & pkey_amr_uamor_mask; 312 + thread->iamr = read_iamr() & pkey_iamr_mask; 313 + thread->uamor = read_uamor() & pkey_amr_uamor_mask; 311 314 } 312 315 313 316 static inline bool pkey_allows_readwrite(int pkey)
+108
arch/powerpc/mm/slb.c
··· 22 22 #include <asm/cacheflush.h> 23 23 #include <asm/smp.h> 24 24 #include <linux/compiler.h> 25 + #include <linux/context_tracking.h> 25 26 #include <linux/mm_types.h> 26 27 27 28 #include <asm/udbg.h> ··· 340 339 mmu_kernel_ssize, lflags, KSTACK_INDEX); 341 340 342 341 asm volatile("isync":::"memory"); 342 + } 343 + 344 + static void insert_slb_entry(unsigned long vsid, unsigned long ea, 345 + int bpsize, int ssize) 346 + { 347 + unsigned long flags, vsid_data, esid_data; 348 + enum slb_index index; 349 + int slb_cache_index; 350 + 351 + /* 352 + * We are irq disabled, hence should be safe to access PACA. 353 + */ 354 + index = get_paca()->stab_rr; 355 + 356 + /* 357 + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. 358 + */ 359 + if (index < (mmu_slb_size - 1)) 360 + index++; 361 + else 362 + index = SLB_NUM_BOLTED; 363 + 364 + get_paca()->stab_rr = index; 365 + 366 + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; 367 + vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | 368 + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); 369 + esid_data = mk_esid_data(ea, ssize, index); 370 + 371 + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) 372 + : "memory"); 373 + 374 + /* 375 + * Now update slb cache entries 376 + */ 377 + slb_cache_index = get_paca()->slb_cache_ptr; 378 + if (slb_cache_index < SLB_CACHE_ENTRIES) { 379 + /* 380 + * We have space in slb cache for optimized switch_slb(). 381 + * Top 36 bits from esid_data as per ISA 382 + */ 383 + get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; 384 + get_paca()->slb_cache_ptr++; 385 + } else { 386 + /* 387 + * Our cache is full and the current cache content strictly 388 + * doesn't indicate the active SLB conents. Bump the ptr 389 + * so that switch_slb() will ignore the cache. 
390 + */ 391 + get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; 392 + } 393 + } 394 + 395 + static void handle_multi_context_slb_miss(int context_id, unsigned long ea) 396 + { 397 + struct mm_struct *mm = current->mm; 398 + unsigned long vsid; 399 + int bpsize; 400 + 401 + /* 402 + * We are always above 1TB, hence use high user segment size. 403 + */ 404 + vsid = get_vsid(context_id, ea, mmu_highuser_ssize); 405 + bpsize = get_slice_psize(mm, ea); 406 + insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); 407 + } 408 + 409 + void slb_miss_large_addr(struct pt_regs *regs) 410 + { 411 + enum ctx_state prev_state = exception_enter(); 412 + unsigned long ea = regs->dar; 413 + int context; 414 + 415 + if (REGION_ID(ea) != USER_REGION_ID) 416 + goto slb_bad_addr; 417 + 418 + /* 419 + * Are we beyound what the page table layout supports ? 420 + */ 421 + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) 422 + goto slb_bad_addr; 423 + 424 + /* Lower address should have been handled by asm code */ 425 + if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) 426 + goto slb_bad_addr; 427 + 428 + /* 429 + * consider this as bad access if we take a SLB miss 430 + * on an address above addr limit. 431 + */ 432 + if (ea >= current->mm->context.slb_addr_limit) 433 + goto slb_bad_addr; 434 + 435 + context = get_ea_context(&current->mm->context, ea); 436 + if (!context) 437 + goto slb_bad_addr; 438 + 439 + handle_multi_context_slb_miss(context, ea); 440 + exception_exit(prev_state); 441 + return; 442 + 443 + slb_bad_addr: 444 + if (user_mode(regs)) 445 + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); 446 + else 447 + bad_page_fault(regs, ea, SIGSEGV); 448 + exception_exit(prev_state); 343 449 }
+13 -6
arch/powerpc/mm/slb_low.S
··· 75 75 */ 76 76 _GLOBAL(slb_allocate) 77 77 /* 78 - * check for bad kernel/user address 79 - * (ea & ~REGION_MASK) >= PGTABLE_RANGE 78 + * Check if the address falls within the range of the first context, or 79 + * if we may need to handle multi context. For the first context we 80 + * allocate the slb entry via the fast path below. For large address we 81 + * branch out to C-code and see if additional contexts have been 82 + * allocated. 83 + * The test here is: 84 + * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) 80 85 */ 81 - rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) 86 + rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) 82 87 bne- 8f 83 88 84 89 srdi r9,r3,60 /* get region */ ··· 205 200 5: 206 201 /* 207 202 * Handle lpsizes 208 - * r9 is get_paca()->context.low_slices_psize, r11 is index 203 + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index 209 204 */ 210 - ld r9,PACALOWSLICESPSIZE(r13) 211 - mr r11,r10 205 + srdi r11,r10,1 /* index */ 206 + addi r9,r11,PACALOWSLICESPSIZE 207 + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ 208 + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ 212 209 6: 213 210 sldi r11,r11,2 /* index * 4 */ 214 211 /* Extract the psize and multiply to get an array offset */
+262 -229
arch/powerpc/mm/slice.c
··· 37 37 #include <asm/hugetlb.h> 38 38 39 39 static DEFINE_SPINLOCK(slice_convert_lock); 40 - /* 41 - * One bit per slice. We have lower slices which cover 256MB segments 42 - * upto 4G range. That gets us 16 low slices. For the rest we track slices 43 - * in 1TB size. 44 - */ 45 - struct slice_mask { 46 - u64 low_slices; 47 - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); 48 - }; 49 40 50 41 #ifdef DEBUG 51 42 int _slice_debug = 1; 52 43 53 - static void slice_print_mask(const char *label, struct slice_mask mask) 44 + static void slice_print_mask(const char *label, const struct slice_mask *mask) 54 45 { 55 46 if (!_slice_debug) 56 47 return; 57 - pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices); 58 - pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices); 48 + pr_devel("%s low_slice: %*pbl\n", label, 49 + (int)SLICE_NUM_LOW, &mask->low_slices); 50 + pr_devel("%s high_slice: %*pbl\n", label, 51 + (int)SLICE_NUM_HIGH, mask->high_slices); 59 52 } 60 53 61 54 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0) 62 55 63 56 #else 64 57 65 - static void slice_print_mask(const char *label, struct slice_mask mask) {} 58 + static void slice_print_mask(const char *label, const struct slice_mask *mask) {} 66 59 #define slice_dbg(fmt...) 
67 60 68 61 #endif ··· 66 73 unsigned long end = start + len - 1; 67 74 68 75 ret->low_slices = 0; 69 - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 76 + if (SLICE_NUM_HIGH) 77 + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 70 78 71 79 if (start < SLICE_LOW_TOP) { 72 - unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); 80 + unsigned long mend = min(end, 81 + (unsigned long)(SLICE_LOW_TOP - 1)); 73 82 74 83 ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) 75 84 - (1u << GET_LOW_SLICE_INDEX(start)); ··· 108 113 unsigned long start = slice << SLICE_HIGH_SHIFT; 109 114 unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); 110 115 116 + #ifdef CONFIG_PPC64 111 117 /* Hack, so that each addresses is controlled by exactly one 112 118 * of the high or low area bitmaps, the first high area starts 113 119 * at 4GB, not 0 */ 114 120 if (start == 0) 115 121 start = SLICE_LOW_TOP; 122 + #endif 116 123 117 124 return !slice_area_is_free(mm, start, end - start); 118 125 } ··· 125 128 unsigned long i; 126 129 127 130 ret->low_slices = 0; 128 - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 131 + if (SLICE_NUM_HIGH) 132 + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 129 133 130 134 for (i = 0; i < SLICE_NUM_LOW; i++) 131 135 if (!slice_low_has_vma(mm, i)) ··· 140 142 __set_bit(i, ret->high_slices); 141 143 } 142 144 143 - static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret, 144 - unsigned long high_limit) 145 + #ifdef CONFIG_PPC_BOOK3S_64 146 + static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) 145 147 { 146 - unsigned char *hpsizes; 147 - int index, mask_index; 148 - unsigned long i; 149 - u64 lpsizes; 150 - 151 - ret->low_slices = 0; 152 - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); 153 - 154 - lpsizes = mm->context.low_slices_psize; 155 - for (i = 0; i < SLICE_NUM_LOW; i++) 156 - if (((lpsizes >> (i * 4)) & 0xf) == psize) 157 - ret->low_slices |= 1u << i; 158 - 159 - if (high_limit <= 
SLICE_LOW_TOP) 160 - return; 161 - 162 - hpsizes = mm->context.high_slices_psize; 163 - for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) { 164 - mask_index = i & 0x1; 165 - index = i >> 1; 166 - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) 167 - __set_bit(i, ret->high_slices); 168 - } 148 + #ifdef CONFIG_PPC_64K_PAGES 149 + if (psize == MMU_PAGE_64K) 150 + return &mm->context.mask_64k; 151 + #endif 152 + if (psize == MMU_PAGE_4K) 153 + return &mm->context.mask_4k; 154 + #ifdef CONFIG_HUGETLB_PAGE 155 + if (psize == MMU_PAGE_16M) 156 + return &mm->context.mask_16m; 157 + if (psize == MMU_PAGE_16G) 158 + return &mm->context.mask_16g; 159 + #endif 160 + BUG(); 169 161 } 170 - 171 - static int slice_check_fit(struct mm_struct *mm, 172 - struct slice_mask mask, struct slice_mask available) 162 + #elif defined(CONFIG_PPC_8xx) 163 + static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) 173 164 { 174 - DECLARE_BITMAP(result, SLICE_NUM_HIGH); 175 - /* 176 - * Make sure we just do bit compare only to the max 177 - * addr limit and not the full bit map size. 
178 - */ 179 - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); 165 + if (psize == mmu_virtual_psize) 166 + return &mm->context.mask_base_psize; 167 + #ifdef CONFIG_HUGETLB_PAGE 168 + if (psize == MMU_PAGE_512K) 169 + return &mm->context.mask_512k; 170 + if (psize == MMU_PAGE_8M) 171 + return &mm->context.mask_8m; 172 + #endif 173 + BUG(); 174 + } 175 + #else 176 + #error "Must define the slice masks for page sizes supported by the platform" 177 + #endif 180 178 181 - bitmap_and(result, mask.high_slices, 182 - available.high_slices, slice_count); 179 + static bool slice_check_range_fits(struct mm_struct *mm, 180 + const struct slice_mask *available, 181 + unsigned long start, unsigned long len) 182 + { 183 + unsigned long end = start + len - 1; 184 + u64 low_slices = 0; 183 185 184 - return (mask.low_slices & available.low_slices) == mask.low_slices && 185 - bitmap_equal(result, mask.high_slices, slice_count); 186 + if (start < SLICE_LOW_TOP) { 187 + unsigned long mend = min(end, 188 + (unsigned long)(SLICE_LOW_TOP - 1)); 189 + 190 + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) 191 + - (1u << GET_LOW_SLICE_INDEX(start)); 192 + } 193 + if ((low_slices & available->low_slices) != low_slices) 194 + return false; 195 + 196 + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { 197 + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); 198 + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); 199 + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; 200 + unsigned long i; 201 + 202 + for (i = start_index; i < start_index + count; i++) { 203 + if (!test_bit(i, available->high_slices)) 204 + return false; 205 + } 206 + } 207 + 208 + return true; 186 209 } 187 210 188 211 static void slice_flush_segments(void *parm) 189 212 { 213 + #ifdef CONFIG_PPC64 190 214 struct mm_struct *mm = parm; 191 215 unsigned long flags; 192 216 ··· 220 200 local_irq_save(flags); 221 201 slb_flush_and_rebolt(); 222 202 
local_irq_restore(flags); 203 + #endif 223 204 } 224 205 225 - static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) 206 + static void slice_convert(struct mm_struct *mm, 207 + const struct slice_mask *mask, int psize) 226 208 { 227 209 int index, mask_index; 228 210 /* Write the new slice psize bits */ 229 - unsigned char *hpsizes; 230 - u64 lpsizes; 211 + unsigned char *hpsizes, *lpsizes; 212 + struct slice_mask *psize_mask, *old_mask; 231 213 unsigned long i, flags; 214 + int old_psize; 232 215 233 216 slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); 234 217 slice_print_mask(" mask", mask); 218 + 219 + psize_mask = slice_mask_for_size(mm, psize); 235 220 236 221 /* We need to use a spinlock here to protect against 237 222 * concurrent 64k -> 4k demotion ... ··· 244 219 spin_lock_irqsave(&slice_convert_lock, flags); 245 220 246 221 lpsizes = mm->context.low_slices_psize; 247 - for (i = 0; i < SLICE_NUM_LOW; i++) 248 - if (mask.low_slices & (1u << i)) 249 - lpsizes = (lpsizes & ~(0xful << (i * 4))) | 250 - (((unsigned long)psize) << (i * 4)); 222 + for (i = 0; i < SLICE_NUM_LOW; i++) { 223 + if (!(mask->low_slices & (1u << i))) 224 + continue; 251 225 252 - /* Assign the value back */ 253 - mm->context.low_slices_psize = lpsizes; 226 + mask_index = i & 0x1; 227 + index = i >> 1; 228 + 229 + /* Update the slice_mask */ 230 + old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; 231 + old_mask = slice_mask_for_size(mm, old_psize); 232 + old_mask->low_slices &= ~(1u << i); 233 + psize_mask->low_slices |= 1u << i; 234 + 235 + /* Update the sizes array */ 236 + lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) | 237 + (((unsigned long)psize) << (mask_index * 4)); 238 + } 254 239 255 240 hpsizes = mm->context.high_slices_psize; 256 241 for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { 242 + if (!test_bit(i, mask->high_slices)) 243 + continue; 244 + 257 245 mask_index = i & 0x1; 258 246 index = i 
>> 1; 259 - if (test_bit(i, mask.high_slices)) 260 - hpsizes[index] = (hpsizes[index] & 261 - ~(0xf << (mask_index * 4))) | 247 + 248 + /* Update the slice_mask */ 249 + old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; 250 + old_mask = slice_mask_for_size(mm, old_psize); 251 + __clear_bit(i, old_mask->high_slices); 252 + __set_bit(i, psize_mask->high_slices); 253 + 254 + /* Update the sizes array */ 255 + hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | 262 256 (((unsigned long)psize) << (mask_index * 4)); 263 257 } 264 258 ··· 298 254 * 'available' slice_mark. 299 255 */ 300 256 static bool slice_scan_available(unsigned long addr, 301 - struct slice_mask available, 302 - int end, 303 - unsigned long *boundary_addr) 257 + const struct slice_mask *available, 258 + int end, unsigned long *boundary_addr) 304 259 { 305 260 unsigned long slice; 306 261 if (addr < SLICE_LOW_TOP) { 307 262 slice = GET_LOW_SLICE_INDEX(addr); 308 263 *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; 309 - return !!(available.low_slices & (1u << slice)); 264 + return !!(available->low_slices & (1u << slice)); 310 265 } else { 311 266 slice = GET_HIGH_SLICE_INDEX(addr); 312 267 *boundary_addr = (slice + end) ? 
313 268 ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; 314 - return !!test_bit(slice, available.high_slices); 269 + return !!test_bit(slice, available->high_slices); 315 270 } 316 271 } 317 272 318 273 static unsigned long slice_find_area_bottomup(struct mm_struct *mm, 319 274 unsigned long len, 320 - struct slice_mask available, 275 + const struct slice_mask *available, 321 276 int psize, unsigned long high_limit) 322 277 { 323 278 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); ··· 362 319 363 320 static unsigned long slice_find_area_topdown(struct mm_struct *mm, 364 321 unsigned long len, 365 - struct slice_mask available, 322 + const struct slice_mask *available, 366 323 int psize, unsigned long high_limit) 367 324 { 368 325 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); ··· 420 377 421 378 422 379 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, 423 - struct slice_mask mask, int psize, 380 + const struct slice_mask *mask, int psize, 424 381 int topdown, unsigned long high_limit) 425 382 { 426 383 if (topdown) ··· 429 386 return slice_find_area_bottomup(mm, len, mask, psize, high_limit); 430 387 } 431 388 432 - static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) 389 + static inline void slice_copy_mask(struct slice_mask *dst, 390 + const struct slice_mask *src) 433 391 { 434 - DECLARE_BITMAP(result, SLICE_NUM_HIGH); 435 - 436 - dst->low_slices |= src->low_slices; 437 - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); 438 - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); 392 + dst->low_slices = src->low_slices; 393 + if (!SLICE_NUM_HIGH) 394 + return; 395 + bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH); 439 396 } 440 397 441 - static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) 398 + static inline void slice_or_mask(struct slice_mask *dst, 399 + const struct slice_mask *src1, 400 + 
const struct slice_mask *src2) 442 401 { 443 - DECLARE_BITMAP(result, SLICE_NUM_HIGH); 402 + dst->low_slices = src1->low_slices | src2->low_slices; 403 + if (!SLICE_NUM_HIGH) 404 + return; 405 + bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); 406 + } 444 407 445 - dst->low_slices &= ~src->low_slices; 446 - 447 - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); 448 - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); 408 + static inline void slice_andnot_mask(struct slice_mask *dst, 409 + const struct slice_mask *src1, 410 + const struct slice_mask *src2) 411 + { 412 + dst->low_slices = src1->low_slices & ~src2->low_slices; 413 + if (!SLICE_NUM_HIGH) 414 + return; 415 + bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); 449 416 } 450 417 451 418 #ifdef CONFIG_PPC_64K_PAGES ··· 468 415 unsigned long flags, unsigned int psize, 469 416 int topdown) 470 417 { 471 - struct slice_mask mask; 472 418 struct slice_mask good_mask; 473 419 struct slice_mask potential_mask; 474 - struct slice_mask compat_mask; 420 + const struct slice_mask *maskp; 421 + const struct slice_mask *compat_maskp = NULL; 475 422 int fixed = (flags & MAP_FIXED); 476 423 int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); 477 424 unsigned long page_size = 1UL << pshift; ··· 495 442 } 496 443 497 444 if (high_limit > mm->context.slb_addr_limit) { 445 + /* 446 + * Increasing the slb_addr_limit does not require 447 + * slice mask cache to be recalculated because it should 448 + * be already initialised beyond the old address limit. 
449 + */ 498 450 mm->context.slb_addr_limit = high_limit; 451 + 499 452 on_each_cpu(slice_flush_segments, mm, 1); 500 453 } 501 - 502 - /* 503 - * init different masks 504 - */ 505 - mask.low_slices = 0; 506 - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); 507 - 508 - /* silence stupid warning */; 509 - potential_mask.low_slices = 0; 510 - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); 511 - 512 - compat_mask.low_slices = 0; 513 - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); 514 454 515 455 /* Sanity checks */ 516 456 BUG_ON(mm->task_size == 0); ··· 527 481 /* First make up a "good" mask of slices that have the right size 528 482 * already 529 483 */ 530 - slice_mask_for_size(mm, psize, &good_mask, high_limit); 531 - slice_print_mask(" good_mask", good_mask); 484 + maskp = slice_mask_for_size(mm, psize); 532 485 533 486 /* 534 487 * Here "good" means slices that are already the right page size, ··· 548 503 * search in good | compat | free, found => convert free. 549 504 */ 550 505 551 - #ifdef CONFIG_PPC_64K_PAGES 552 - /* If we support combo pages, we can allow 64k pages in 4k slices */ 553 - if (psize == MMU_PAGE_64K) { 554 - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); 506 + /* 507 + * If we support combo pages, we can allow 64k pages in 4k slices 508 + * The mask copies could be avoided in most cases here if we had 509 + * a pointer to good mask for the next code to use. 
510 + */ 511 + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { 512 + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); 555 513 if (fixed) 556 - slice_or_mask(&good_mask, &compat_mask); 514 + slice_or_mask(&good_mask, maskp, compat_maskp); 515 + else 516 + slice_copy_mask(&good_mask, maskp); 517 + } else { 518 + slice_copy_mask(&good_mask, maskp); 557 519 } 558 - #endif 520 + 521 + slice_print_mask(" good_mask", &good_mask); 522 + if (compat_maskp) 523 + slice_print_mask(" compat_mask", compat_maskp); 559 524 560 525 /* First check hint if it's valid or if we have MAP_FIXED */ 561 526 if (addr != 0 || fixed) { 562 - /* Build a mask for the requested range */ 563 - slice_range_to_mask(addr, len, &mask); 564 - slice_print_mask(" mask", mask); 565 - 566 527 /* Check if we fit in the good mask. If we do, we just return, 567 528 * nothing else to do 568 529 */ 569 - if (slice_check_fit(mm, mask, good_mask)) { 530 + if (slice_check_range_fits(mm, &good_mask, addr, len)) { 570 531 slice_dbg(" fits good !\n"); 571 - return addr; 532 + newaddr = addr; 533 + goto return_addr; 572 534 } 573 535 } else { 574 536 /* Now let's see if we can find something in the existing 575 537 * slices for that size 576 538 */ 577 - newaddr = slice_find_area(mm, len, good_mask, 539 + newaddr = slice_find_area(mm, len, &good_mask, 578 540 psize, topdown, high_limit); 579 541 if (newaddr != -ENOMEM) { 580 542 /* Found within the good mask, we don't have to setup, 581 543 * we thus return directly 582 544 */ 583 545 slice_dbg(" found area at 0x%lx\n", newaddr); 584 - return newaddr; 546 + goto return_addr; 585 547 } 586 548 } 587 549 /* ··· 596 544 * empty and thus can be converted 597 545 */ 598 546 slice_mask_for_free(mm, &potential_mask, high_limit); 599 - slice_or_mask(&potential_mask, &good_mask); 600 - slice_print_mask(" potential", potential_mask); 547 + slice_or_mask(&potential_mask, &potential_mask, &good_mask); 548 + slice_print_mask(" potential", &potential_mask); 
601 549 602 - if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) { 603 - slice_dbg(" fits potential !\n"); 604 - goto convert; 550 + if (addr != 0 || fixed) { 551 + if (slice_check_range_fits(mm, &potential_mask, addr, len)) { 552 + slice_dbg(" fits potential !\n"); 553 + newaddr = addr; 554 + goto convert; 555 + } 605 556 } 606 557 607 558 /* If we have MAP_FIXED and failed the above steps, then error out */ ··· 617 562 * anywhere in the good area. 618 563 */ 619 564 if (addr) { 620 - addr = slice_find_area(mm, len, good_mask, 621 - psize, topdown, high_limit); 622 - if (addr != -ENOMEM) { 623 - slice_dbg(" found area at 0x%lx\n", addr); 624 - return addr; 565 + newaddr = slice_find_area(mm, len, &good_mask, 566 + psize, topdown, high_limit); 567 + if (newaddr != -ENOMEM) { 568 + slice_dbg(" found area at 0x%lx\n", newaddr); 569 + goto return_addr; 625 570 } 626 571 } 627 572 628 573 /* Now let's see if we can find something in the existing slices 629 574 * for that size plus free slices 630 575 */ 631 - addr = slice_find_area(mm, len, potential_mask, 632 - psize, topdown, high_limit); 576 + newaddr = slice_find_area(mm, len, &potential_mask, 577 + psize, topdown, high_limit); 633 578 634 579 #ifdef CONFIG_PPC_64K_PAGES 635 - if (addr == -ENOMEM && psize == MMU_PAGE_64K) { 580 + if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) { 636 581 /* retry the search with 4k-page slices included */ 637 - slice_or_mask(&potential_mask, &compat_mask); 638 - addr = slice_find_area(mm, len, potential_mask, 639 - psize, topdown, high_limit); 582 + slice_or_mask(&potential_mask, &potential_mask, compat_maskp); 583 + newaddr = slice_find_area(mm, len, &potential_mask, 584 + psize, topdown, high_limit); 640 585 } 641 586 #endif 642 587 643 - if (addr == -ENOMEM) 588 + if (newaddr == -ENOMEM) 644 589 return -ENOMEM; 645 590 646 - slice_range_to_mask(addr, len, &mask); 647 - slice_dbg(" found potential area at 0x%lx\n", addr); 648 - slice_print_mask(" mask", mask); 
591 + slice_range_to_mask(newaddr, len, &potential_mask); 592 + slice_dbg(" found potential area at 0x%lx\n", newaddr); 593 + slice_print_mask(" mask", &potential_mask); 649 594 650 595 convert: 651 - slice_andnot_mask(&mask, &good_mask); 652 - slice_andnot_mask(&mask, &compat_mask); 653 - if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) { 654 - slice_convert(mm, mask, psize); 596 + /* 597 + * Try to allocate the context before we do slice convert 598 + * so that we handle the context allocation failure gracefully. 599 + */ 600 + if (need_extra_context(mm, newaddr)) { 601 + if (alloc_extended_context(mm, newaddr) < 0) 602 + return -ENOMEM; 603 + } 604 + 605 + slice_andnot_mask(&potential_mask, &potential_mask, &good_mask); 606 + if (compat_maskp && !fixed) 607 + slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp); 608 + if (potential_mask.low_slices || 609 + (SLICE_NUM_HIGH && 610 + !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) { 611 + slice_convert(mm, &potential_mask, psize); 655 612 if (psize > MMU_PAGE_BASE) 656 613 on_each_cpu(slice_flush_segments, mm, 1); 657 614 } 658 - return addr; 615 + return newaddr; 659 616 617 + return_addr: 618 + if (need_extra_context(mm, newaddr)) { 619 + if (alloc_extended_context(mm, newaddr) < 0) 620 + return -ENOMEM; 621 + } 622 + return newaddr; 660 623 } 661 624 EXPORT_SYMBOL_GPL(slice_get_unmapped_area); 662 625 ··· 700 627 701 628 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) 702 629 { 703 - unsigned char *hpsizes; 630 + unsigned char *psizes; 704 631 int index, mask_index; 705 632 706 - /* 707 - * Radix doesn't use slice, but can get enabled along with MMU_SLICE 708 - */ 709 - if (radix_enabled()) { 710 - #ifdef CONFIG_PPC_64K_PAGES 711 - return MMU_PAGE_64K; 712 - #else 713 - return MMU_PAGE_4K; 714 - #endif 715 - } 633 + VM_BUG_ON(radix_enabled()); 634 + 716 635 if (addr < SLICE_LOW_TOP) { 717 - u64 lpsizes; 718 - lpsizes = 
mm->context.low_slices_psize; 636 + psizes = mm->context.low_slices_psize; 719 637 index = GET_LOW_SLICE_INDEX(addr); 720 - return (lpsizes >> (index * 4)) & 0xf; 638 + } else { 639 + psizes = mm->context.high_slices_psize; 640 + index = GET_HIGH_SLICE_INDEX(addr); 721 641 } 722 - hpsizes = mm->context.high_slices_psize; 723 - index = GET_HIGH_SLICE_INDEX(addr); 724 642 mask_index = index & 0x1; 725 - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf; 643 + return (psizes[index >> 1] >> (mask_index * 4)) & 0xf; 726 644 } 727 645 EXPORT_SYMBOL_GPL(get_slice_psize); 728 646 729 - /* 730 - * This is called by hash_page when it needs to do a lazy conversion of 731 - * an address space from real 64K pages to combo 4K pages (typically 732 - * when hitting a non cacheable mapping on a processor or hypervisor 733 - * that won't allow them for 64K pages). 734 - * 735 - * This is also called in init_new_context() to change back the user 736 - * psize from whatever the parent context had it set to 737 - * N.B. This may be called before mm->context.id has been set. 738 - * 739 - * This function will only change the content of the {low,high)_slice_psize 740 - * masks, it will not flush SLBs as this shall be handled lazily by the 741 - * caller. 
742 - */ 743 - void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) 647 + void slice_init_new_context_exec(struct mm_struct *mm) 744 648 { 745 - int index, mask_index; 746 - unsigned char *hpsizes; 747 - unsigned long flags, lpsizes; 748 - unsigned int old_psize; 749 - int i; 649 + unsigned char *hpsizes, *lpsizes; 650 + struct slice_mask *mask; 651 + unsigned int psize = mmu_virtual_psize; 750 652 751 - slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize); 653 + slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm); 752 654 753 - VM_BUG_ON(radix_enabled()); 754 - spin_lock_irqsave(&slice_convert_lock, flags); 755 - 756 - old_psize = mm->context.user_psize; 757 - slice_dbg(" old_psize=%d\n", old_psize); 758 - if (old_psize == psize) 759 - goto bail; 655 + /* 656 + * In the case of exec, use the default limit. In the 657 + * case of fork it is just inherited from the mm being 658 + * duplicated. 659 + */ 660 + #ifdef CONFIG_PPC64 661 + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; 662 + #else 663 + mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; 664 + #endif 760 665 761 666 mm->context.user_psize = psize; 762 - wmb(); 763 667 668 + /* 669 + * Set all slice psizes to the default. 
670 + */ 764 671 lpsizes = mm->context.low_slices_psize; 765 - for (i = 0; i < SLICE_NUM_LOW; i++) 766 - if (((lpsizes >> (i * 4)) & 0xf) == old_psize) 767 - lpsizes = (lpsizes & ~(0xful << (i * 4))) | 768 - (((unsigned long)psize) << (i * 4)); 769 - /* Assign the value back */ 770 - mm->context.low_slices_psize = lpsizes; 672 + memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); 771 673 772 674 hpsizes = mm->context.high_slices_psize; 773 - for (i = 0; i < SLICE_NUM_HIGH; i++) { 774 - mask_index = i & 0x1; 775 - index = i >> 1; 776 - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize) 777 - hpsizes[index] = (hpsizes[index] & 778 - ~(0xf << (mask_index * 4))) | 779 - (((unsigned long)psize) << (mask_index * 4)); 780 - } 675 + memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); 781 676 782 - 783 - 784 - 785 - slice_dbg(" lsps=%lx, hsps=%lx\n", 786 - (unsigned long)mm->context.low_slices_psize, 787 - (unsigned long)mm->context.high_slices_psize); 788 - 789 - bail: 790 - spin_unlock_irqrestore(&slice_convert_lock, flags); 677 + /* 678 + * Slice mask cache starts zeroed, fill the default size cache. 679 + */ 680 + mask = slice_mask_for_size(mm, psize); 681 + mask->low_slices = ~0UL; 682 + if (SLICE_NUM_HIGH) 683 + bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); 791 684 } 792 685 793 686 void slice_set_range_psize(struct mm_struct *mm, unsigned long start, ··· 764 725 VM_BUG_ON(radix_enabled()); 765 726 766 727 slice_range_to_mask(start, len, &mask); 767 - slice_convert(mm, mask, psize); 728 + slice_convert(mm, &mask, psize); 768 729 } 769 730 770 731 #ifdef CONFIG_HUGETLB_PAGE ··· 787 748 * for now as we only use slices with hugetlbfs enabled. This should 788 749 * be fixed as the generic code gets fixed. 
789 750 */ 790 - int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 751 + int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, 791 752 unsigned long len) 792 753 { 793 - struct slice_mask mask, available; 754 + const struct slice_mask *maskp; 794 755 unsigned int psize = mm->context.user_psize; 795 - unsigned long high_limit = mm->context.slb_addr_limit; 796 756 797 - if (radix_enabled()) 798 - return 0; 757 + VM_BUG_ON(radix_enabled()); 799 758 800 - slice_range_to_mask(addr, len, &mask); 801 - slice_mask_for_size(mm, psize, &available, high_limit); 759 + maskp = slice_mask_for_size(mm, psize); 802 760 #ifdef CONFIG_PPC_64K_PAGES 803 761 /* We need to account for 4k slices too */ 804 762 if (psize == MMU_PAGE_64K) { 805 - struct slice_mask compat_mask; 806 - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); 807 - slice_or_mask(&available, &compat_mask); 763 + const struct slice_mask *compat_maskp; 764 + struct slice_mask available; 765 + 766 + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); 767 + slice_or_mask(&available, maskp, compat_maskp); 768 + return !slice_check_range_fits(mm, &available, addr, len); 808 769 } 809 770 #endif 810 771 811 - #if 0 /* too verbose */ 812 - slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", 813 - mm, addr, len); 814 - slice_print_mask(" mask", mask); 815 - slice_print_mask(" available", available); 816 - #endif 817 - return !slice_check_fit(mm, mask, available); 772 + return !slice_check_range_fits(mm, maskp, addr, len); 818 773 } 819 774 #endif
+7 -7
arch/powerpc/mm/tlb-radix.c
··· 98 98 rb |= set << PPC_BITLSHIFT(51); 99 99 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 100 100 prs = 1; /* process scoped */ 101 - r = 1; /* raidx format */ 101 + r = 1; /* radix format */ 102 102 103 103 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 104 104 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ··· 112 112 rb = PPC_BIT(53); /* IS = 1 */ 113 113 rs = pid << PPC_BITLSHIFT(31); 114 114 prs = 1; /* process scoped */ 115 - r = 1; /* raidx format */ 115 + r = 1; /* radix format */ 116 116 117 117 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 118 118 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ··· 128 128 rb |= ap << PPC_BITLSHIFT(58); 129 129 rs = pid << PPC_BITLSHIFT(31); 130 130 prs = 1; /* process scoped */ 131 - r = 1; /* raidx format */ 131 + r = 1; /* radix format */ 132 132 133 133 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 134 134 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ··· 144 144 rb |= ap << PPC_BITLSHIFT(58); 145 145 rs = pid << PPC_BITLSHIFT(31); 146 146 prs = 1; /* process scoped */ 147 - r = 1; /* raidx format */ 147 + r = 1; /* radix format */ 148 148 149 149 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 150 150 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); ··· 668 668 669 669 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 670 670 prs = 0; /* partition scoped */ 671 - r = 1; /* raidx format */ 671 + r = 1; /* radix format */ 672 672 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 673 673 674 674 asm volatile("ptesync": : :"memory"); ··· 706 706 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 707 707 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) 708 708 { 709 - unsigned int pid = mm->context.id; 709 + unsigned long pid = mm->context.id; 710 710 711 711 if (unlikely(pid == MMU_NO_CONTEXT)) 712 712 return; ··· 734 734 for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { 735 735 if (sib == cpu) 736 736 continue; 737 - if 
(paca[sib].kvm_hstate.kvm_vcpu) 737 + if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) 738 738 flush = true; 739 739 } 740 740 if (flush)
+1 -1
arch/powerpc/mm/tlb_hash64.c
··· 89 89 /* Build full vaddr */ 90 90 if (!is_kernel_addr(addr)) { 91 91 ssize = user_segment_size(addr); 92 - vsid = get_vsid(mm->context.id, addr, ssize); 92 + vsid = get_user_vsid(&mm->context, addr, ssize); 93 93 } else { 94 94 vsid = get_kernel_vsid(addr, mmu_kernel_ssize); 95 95 ssize = mmu_kernel_ssize;
+1 -1
arch/powerpc/oprofile/cell/spu_task_sync.c
··· 208 208 /* Create cached_info and set spu_info[spu->number] to point to it. 209 209 * spu->number is a system-wide value, not a per-node value. 210 210 */ 211 - info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); 211 + info = kzalloc(sizeof(*info), GFP_KERNEL); 212 212 if (!info) { 213 213 printk(KERN_ERR "SPU_PROF: " 214 214 "%s, line %d: create vma_map failed\n",
+2 -2
arch/powerpc/oprofile/cell/vma_map.c
··· 69 69 unsigned int size, unsigned int offset, unsigned int guard_ptr, 70 70 unsigned int guard_val) 71 71 { 72 - struct vma_to_fileoffset_map *new = 73 - kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL); 72 + struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL); 73 + 74 74 if (!new) { 75 75 printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n", 76 76 __func__, __LINE__);
+1 -1
arch/powerpc/perf/Makefile
··· 4 4 obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o 5 5 6 6 obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o 7 - obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ 7 + obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ 8 8 power5+-pmu.o power6-pmu.o power7-pmu.o \ 9 9 isa207-common.o power8-pmu.o power9-pmu.o 10 10 obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
+50
arch/powerpc/perf/core-book3s.c
··· 198 198 199 199 if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) 200 200 *addrp = mfspr(SPRN_SDAR); 201 + 202 + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && 203 + is_kernel_addr(mfspr(SPRN_SDAR))) 204 + *addrp = 0; 201 205 } 202 206 203 207 static bool regs_sihv(struct pt_regs *regs) ··· 459 455 460 456 if (!addr) 461 457 /* invalid entry */ 458 + continue; 459 + 460 + /* 461 + * BHRB rolling buffer could very much contain the kernel 462 + * addresses at this point. Check the privileges before 463 + * exporting it to userspace (avoid exposure of regions 464 + * where we could have speculative execution) 465 + */ 466 + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && 467 + is_kernel_addr(addr)) 462 468 continue; 463 469 464 470 /* Branches are read most recent first (ie. mfbhrb 0 is ··· 1240 1226 */ 1241 1227 write_mmcr0(cpuhw, val); 1242 1228 mb(); 1229 + isync(); 1243 1230 1244 1231 /* 1245 1232 * Disable instruction sampling if it was enabled ··· 1249 1234 mtspr(SPRN_MMCRA, 1250 1235 cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); 1251 1236 mb(); 1237 + isync(); 1252 1238 } 1253 1239 1254 1240 cpuhw->disabled = 1; 1255 1241 cpuhw->n_added = 0; 1256 1242 1257 1243 ebb_switch_out(mmcr0); 1244 + 1245 + #ifdef CONFIG_PPC64 1246 + /* 1247 + * These are readable by userspace, may contain kernel 1248 + * addresses and are not switched by context switch, so clear 1249 + * them now to avoid leaking anything to userspace in general 1250 + * including to another process. 
1251 + */ 1252 + if (ppmu->flags & PPMU_ARCH_207S) { 1253 + mtspr(SPRN_SDAR, 0); 1254 + mtspr(SPRN_SIAR, 0); 1255 + } 1256 + #endif 1258 1257 } 1259 1258 1260 1259 local_irq_restore(flags); ··· 1839 1810 return 0; 1840 1811 } 1841 1812 1813 + static bool is_event_blacklisted(u64 ev) 1814 + { 1815 + int i; 1816 + 1817 + for (i=0; i < ppmu->n_blacklist_ev; i++) { 1818 + if (ppmu->blacklist_ev[i] == ev) 1819 + return true; 1820 + } 1821 + 1822 + return false; 1823 + } 1824 + 1842 1825 static int power_pmu_event_init(struct perf_event *event) 1843 1826 { 1844 1827 u64 ev; ··· 1876 1835 ev = event->attr.config; 1877 1836 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 1878 1837 return -EOPNOTSUPP; 1838 + 1839 + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) 1840 + return -EINVAL; 1879 1841 ev = ppmu->generic_events[ev]; 1880 1842 break; 1881 1843 case PERF_TYPE_HW_CACHE: 1882 1844 err = hw_perf_cache_event(event->attr.config, &ev); 1883 1845 if (err) 1884 1846 return err; 1847 + 1848 + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) 1849 + return -EINVAL; 1885 1850 break; 1886 1851 case PERF_TYPE_RAW: 1887 1852 ev = event->attr.config; 1853 + 1854 + if (ppmu->blacklist_ev && is_event_blacklisted(ev)) 1855 + return -EINVAL; 1888 1856 break; 1889 1857 default: 1890 1858 return -ENOENT;
-622
arch/powerpc/perf/power4-pmu.c
··· 1 - /* 2 - * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. 3 - * 4 - * Copyright 2009 Paul Mackerras, IBM Corporation. 5 - * 6 - * This program is free software; you can redistribute it and/or 7 - * modify it under the terms of the GNU General Public License 8 - * as published by the Free Software Foundation; either version 9 - * 2 of the License, or (at your option) any later version. 10 - */ 11 - #include <linux/kernel.h> 12 - #include <linux/perf_event.h> 13 - #include <linux/string.h> 14 - #include <asm/reg.h> 15 - #include <asm/cputable.h> 16 - 17 - /* 18 - * Bits in event code for POWER4 19 - */ 20 - #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ 21 - #define PM_PMC_MSK 0xf 22 - #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ 23 - #define PM_UNIT_MSK 0xf 24 - #define PM_LOWER_SH 6 25 - #define PM_LOWER_MSK 1 26 - #define PM_LOWER_MSKS 0x40 27 - #define PM_BYTE_SH 4 /* Byte number of event bus to use */ 28 - #define PM_BYTE_MSK 3 29 - #define PM_PMCSEL_MSK 7 30 - 31 - /* 32 - * Unit code values 33 - */ 34 - #define PM_FPU 1 35 - #define PM_ISU1 2 36 - #define PM_IFU 3 37 - #define PM_IDU0 4 38 - #define PM_ISU1_ALT 6 39 - #define PM_ISU2 7 40 - #define PM_IFU_ALT 8 41 - #define PM_LSU0 9 42 - #define PM_LSU1 0xc 43 - #define PM_GPS 0xf 44 - 45 - /* 46 - * Bits in MMCR0 for POWER4 47 - */ 48 - #define MMCR0_PMC1SEL_SH 8 49 - #define MMCR0_PMC2SEL_SH 1 50 - #define MMCR_PMCSEL_MSK 0x1f 51 - 52 - /* 53 - * Bits in MMCR1 for POWER4 54 - */ 55 - #define MMCR1_TTM0SEL_SH 62 56 - #define MMCR1_TTC0SEL_SH 61 57 - #define MMCR1_TTM1SEL_SH 59 58 - #define MMCR1_TTC1SEL_SH 58 59 - #define MMCR1_TTM2SEL_SH 56 60 - #define MMCR1_TTC2SEL_SH 55 61 - #define MMCR1_TTM3SEL_SH 53 62 - #define MMCR1_TTC3SEL_SH 52 63 - #define MMCR1_TTMSEL_MSK 3 64 - #define MMCR1_TD_CP_DBG0SEL_SH 50 65 - #define MMCR1_TD_CP_DBG1SEL_SH 48 66 - #define MMCR1_TD_CP_DBG2SEL_SH 46 67 - #define MMCR1_TD_CP_DBG3SEL_SH 44 68 - #define 
MMCR1_DEBUG0SEL_SH 43 69 - #define MMCR1_DEBUG1SEL_SH 42 70 - #define MMCR1_DEBUG2SEL_SH 41 71 - #define MMCR1_DEBUG3SEL_SH 40 72 - #define MMCR1_PMC1_ADDER_SEL_SH 39 73 - #define MMCR1_PMC2_ADDER_SEL_SH 38 74 - #define MMCR1_PMC6_ADDER_SEL_SH 37 75 - #define MMCR1_PMC5_ADDER_SEL_SH 36 76 - #define MMCR1_PMC8_ADDER_SEL_SH 35 77 - #define MMCR1_PMC7_ADDER_SEL_SH 34 78 - #define MMCR1_PMC3_ADDER_SEL_SH 33 79 - #define MMCR1_PMC4_ADDER_SEL_SH 32 80 - #define MMCR1_PMC3SEL_SH 27 81 - #define MMCR1_PMC4SEL_SH 22 82 - #define MMCR1_PMC5SEL_SH 17 83 - #define MMCR1_PMC6SEL_SH 12 84 - #define MMCR1_PMC7SEL_SH 7 85 - #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ 86 - 87 - static short mmcr1_adder_bits[8] = { 88 - MMCR1_PMC1_ADDER_SEL_SH, 89 - MMCR1_PMC2_ADDER_SEL_SH, 90 - MMCR1_PMC3_ADDER_SEL_SH, 91 - MMCR1_PMC4_ADDER_SEL_SH, 92 - MMCR1_PMC5_ADDER_SEL_SH, 93 - MMCR1_PMC6_ADDER_SEL_SH, 94 - MMCR1_PMC7_ADDER_SEL_SH, 95 - MMCR1_PMC8_ADDER_SEL_SH 96 - }; 97 - 98 - /* 99 - * Bits in MMCRA 100 - */ 101 - #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ 102 - 103 - /* 104 - * Layout of constraint bits: 105 - * 6666555555555544444444443333333333222222222211111111110000000000 106 - * 3210987654321098765432109876543210987654321098765432109876543210 107 - * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> 108 - * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 109 - * \SMPL ||\TTC3SEL 110 - * |\TTC_IFU_SEL 111 - * \TTM2SEL0 112 - * 113 - * SMPL - SAMPLE_ENABLE constraint 114 - * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 115 - * 116 - * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 117 - * 55: UC1 error 0x0080_0000_0000_0000 118 - * 54: FPU events needed 0x0040_0000_0000_0000 119 - * 53: ISU1 events needed 0x0020_0000_0000_0000 120 - * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 121 - * 122 - * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 123 - * 51: UC2 error 0x0008_0000_0000_0000 124 - * 50: FPU events 
needed 0x0004_0000_0000_0000 125 - * 49: IFU events needed 0x0002_0000_0000_0000 126 - * 48: LSU0 events needed 0x0001_0000_0000_0000 127 - * 128 - * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 129 - * 47: UC3 error 0x8000_0000_0000 130 - * 46: LSU0 events needed 0x4000_0000_0000 131 - * 45: IFU events needed 0x2000_0000_0000 132 - * 44: IDU0|ISU2 events needed 0x1000_0000_0000 133 - * 43: ISU1 events needed 0x0800_0000_0000 134 - * 135 - * TTM2SEL0 136 - * 42: 0 = IDU0 events needed 137 - * 1 = ISU2 events needed 0x0400_0000_0000 138 - * 139 - * TTC_IFU_SEL 140 - * 41: 0 = IFU.U events needed 141 - * 1 = IFU.L events needed 0x0200_0000_0000 142 - * 143 - * TTC3SEL 144 - * 40: 0 = LSU1.U events needed 145 - * 1 = LSU1.L events needed 0x0100_0000_0000 146 - * 147 - * PS1 148 - * 39: PS1 error 0x0080_0000_0000 149 - * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 150 - * 151 - * PS2 152 - * 35: PS2 error 0x0008_0000_0000 153 - * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 154 - * 155 - * B0 156 - * 28-31: Byte 0 event source 0xf000_0000 157 - * 1 = FPU 158 - * 2 = ISU1 159 - * 3 = IFU 160 - * 4 = IDU0 161 - * 7 = ISU2 162 - * 9 = LSU0 163 - * c = LSU1 164 - * f = GPS 165 - * 166 - * B1, B2, B3 167 - * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources 168 - * 169 - * P8 170 - * 15: P8 error 0x8000 171 - * 14-15: Count of events needing PMC8 172 - * 173 - * P1..P7 174 - * 0-13: Count of events needing PMC1..PMC7 175 - * 176 - * Note: this doesn't allow events using IFU.U to be combined with events 177 - * using IFU.L, though that is feasible (using TTM0 and TTM2). However 178 - * there are no listed events for IFU.L (they are debug events not 179 - * verified for performance monitoring) so this shouldn't cause a 180 - * problem. 
181 - */ 182 - 183 - static struct unitinfo { 184 - unsigned long value, mask; 185 - int unit; 186 - int lowerbit; 187 - } p4_unitinfo[16] = { 188 - [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 }, 189 - [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, 190 - [PM_ISU1_ALT] = 191 - { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 }, 192 - [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, 193 - [PM_IFU_ALT] = 194 - { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 }, 195 - [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 }, 196 - [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 }, 197 - [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 }, 198 - [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 }, 199 - [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 } 200 - }; 201 - 202 - static unsigned char direct_marked_event[8] = { 203 - (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ 204 - (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ 205 - (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ 206 - (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ 207 - (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ 208 - (1<<3) | (1<<4) | (1<<5), 209 - /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ 210 - (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ 211 - (1<<4), /* PMC8: PM_MRK_LSU_FIN */ 212 - }; 213 - 214 - /* 215 - * Returns 1 if event counts things relating to marked instructions 216 - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. 
217 - */ 218 - static int p4_marked_instr_event(u64 event) 219 - { 220 - int pmc, psel, unit, byte, bit; 221 - unsigned int mask; 222 - 223 - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 224 - psel = event & PM_PMCSEL_MSK; 225 - if (pmc) { 226 - if (direct_marked_event[pmc - 1] & (1 << psel)) 227 - return 1; 228 - if (psel == 0) /* add events */ 229 - bit = (pmc <= 4)? pmc - 1: 8 - pmc; 230 - else if (psel == 6) /* decode events */ 231 - bit = 4; 232 - else 233 - return 0; 234 - } else 235 - bit = psel; 236 - 237 - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 238 - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 239 - mask = 0; 240 - switch (unit) { 241 - case PM_LSU1: 242 - if (event & PM_LOWER_MSKS) 243 - mask = 1 << 28; /* byte 7 bit 4 */ 244 - else 245 - mask = 6 << 24; /* byte 3 bits 1 and 2 */ 246 - break; 247 - case PM_LSU0: 248 - /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ 249 - mask = 0x083dff00; 250 - } 251 - return (mask >> (byte * 8 + bit)) & 1; 252 - } 253 - 254 - static int p4_get_constraint(u64 event, unsigned long *maskp, 255 - unsigned long *valp) 256 - { 257 - int pmc, byte, unit, lower, sh; 258 - unsigned long mask = 0, value = 0; 259 - int grp = -1; 260 - 261 - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; 262 - if (pmc) { 263 - if (pmc > 8) 264 - return -1; 265 - sh = (pmc - 1) * 2; 266 - mask |= 2 << sh; 267 - value |= 1 << sh; 268 - grp = ((pmc - 1) >> 1) & 1; 269 - } 270 - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; 271 - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; 272 - if (unit) { 273 - lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; 274 - 275 - /* 276 - * Bus events on bytes 0 and 2 can be counted 277 - * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. 
278 - */ 279 - if (!pmc) 280 - grp = byte & 1; 281 - 282 - if (!p4_unitinfo[unit].unit) 283 - return -1; 284 - mask |= p4_unitinfo[unit].mask; 285 - value |= p4_unitinfo[unit].value; 286 - sh = p4_unitinfo[unit].lowerbit; 287 - if (sh > 1) 288 - value |= (unsigned long)lower << sh; 289 - else if (lower != sh) 290 - return -1; 291 - unit = p4_unitinfo[unit].unit; 292 - 293 - /* Set byte lane select field */ 294 - mask |= 0xfULL << (28 - 4 * byte); 295 - value |= (unsigned long)unit << (28 - 4 * byte); 296 - } 297 - if (grp == 0) { 298 - /* increment PMC1/2/5/6 field */ 299 - mask |= 0x8000000000ull; 300 - value |= 0x1000000000ull; 301 - } else { 302 - /* increment PMC3/4/7/8 field */ 303 - mask |= 0x800000000ull; 304 - value |= 0x100000000ull; 305 - } 306 - 307 - /* Marked instruction events need sample_enable set */ 308 - if (p4_marked_instr_event(event)) { 309 - mask |= 1ull << 56; 310 - value |= 1ull << 56; 311 - } 312 - 313 - /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ 314 - if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) 315 - mask |= 1ull << 56; 316 - 317 - *maskp = mask; 318 - *valp = value; 319 - return 0; 320 - } 321 - 322 - static unsigned int ppc_inst_cmpl[] = { 323 - 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 324 - }; 325 - 326 - static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) 327 - { 328 - int i, j, na; 329 - 330 - alt[0] = event; 331 - na = 1; 332 - 333 - /* 2 possibilities for PM_GRP_DISP_REJECT */ 334 - if (event == 0x8003 || event == 0x0224) { 335 - alt[1] = event ^ (0x8003 ^ 0x0224); 336 - return 2; 337 - } 338 - 339 - /* 2 possibilities for PM_ST_MISS_L1 */ 340 - if (event == 0x0c13 || event == 0x0c23) { 341 - alt[1] = event ^ (0x0c13 ^ 0x0c23); 342 - return 2; 343 - } 344 - 345 - /* several possibilities for PM_INST_CMPL */ 346 - for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { 347 - if (event == ppc_inst_cmpl[i]) { 348 - for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) 349 - if (j != i) 350 - 
alt[na++] = ppc_inst_cmpl[j]; 351 - break; 352 - } 353 - } 354 - 355 - return na; 356 - } 357 - 358 - static int p4_compute_mmcr(u64 event[], int n_ev, 359 - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) 360 - { 361 - unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; 362 - unsigned int pmc, unit, byte, psel, lower; 363 - unsigned int ttm, grp; 364 - unsigned int pmc_inuse = 0; 365 - unsigned int pmc_grp_use[2]; 366 - unsigned char busbyte[4]; 367 - unsigned char unituse[16]; 368 - unsigned int unitlower = 0; 369 - int i; 370 - 371 - if (n_ev > 8) 372 - return -1; 373 - 374 - /* First pass to count resource use */ 375 - pmc_grp_use[0] = pmc_grp_use[1] = 0; 376 - memset(busbyte, 0, sizeof(busbyte)); 377 - memset(unituse, 0, sizeof(unituse)); 378 - for (i = 0; i < n_ev; ++i) { 379 - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 380 - if (pmc) { 381 - if (pmc_inuse & (1 << (pmc - 1))) 382 - return -1; 383 - pmc_inuse |= 1 << (pmc - 1); 384 - /* count 1/2/5/6 vs 3/4/7/8 use */ 385 - ++pmc_grp_use[((pmc - 1) >> 1) & 1]; 386 - } 387 - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 388 - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 389 - lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; 390 - if (unit) { 391 - if (!pmc) 392 - ++pmc_grp_use[byte & 1]; 393 - if (unit == 6 || unit == 8) 394 - /* map alt ISU1/IFU codes: 6->2, 8->3 */ 395 - unit = (unit >> 1) - 1; 396 - if (busbyte[byte] && busbyte[byte] != unit) 397 - return -1; 398 - busbyte[byte] = unit; 399 - lower <<= unit; 400 - if (unituse[unit] && lower != (unitlower & lower)) 401 - return -1; 402 - unituse[unit] = 1; 403 - unitlower |= lower; 404 - } 405 - } 406 - if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) 407 - return -1; 408 - 409 - /* 410 - * Assign resources and set multiplexer selects. 411 - * 412 - * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. 
413 - * Each TTMx can only select one unit, but since 414 - * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, 415 - * we have some choices. 416 - */ 417 - if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { 418 - unituse[6] = 1; /* Move 2 to 6 */ 419 - unituse[2] = 0; 420 - } 421 - if (unituse[3] & (unituse[1] | unituse[2])) { 422 - unituse[8] = 1; /* Move 3 to 8 */ 423 - unituse[3] = 0; 424 - unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); 425 - } 426 - /* Check only one unit per TTMx */ 427 - if (unituse[1] + unituse[2] + unituse[3] > 1 || 428 - unituse[4] + unituse[6] + unituse[7] > 1 || 429 - unituse[8] + unituse[9] > 1 || 430 - (unituse[5] | unituse[10] | unituse[11] | 431 - unituse[13] | unituse[14])) 432 - return -1; 433 - 434 - /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ 435 - mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2]) 436 - << MMCR1_TTM0SEL_SH; 437 - mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2) 438 - << MMCR1_TTM1SEL_SH; 439 - mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH; 440 - 441 - /* Set TTCxSEL fields. */ 442 - if (unitlower & 0xe) 443 - mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; 444 - if (unitlower & 0xf0) 445 - mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; 446 - if (unitlower & 0xf00) 447 - mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; 448 - if (unitlower & 0x7000) 449 - mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; 450 - 451 - /* Set byte lane select fields. 
*/ 452 - for (byte = 0; byte < 4; ++byte) { 453 - unit = busbyte[byte]; 454 - if (!unit) 455 - continue; 456 - if (unit == 0xf) { 457 - /* special case for GPS */ 458 - mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); 459 - } else { 460 - if (!unituse[unit]) 461 - ttm = unit - 1; /* 2->1, 3->2 */ 462 - else 463 - ttm = unit >> 2; 464 - mmcr1 |= (unsigned long)ttm 465 - << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); 466 - } 467 - } 468 - 469 - /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ 470 - for (i = 0; i < n_ev; ++i) { 471 - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; 472 - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; 473 - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; 474 - psel = event[i] & PM_PMCSEL_MSK; 475 - if (!pmc) { 476 - /* Bus event or 00xxx direct event (off or cycles) */ 477 - if (unit) 478 - psel |= 0x10 | ((byte & 2) << 2); 479 - for (pmc = 0; pmc < 8; ++pmc) { 480 - if (pmc_inuse & (1 << pmc)) 481 - continue; 482 - grp = (pmc >> 1) & 1; 483 - if (unit) { 484 - if (grp == (byte & 1)) 485 - break; 486 - } else if (pmc_grp_use[grp] < 4) { 487 - ++pmc_grp_use[grp]; 488 - break; 489 - } 490 - } 491 - pmc_inuse |= 1 << pmc; 492 - } else { 493 - /* Direct event */ 494 - --pmc; 495 - if (psel == 0 && (byte & 2)) 496 - /* add events on higher-numbered bus */ 497 - mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; 498 - else if (psel == 6 && byte == 3) 499 - /* seem to need to set sample_enable here */ 500 - mmcra |= MMCRA_SAMPLE_ENABLE; 501 - psel |= 8; 502 - } 503 - if (pmc <= 1) 504 - mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); 505 - else 506 - mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); 507 - if (pmc == 7) /* PMC8 */ 508 - mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; 509 - hwc[i] = pmc; 510 - if (p4_marked_instr_event(event[i])) 511 - mmcra |= MMCRA_SAMPLE_ENABLE; 512 - } 513 - 514 - if (pmc_inuse & 1) 515 - mmcr0 |= MMCR0_PMC1CE; 516 - if (pmc_inuse & 0xfe) 517 - mmcr0 |= MMCR0_PMCjCE; 518 - 519 - mmcra |= 0x2000; /* mark only one IOP 
per PPC instruction */ 520 - 521 - /* Return MMCRx values */ 522 - mmcr[0] = mmcr0; 523 - mmcr[1] = mmcr1; 524 - mmcr[2] = mmcra; 525 - return 0; 526 - } 527 - 528 - static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[]) 529 - { 530 - /* 531 - * Setting the PMCxSEL field to 0 disables PMC x. 532 - * (Note that pmc is 0-based here, not 1-based.) 533 - */ 534 - if (pmc <= 1) { 535 - mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); 536 - } else { 537 - mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); 538 - if (pmc == 7) 539 - mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); 540 - } 541 - } 542 - 543 - static int p4_generic_events[] = { 544 - [PERF_COUNT_HW_CPU_CYCLES] = 7, 545 - [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, 546 - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ 547 - [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ 548 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ 549 - [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ 550 - }; 551 - 552 - #define C(x) PERF_COUNT_HW_CACHE_##x 553 - 554 - /* 555 - * Table of generalized cache-related events. 556 - * 0 means not supported, -1 means nonsensical, other values 557 - * are event codes. 
558 - */ 559 - static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 560 - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ 561 - [C(OP_READ)] = { 0x8c10, 0x3c10 }, 562 - [C(OP_WRITE)] = { 0x7c10, 0xc13 }, 563 - [C(OP_PREFETCH)] = { 0xc35, 0 }, 564 - }, 565 - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ 566 - [C(OP_READ)] = { 0, 0 }, 567 - [C(OP_WRITE)] = { -1, -1 }, 568 - [C(OP_PREFETCH)] = { 0, 0 }, 569 - }, 570 - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ 571 - [C(OP_READ)] = { 0, 0 }, 572 - [C(OP_WRITE)] = { 0, 0 }, 573 - [C(OP_PREFETCH)] = { 0xc34, 0 }, 574 - }, 575 - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ 576 - [C(OP_READ)] = { 0, 0x904 }, 577 - [C(OP_WRITE)] = { -1, -1 }, 578 - [C(OP_PREFETCH)] = { -1, -1 }, 579 - }, 580 - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ 581 - [C(OP_READ)] = { 0, 0x900 }, 582 - [C(OP_WRITE)] = { -1, -1 }, 583 - [C(OP_PREFETCH)] = { -1, -1 }, 584 - }, 585 - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ 586 - [C(OP_READ)] = { 0x330, 0x331 }, 587 - [C(OP_WRITE)] = { -1, -1 }, 588 - [C(OP_PREFETCH)] = { -1, -1 }, 589 - }, 590 - [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */ 591 - [C(OP_READ)] = { -1, -1 }, 592 - [C(OP_WRITE)] = { -1, -1 }, 593 - [C(OP_PREFETCH)] = { -1, -1 }, 594 - }, 595 - }; 596 - 597 - static struct power_pmu power4_pmu = { 598 - .name = "POWER4/4+", 599 - .n_counter = 8, 600 - .max_alternatives = 5, 601 - .add_fields = 0x0000001100005555ul, 602 - .test_adder = 0x0011083300000000ul, 603 - .compute_mmcr = p4_compute_mmcr, 604 - .get_constraint = p4_get_constraint, 605 - .get_alternatives = p4_get_alternatives, 606 - .disable_pmc = p4_disable_pmc, 607 - .n_generic = ARRAY_SIZE(p4_generic_events), 608 - .generic_events = p4_generic_events, 609 - .cache_events = &power4_cache_events, 610 - .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, 611 - }; 612 - 613 - static int __init init_power4_pmu(void) 614 - { 615 - if (!cur_cpu_spec->oprofile_cpu_type || 616 - strcmp(cur_cpu_spec->oprofile_cpu_type, 
"ppc64/power4")) 617 - return -ENODEV; 618 - 619 - return register_power_pmu(&power4_pmu); 620 - } 621 - 622 - early_initcall(init_power4_pmu);
+28
arch/powerpc/perf/power9-events-list.h
··· 69 69 EVENT(PM_BR_2PATH, 0x20036) 70 70 /* ALternate branch event that are not strongly biased */ 71 71 EVENT(PM_BR_2PATH_ALT, 0x40036) 72 + 73 + /* Blacklisted events */ 74 + EVENT(PM_MRK_ST_DONE_L2, 0x10134) 75 + EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056) 76 + EVENT(PM_FLOP_CMPL, 0x100f4) 77 + EVENT(PM_MRK_NTF_FIN, 0x20112) 78 + EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024) 79 + EVENT(PM_IFETCH_THROTTLE, 0x3405e) 80 + EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c) 81 + EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056) 82 + EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c) 83 + EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014) 84 + EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c) 85 + EVENT(PM_DTLB_MISS_16G, 0x1c058) 86 + EVENT(PM_DERAT_MISS_2M, 0x1c05a) 87 + EVENT(PM_DTLB_MISS_2M, 0x1c05c) 88 + EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c) 89 + EVENT(PM_DTLB_MISS_4K, 0x2c056) 90 + EVENT(PM_DERAT_MISS_1G, 0x2c05a) 91 + EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152) 92 + EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156) 93 + EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e) 94 + EVENT(PM_DTLB_MISS_64K, 0x3c056) 95 + EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152) 96 + EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156) 97 + EVENT(PM_DTLB_MISS_16M, 0x4c056) 98 + EVENT(PM_DTLB_MISS_1G, 0x4c05a) 99 + EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
+48
arch/powerpc/perf/power9-pmu.c
··· 101 101 #define POWER9_MMCRA_IFM2 0x0000000080000000UL 102 102 #define POWER9_MMCRA_IFM3 0x00000000C0000000UL 103 103 104 + /* Nasty Power9 specific hack */ 105 + #define PVR_POWER9_CUMULUS 0x00002000 106 + 104 107 /* PowerISA v2.07 format attribute structure*/ 105 108 extern struct attribute_group isa207_pmu_format_group; 109 + 110 + int p9_dd21_bl_ev[] = { 111 + PM_MRK_ST_DONE_L2, 112 + PM_RADIX_PWC_L1_HIT, 113 + PM_FLOP_CMPL, 114 + PM_MRK_NTF_FIN, 115 + PM_RADIX_PWC_L2_HIT, 116 + PM_IFETCH_THROTTLE, 117 + PM_MRK_L2_TM_ST_ABORT_SISTER, 118 + PM_RADIX_PWC_L3_HIT, 119 + PM_RUN_CYC_SMT2_MODE, 120 + PM_TM_TX_PASS_RUN_INST, 121 + PM_DISP_HELD_SYNC_HOLD, 122 + }; 123 + 124 + int p9_dd22_bl_ev[] = { 125 + PM_DTLB_MISS_16G, 126 + PM_DERAT_MISS_2M, 127 + PM_DTLB_MISS_2M, 128 + PM_MRK_DTLB_MISS_1G, 129 + PM_DTLB_MISS_4K, 130 + PM_DERAT_MISS_1G, 131 + PM_MRK_DERAT_MISS_2M, 132 + PM_MRK_DTLB_MISS_4K, 133 + PM_MRK_DTLB_MISS_16G, 134 + PM_DTLB_MISS_64K, 135 + PM_MRK_DERAT_MISS_1G, 136 + PM_MRK_DTLB_MISS_64K, 137 + PM_DISP_HELD_SYNC_HOLD, 138 + PM_DTLB_MISS_16M, 139 + PM_DTLB_MISS_1G, 140 + PM_MRK_DTLB_MISS_16M, 141 + }; 106 142 107 143 /* Table of alternatives, sorted by column 0 */ 108 144 static const unsigned int power9_event_alternatives[][MAX_ALT] = { ··· 482 446 static int __init init_power9_pmu(void) 483 447 { 484 448 int rc = 0; 449 + unsigned int pvr = mfspr(SPRN_PVR); 485 450 486 451 /* Comes from cpu_specs[] */ 487 452 if (!cur_cpu_spec->oprofile_cpu_type || 488 453 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) 489 454 return -ENODEV; 455 + 456 + /* Blacklist events */ 457 + if (!(pvr & PVR_POWER9_CUMULUS)) { 458 + if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) { 459 + power9_pmu.blacklist_ev = p9_dd21_bl_ev; 460 + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev); 461 + } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) { 462 + power9_pmu.blacklist_ev = p9_dd22_bl_ev; 463 + power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev); 464 + } 465 + 
} 490 466 491 467 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { 492 468 /*
+3 -2
arch/powerpc/platforms/4xx/msi.c
··· 223 223 224 224 dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n"); 225 225 226 - msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL); 226 + msi = kzalloc(sizeof(*msi), GFP_KERNEL); 227 227 if (!msi) { 228 228 dev_err(&dev->dev, "No memory for MSI structure\n"); 229 229 return -ENOMEM; ··· 241 241 if (!msi_irqs) 242 242 return -ENODEV; 243 243 244 - if (ppc4xx_setup_pcieh_hw(dev, res, msi)) 244 + err = ppc4xx_setup_pcieh_hw(dev, res, msi); 245 + if (err) 245 246 goto error_out; 246 247 247 248 err = ppc4xx_msi_init_allocator(dev, msi);
+1 -1
arch/powerpc/platforms/4xx/ocm.c
··· 339 339 if (IS_ERR_VALUE(offset)) 340 340 continue; 341 341 342 - ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL); 342 + ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL); 343 343 if (!ocm_blk) { 344 344 printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block"); 345 345 rh_free(ocm_reg->rh, offset);
+4 -4
arch/powerpc/platforms/85xx/smp.c
··· 147 147 for (i = 0; i < 500; i++) { 148 148 if (is_cpu_dead(cpu)) { 149 149 #ifdef CONFIG_PPC64 150 - paca[cpu].cpu_start = 0; 150 + paca_ptrs[cpu]->cpu_start = 0; 151 151 #endif 152 152 return; 153 153 } ··· 328 328 return ret; 329 329 330 330 done: 331 - paca[nr].cpu_start = 1; 331 + paca_ptrs[nr]->cpu_start = 1; 332 332 generic_set_cpu_up(nr); 333 333 334 334 return ret; ··· 409 409 } 410 410 411 411 if (disable_threadbit) { 412 - while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) { 412 + while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) { 413 413 barrier(); 414 414 now = mftb(); 415 415 if (!notified && now - start > 1000000) { 416 416 pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n", 417 417 __func__, smp_processor_id(), 418 418 disable_cpu, 419 - paca[disable_cpu].kexec_state); 419 + paca_ptrs[disable_cpu]->kexec_state); 420 420 notified = true; 421 421 } 422 422 }
+1 -7
arch/powerpc/platforms/8xx/m8xx_setup.c
··· 217 217 218 218 static void cpm_cascade(struct irq_desc *desc) 219 219 { 220 - struct irq_chip *chip = irq_desc_get_chip(desc); 221 - int cascade_irq = cpm_get_irq(); 222 - 223 - if (cascade_irq >= 0) 224 - generic_handle_irq(cascade_irq); 225 - 226 - chip->irq_eoi(&desc->irq_data); 220 + generic_handle_irq(cpm_get_irq()); 227 221 } 228 222 229 223 /* Initialize the internal interrupt controllers. The number of
+13 -7
arch/powerpc/platforms/Kconfig.cputype
··· 61 61 help 62 62 There are two families of 64 bit PowerPC chips supported. 63 63 The most common ones are the desktop and server CPUs 64 - (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...) 64 + (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...) 65 65 66 66 The other are the "embedded" processors compliant with the 67 67 "Book 3E" variant of the architecture ··· 87 87 choice 88 88 prompt "CPU selection" 89 89 depends on PPC64 90 - default POWER8_CPU if CPU_LITTLE_ENDIAN 91 90 default GENERIC_CPU 92 91 help 93 92 This will create a kernel which is optimised for a particular CPU. ··· 95 96 If unsure, select Generic. 96 97 97 98 config GENERIC_CPU 98 - bool "Generic" 99 + bool "Generic (POWER4 and above)" 99 100 depends on !CPU_LITTLE_ENDIAN 101 + 102 + config GENERIC_CPU 103 + bool "Generic (POWER8 and above)" 104 + depends on CPU_LITTLE_ENDIAN 105 + select ARCH_HAS_FAST_MULTIPLIER 100 106 101 107 config CELL_CPU 102 108 bool "Cell Broadband Engine" 103 - depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN 104 - 105 - config POWER4_CPU 106 - bool "POWER4" 107 109 depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN 108 110 109 111 config POWER5_CPU ··· 122 122 123 123 config POWER8_CPU 124 124 bool "POWER8" 125 + depends on PPC_BOOK3S_64 126 + select ARCH_HAS_FAST_MULTIPLIER 127 + 128 + config POWER9_CPU 129 + bool "POWER9" 125 130 depends on PPC_BOOK3S_64 126 131 select ARCH_HAS_FAST_MULTIPLIER 127 132 ··· 331 326 config PPC_MM_SLICES 332 327 bool 333 328 default y if PPC_BOOK3S_64 329 + default y if PPC_8xx && HUGETLB_PAGE 334 330 default n 335 331 336 332 config PPC_HAVE_PMU_SUPPORT
+1 -1
arch/powerpc/platforms/cell/axon_msi.c
··· 342 342 343 343 pr_devel("axon_msi: setting up dn %pOF\n", dn); 344 344 345 - msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL); 345 + msic = kzalloc(sizeof(*msic), GFP_KERNEL); 346 346 if (!msic) { 347 347 printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n", 348 348 dn);
+2 -2
arch/powerpc/platforms/cell/smp.c
··· 83 83 pcpu = get_hard_smp_processor_id(lcpu); 84 84 85 85 /* Fixup atomic count: it exited inside IRQ handler. */ 86 - task_thread_info(paca[lcpu].__current)->preempt_count = 0; 86 + task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; 87 87 88 88 /* 89 89 * If the RTAS start-cpu token does not exist then presume the ··· 126 126 * cpu_start field to become non-zero After we set cpu_start, 127 127 * the processor will continue on to secondary_start 128 128 */ 129 - paca[nr].cpu_start = 1; 129 + paca_ptrs[nr]->cpu_start = 1; 130 130 131 131 return 0; 132 132 }
+1 -1
arch/powerpc/platforms/cell/spider-pci.c
··· 133 133 pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n", 134 134 np); 135 135 136 - priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); 136 + priv = kzalloc(sizeof(*priv), GFP_KERNEL); 137 137 if (!priv) { 138 138 pr_err("SPIDERPCI-IOWA:" 139 139 "Can't allocate struct spiderpci_iowa_private");
+1 -1
arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
··· 36 36 struct spu_lscsa *lscsa; 37 37 unsigned char *p; 38 38 39 - lscsa = vzalloc(sizeof(struct spu_lscsa)); 39 + lscsa = vzalloc(sizeof(*lscsa)); 40 40 if (!lscsa) 41 41 return -ENOMEM; 42 42 csa->lscsa = lscsa;
+1 -1
arch/powerpc/platforms/embedded6xx/flipper-pic.c
··· 132 132 out_be32(io_base + FLIPPER_ICR, 0xffffffff); 133 133 } 134 134 135 - struct irq_domain * __init flipper_pic_init(struct device_node *np) 135 + static struct irq_domain * __init flipper_pic_init(struct device_node *np) 136 136 { 137 137 struct device_node *pi; 138 138 struct irq_domain *irq_domain = NULL;
+1 -1
arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
··· 169 169 /* 170 170 * Transmits a character. 171 171 */ 172 - void ug_udbg_putc(char ch) 172 + static void ug_udbg_putc(char ch) 173 173 { 174 174 ug_putc(ch); 175 175 }
+9 -14
arch/powerpc/platforms/embedded6xx/wii.c
··· 44 44 #define HW_GPIO_BASE(idx) (idx * 0x20) 45 45 #define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0) 46 46 #define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4) 47 + #define HW_GPIO_OWNER (HW_GPIO_BASE(1) + 0x1c) 47 48 48 49 #define HW_GPIO_SHUTDOWN (1<<1) 49 50 #define HW_GPIO_SLOT_LED (1<<5) ··· 80 79 BUG_ON(memblock.memory.cnt != 2); 81 80 BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base)); 82 81 83 - /* trim unaligned tail */ 84 - memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE), 85 - (phys_addr_t)ULLONG_MAX); 86 - 87 - /* determine hole, add & reserve them */ 82 + /* determine hole */ 88 83 wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE); 89 84 wii_hole_size = p[1].base - wii_hole_start; 90 - memblock_add(wii_hole_start, wii_hole_size); 91 - memblock_reserve(wii_hole_start, wii_hole_size); 92 - 93 - BUG_ON(memblock.memory.cnt != 1); 94 - __memblock_dump_all(); 95 - 96 - /* allow ioremapping the address space in the hole */ 97 - __allow_ioremap_reserved = 1; 98 85 } 99 86 100 87 unsigned long __init wii_mmu_mapin_mem2(unsigned long top) ··· 165 176 local_irq_disable(); 166 177 167 178 if (hw_gpio) { 179 + /* 180 + * set the owner of the shutdown pin to ARM, because it is 181 + * accessed through the registers for the ARM, below 182 + */ 183 + clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN); 184 + 168 185 /* make sure that the poweroff GPIO is configured as output */ 169 186 setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN); 170 187 ··· 234 239 if (!machine_is(wii)) 235 240 return 0; 236 241 237 - of_platform_bus_probe(NULL, wii_of_bus, NULL); 242 + of_platform_populate(NULL, wii_of_bus, NULL, NULL); 238 243 return 0; 239 244 } 240 245 device_initcall(wii_device_probe);
+1 -1
arch/powerpc/platforms/powermac/low_i2c.c
··· 492 492 const u32 *psteps, *prate, *addrp; 493 493 u32 steps; 494 494 495 - host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL); 495 + host = kzalloc(sizeof(*host), GFP_KERNEL); 496 496 if (host == NULL) { 497 497 printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n", 498 498 np);
+2 -2
arch/powerpc/platforms/powermac/pfunc_core.c
··· 643 643 644 644 while (length >= 12) { 645 645 /* Allocate a structure */ 646 - func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL); 646 + func = kzalloc(sizeof(*func), GFP_KERNEL); 647 647 if (func == NULL) 648 648 goto bail; 649 649 kref_init(&func->ref); ··· 719 719 return -EBUSY; 720 720 } 721 721 722 - dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL); 722 + dev = kzalloc(sizeof(*dev), GFP_KERNEL); 723 723 if (dev == NULL) { 724 724 DBG("pmf: no memory !\n"); 725 725 return -ENOMEM;
-1
arch/powerpc/platforms/powernv/Makefile
··· 16 16 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o 17 17 obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o 18 18 obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o 19 - obj-$(CONFIG_PPC_FTW) += nx-ftw.o 20 19 obj-$(CONFIG_OCXL_BASE) += ocxl.o
+2 -7
arch/powerpc/platforms/powernv/eeh-powernv.c
··· 1425 1425 dev_pe = dev_pe->parent; 1426 1426 while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { 1427 1427 int ret; 1428 - int active_flags = (EEH_STATE_MMIO_ACTIVE | 1429 - EEH_STATE_DMA_ACTIVE); 1430 - 1431 1428 ret = eeh_ops->get_state(dev_pe, NULL); 1432 - if (ret <= 0 || (ret & active_flags) == active_flags) { 1429 + if (ret <= 0 || eeh_state_active(ret)) { 1433 1430 dev_pe = dev_pe->parent; 1434 1431 continue; 1435 1432 } ··· 1460 1463 struct eeh_pe *phb_pe, *parent_pe; 1461 1464 __be64 frozen_pe_no; 1462 1465 __be16 err_type, severity; 1463 - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 1464 1466 long rc; 1465 1467 int state, ret = EEH_NEXT_ERR_NONE; 1466 1468 ··· 1622 1626 1623 1627 /* Frozen parent PE ? */ 1624 1628 state = eeh_ops->get_state(parent_pe, NULL); 1625 - if (state > 0 && 1626 - (state & active_flags) != active_flags) 1629 + if (state > 0 && !eeh_state_active(state)) 1627 1630 *pe = parent_pe; 1628 1631 1629 1632 /* Next parent level */
+81 -7
arch/powerpc/platforms/powernv/idle.c
··· 24 24 #include <asm/code-patching.h> 25 25 #include <asm/smp.h> 26 26 #include <asm/runlatch.h> 27 + #include <asm/dbell.h> 27 28 28 29 #include "powernv.h" 29 30 #include "subcore.h" ··· 81 80 82 81 for_each_possible_cpu(cpu) { 83 82 uint64_t pir = get_hard_smp_processor_id(cpu); 84 - uint64_t hsprg0_val = (uint64_t)&paca[cpu]; 83 + uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 85 84 86 85 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 87 86 if (rc != 0) ··· 174 173 for (j = 0; j < threads_per_core; j++) { 175 174 int cpu = first_cpu + j; 176 175 177 - paca[cpu].core_idle_state_ptr = core_idle_state; 178 - paca[cpu].thread_idle_state = PNV_THREAD_RUNNING; 179 - paca[cpu].thread_mask = 1 << j; 176 + paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; 177 + paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; 178 + paca_ptrs[cpu]->thread_mask = 1 << j; 180 179 if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) 181 180 continue; 182 - paca[cpu].thread_sibling_pacas = 181 + paca_ptrs[cpu]->thread_sibling_pacas = 183 182 kmalloc_node(paca_ptr_array_size, 184 183 GFP_KERNEL, node); 185 184 } ··· 388 387 power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 389 388 } 390 389 390 + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 391 + /* 392 + * This is used in working around bugs in thread reconfiguration 393 + * on POWER9 (at least up to Nimbus DD2.2) relating to transactional 394 + * memory and the way that XER[SO] is checkpointed. 395 + * This function forces the core into SMT4 in order by asking 396 + * all other threads not to stop, and sending a message to any 397 + * that are in a stop state. 398 + * Must be called with preemption disabled. 
399 + */ 400 + void pnv_power9_force_smt4_catch(void) 401 + { 402 + int cpu, cpu0, thr; 403 + int awake_threads = 1; /* this thread is awake */ 404 + int poke_threads = 0; 405 + int need_awake = threads_per_core; 406 + 407 + cpu = smp_processor_id(); 408 + cpu0 = cpu & ~(threads_per_core - 1); 409 + for (thr = 0; thr < threads_per_core; ++thr) { 410 + if (cpu != cpu0 + thr) 411 + atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 412 + } 413 + /* order setting dont_stop vs testing requested_psscr */ 414 + mb(); 415 + for (thr = 0; thr < threads_per_core; ++thr) { 416 + if (!paca_ptrs[cpu0+thr]->requested_psscr) 417 + ++awake_threads; 418 + else 419 + poke_threads |= (1 << thr); 420 + } 421 + 422 + /* If at least 3 threads are awake, the core is in SMT4 already */ 423 + if (awake_threads < need_awake) { 424 + /* We have to wake some threads; we'll use msgsnd */ 425 + for (thr = 0; thr < threads_per_core; ++thr) { 426 + if (poke_threads & (1 << thr)) { 427 + ppc_msgsnd_sync(); 428 + ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, 429 + paca_ptrs[cpu0+thr]->hw_cpu_id); 430 + } 431 + } 432 + /* now spin until at least 3 threads are awake */ 433 + do { 434 + for (thr = 0; thr < threads_per_core; ++thr) { 435 + if ((poke_threads & (1 << thr)) && 436 + !paca_ptrs[cpu0+thr]->requested_psscr) { 437 + ++awake_threads; 438 + poke_threads &= ~(1 << thr); 439 + } 440 + } 441 + } while (awake_threads < need_awake); 442 + } 443 + } 444 + EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); 445 + 446 + void pnv_power9_force_smt4_release(void) 447 + { 448 + int cpu, cpu0, thr; 449 + 450 + cpu = smp_processor_id(); 451 + cpu0 = cpu & ~(threads_per_core - 1); 452 + 453 + /* clear all the dont_stop flags */ 454 + for (thr = 0; thr < threads_per_core; ++thr) { 455 + if (cpu != cpu0 + thr) 456 + atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); 457 + } 458 + } 459 + EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 460 + #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 461 + 391 462 #ifdef CONFIG_HOTPLUG_CPU 392 463 static 
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) 393 464 { ··· 507 434 psscr = mfspr(SPRN_PSSCR); 508 435 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 509 436 pnv_deepest_stop_psscr_val; 510 - srr1 = power9_idle_stop(psscr); 437 + srr1 = power9_offline_stop(psscr); 511 438 512 439 } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && 513 440 (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { ··· 822 749 for (i = 0; i < threads_per_core; i++) { 823 750 int j = base_cpu + i; 824 751 825 - paca[j].thread_sibling_pacas[idx] = &paca[cpu]; 752 + paca_ptrs[j]->thread_sibling_pacas[idx] = 753 + paca_ptrs[cpu]; 826 754 } 827 755 } 828 756 }
+161 -98
arch/powerpc/platforms/powernv/npu-dma.c
··· 410 410 void *priv; 411 411 }; 412 412 413 + struct mmio_atsd_reg { 414 + struct npu *npu; 415 + int reg; 416 + }; 417 + 413 418 /* 414 419 * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC 415 420 * if none are available. ··· 424 419 int i; 425 420 426 421 for (i = 0; i < npu->mmio_atsd_count; i++) { 427 - if (!test_and_set_bit(i, &npu->mmio_atsd_usage)) 422 + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) 428 423 return i; 429 424 } 430 425 ··· 433 428 434 429 static void put_mmio_atsd_reg(struct npu *npu, int reg) 435 430 { 436 - clear_bit(reg, &npu->mmio_atsd_usage); 431 + clear_bit_unlock(reg, &npu->mmio_atsd_usage); 437 432 } 438 433 439 434 /* MMIO ATSD register offsets */ 440 435 #define XTS_ATSD_AVA 1 441 436 #define XTS_ATSD_STAT 2 442 437 443 - static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, 444 - unsigned long va) 438 + static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, 439 + unsigned long launch, unsigned long va) 445 440 { 446 - int mmio_atsd_reg; 447 - 448 - do { 449 - mmio_atsd_reg = get_mmio_atsd_reg(npu); 450 - cpu_relax(); 451 - } while (mmio_atsd_reg < 0); 441 + struct npu *npu = mmio_atsd_reg->npu; 442 + int reg = mmio_atsd_reg->reg; 452 443 453 444 __raw_writeq(cpu_to_be64(va), 454 - npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA); 445 + npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); 455 446 eieio(); 456 - __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]); 457 - 458 - return mmio_atsd_reg; 447 + __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]); 459 448 } 460 449 461 - static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) 450 + static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], 451 + unsigned long pid, bool flush) 462 452 { 453 + int i; 463 454 unsigned long launch; 464 455 465 - /* IS set to invalidate matching PID */ 466 - launch = PPC_BIT(12); 456 + for (i = 0; i <= max_npu2_index; 
i++) { 457 + if (mmio_atsd_reg[i].reg < 0) 458 + continue; 467 459 468 - /* PRS set to process-scoped */ 469 - launch |= PPC_BIT(13); 460 + /* IS set to invalidate matching PID */ 461 + launch = PPC_BIT(12); 470 462 471 - /* AP */ 472 - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); 463 + /* PRS set to process-scoped */ 464 + launch |= PPC_BIT(13); 473 465 474 - /* PID */ 475 - launch |= pid << PPC_BITLSHIFT(38); 466 + /* AP */ 467 + launch |= (u64) 468 + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); 476 469 477 - /* No flush */ 478 - launch |= !flush << PPC_BITLSHIFT(39); 470 + /* PID */ 471 + launch |= pid << PPC_BITLSHIFT(38); 479 472 480 - /* Invalidating the entire process doesn't use a va */ 481 - return mmio_launch_invalidate(npu, launch, 0); 473 + /* No flush */ 474 + launch |= !flush << PPC_BITLSHIFT(39); 475 + 476 + /* Invalidating the entire process doesn't use a va */ 477 + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); 478 + } 482 479 } 483 480 484 - static int mmio_invalidate_va(struct npu *npu, unsigned long va, 485 - unsigned long pid, bool flush) 481 + static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], 482 + unsigned long va, unsigned long pid, bool flush) 486 483 { 484 + int i; 487 485 unsigned long launch; 488 486 489 - /* IS set to invalidate target VA */ 490 - launch = 0; 487 + for (i = 0; i <= max_npu2_index; i++) { 488 + if (mmio_atsd_reg[i].reg < 0) 489 + continue; 491 490 492 - /* PRS set to process scoped */ 493 - launch |= PPC_BIT(13); 491 + /* IS set to invalidate target VA */ 492 + launch = 0; 494 493 495 - /* AP */ 496 - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); 494 + /* PRS set to process scoped */ 495 + launch |= PPC_BIT(13); 497 496 498 - /* PID */ 499 - launch |= pid << PPC_BITLSHIFT(38); 497 + /* AP */ 498 + launch |= (u64) 499 + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); 500 500 501 - /* No flush */ 502 - launch |= !flush << 
PPC_BITLSHIFT(39); 501 + /* PID */ 502 + launch |= pid << PPC_BITLSHIFT(38); 503 503 504 - return mmio_launch_invalidate(npu, launch, va); 504 + /* No flush */ 505 + launch |= !flush << PPC_BITLSHIFT(39); 506 + 507 + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); 508 + } 505 509 } 506 510 507 511 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) 508 512 509 - struct mmio_atsd_reg { 510 - struct npu *npu; 511 - int reg; 512 - }; 513 - 514 513 static void mmio_invalidate_wait( 515 - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) 514 + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) 516 515 { 517 516 struct npu *npu; 518 517 int i, reg; ··· 531 522 reg = mmio_atsd_reg[i].reg; 532 523 while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) 533 524 cpu_relax(); 525 + } 526 + } 534 527 535 - put_mmio_atsd_reg(npu, reg); 528 + /* 529 + * Acquires all the address translation shootdown (ATSD) registers required to 530 + * launch an ATSD on all links this npu_context is active on. 531 + */ 532 + static void acquire_atsd_reg(struct npu_context *npu_context, 533 + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) 534 + { 535 + int i, j; 536 + struct npu *npu; 537 + struct pci_dev *npdev; 538 + struct pnv_phb *nphb; 536 539 540 + for (i = 0; i <= max_npu2_index; i++) { 541 + mmio_atsd_reg[i].reg = -1; 542 + for (j = 0; j < NV_MAX_LINKS; j++) { 543 + /* 544 + * There are no ordering requirements with respect to 545 + * the setup of struct npu_context, but to ensure 546 + * consistent behaviour we need to ensure npdev[][] is 547 + * only read once. 
548 + */ 549 + npdev = READ_ONCE(npu_context->npdev[i][j]); 550 + if (!npdev) 551 + continue; 552 + 553 + nphb = pci_bus_to_host(npdev->bus)->private_data; 554 + npu = &nphb->npu; 555 + mmio_atsd_reg[i].npu = npu; 556 + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); 557 + while (mmio_atsd_reg[i].reg < 0) { 558 + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); 559 + cpu_relax(); 560 + } 561 + break; 562 + } 563 + } 564 + } 565 + 566 + /* 567 + * Release previously acquired ATSD registers. To avoid deadlocks the registers 568 + * must be released in the same order they were acquired above in 569 + * acquire_atsd_reg. 570 + */ 571 + static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) 572 + { 573 + int i; 574 + 575 + for (i = 0; i <= max_npu2_index; i++) { 537 576 /* 538 - * The GPU requires two flush ATSDs to ensure all entries have 539 - * been flushed. We use PID 0 as it will never be used for a 540 - * process on the GPU. 577 + * We can't rely on npu_context->npdev[][] being the same here 578 + * as when acquire_atsd_reg() was called, hence we use the 579 + * values stored in mmio_atsd_reg during the acquire phase 580 + * rather than re-reading npdev[][]. 541 581 */ 542 - if (flush) 543 - mmio_invalidate_pid(npu, 0, true); 582 + if (mmio_atsd_reg[i].reg < 0) 583 + continue; 584 + 585 + put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); 544 586 } 545 587 } 546 588 ··· 602 542 static void mmio_invalidate(struct npu_context *npu_context, int va, 603 543 unsigned long address, bool flush) 604 544 { 605 - int i, j; 606 - struct npu *npu; 607 - struct pnv_phb *nphb; 608 - struct pci_dev *npdev; 609 545 struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; 610 546 unsigned long pid = npu_context->mm->context.id; 611 547 ··· 617 561 * Loop over all the NPUs this process is active on and launch 618 562 * an invalidate. 
619 563 */ 620 - for (i = 0; i <= max_npu2_index; i++) { 621 - mmio_atsd_reg[i].reg = -1; 622 - for (j = 0; j < NV_MAX_LINKS; j++) { 623 - npdev = npu_context->npdev[i][j]; 624 - if (!npdev) 625 - continue; 564 + acquire_atsd_reg(npu_context, mmio_atsd_reg); 565 + if (va) 566 + mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); 567 + else 568 + mmio_invalidate_pid(mmio_atsd_reg, pid, flush); 626 569 627 - nphb = pci_bus_to_host(npdev->bus)->private_data; 628 - npu = &nphb->npu; 629 - mmio_atsd_reg[i].npu = npu; 630 - 631 - if (va) 632 - mmio_atsd_reg[i].reg = 633 - mmio_invalidate_va(npu, address, pid, 634 - flush); 635 - else 636 - mmio_atsd_reg[i].reg = 637 - mmio_invalidate_pid(npu, pid, flush); 638 - 639 - /* 640 - * The NPU hardware forwards the shootdown to all GPUs 641 - * so we only have to launch one shootdown per NPU. 642 - */ 643 - break; 644 - } 570 + mmio_invalidate_wait(mmio_atsd_reg); 571 + if (flush) { 572 + /* 573 + * The GPU requires two flush ATSDs to ensure all entries have 574 + * been flushed. We use PID 0 as it will never be used for a 575 + * process on the GPU. 
576 + */ 577 + mmio_invalidate_pid(mmio_atsd_reg, 0, true); 578 + mmio_invalidate_wait(mmio_atsd_reg); 579 + mmio_invalidate_pid(mmio_atsd_reg, 0, true); 580 + mmio_invalidate_wait(mmio_atsd_reg); 645 581 } 646 - 647 - mmio_invalidate_wait(mmio_atsd_reg, flush); 648 - if (flush) 649 - /* Wait for the flush to complete */ 650 - mmio_invalidate_wait(mmio_atsd_reg, false); 582 + release_atsd_reg(mmio_atsd_reg); 651 583 } 652 584 653 585 static void pnv_npu2_mn_release(struct mmu_notifier *mn, ··· 724 680 /* No nvlink associated with this GPU device */ 725 681 return ERR_PTR(-ENODEV); 726 682 683 + nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); 684 + if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", 685 + &nvlink_index))) 686 + return ERR_PTR(-ENODEV); 687 + 727 688 if (!mm || mm->context.id == 0) { 728 689 /* 729 690 * Kernel thread contexts are not supported and context id 0 is ··· 756 707 */ 757 708 npu_context = mm->context.npu_context; 758 709 if (!npu_context) { 710 + rc = -ENOMEM; 759 711 npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); 760 - if (!npu_context) 761 - return ERR_PTR(-ENOMEM); 712 + if (npu_context) { 713 + kref_init(&npu_context->kref); 714 + npu_context->mm = mm; 715 + npu_context->mn.ops = &nv_nmmu_notifier_ops; 716 + rc = __mmu_notifier_register(&npu_context->mn, mm); 717 + } 718 + 719 + if (rc) { 720 + kfree(npu_context); 721 + opal_npu_destroy_context(nphb->opal_id, mm->context.id, 722 + PCI_DEVID(gpdev->bus->number, 723 + gpdev->devfn)); 724 + return ERR_PTR(rc); 725 + } 762 726 763 727 mm->context.npu_context = npu_context; 764 - npu_context->mm = mm; 765 - npu_context->mn.ops = &nv_nmmu_notifier_ops; 766 - __mmu_notifier_register(&npu_context->mn, mm); 767 - kref_init(&npu_context->kref); 768 728 } else { 769 - kref_get(&npu_context->kref); 729 + WARN_ON(!kref_get_unless_zero(&npu_context->kref)); 770 730 } 771 731 772 732 npu_context->release_cb = cb; 773 733 npu_context->priv = priv; 774 
- nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); 775 - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", 776 - &nvlink_index))) 777 - return ERR_PTR(-ENODEV); 778 - npu_context->npdev[npu->index][nvlink_index] = npdev; 734 + 735 + /* 736 + * npdev is a pci_dev pointer setup by the PCI code. We assign it to 737 + * npdev[][] to indicate to the mmu notifiers that an invalidation 738 + * should also be sent over this nvlink. The notifiers don't use any 739 + * other fields in npu_context, so we just need to ensure that when they 740 + * deference npu_context->npdev[][] it is either a valid pointer or 741 + * NULL. 742 + */ 743 + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); 779 744 780 745 if (!nphb->npu.nmmu_flush) { 781 746 /* ··· 841 778 if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", 842 779 &nvlink_index))) 843 780 return; 844 - npu_context->npdev[npu->index][nvlink_index] = NULL; 781 + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); 845 782 opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, 846 783 PCI_DEVID(gpdev->bus->number, gpdev->devfn)); 847 784 kref_put(&npu_context->kref, pnv_npu2_release_context);
+3 -29
arch/powerpc/platforms/powernv/opal-flash.c
··· 303 303 return rc; 304 304 } 305 305 306 - /* Return CPUs to OPAL before starting FW update */ 307 - static void flash_return_cpu(void *info) 308 - { 309 - int cpu = smp_processor_id(); 310 - 311 - if (!cpu_online(cpu)) 312 - return; 313 - 314 - /* Disable IRQ */ 315 - hard_irq_disable(); 316 - 317 - /* Return the CPU to OPAL */ 318 - opal_return_cpu(); 319 - } 320 - 321 306 /* This gets called just before system reboots */ 322 - void opal_flash_term_callback(void) 307 + void opal_flash_update_print_message(void) 323 308 { 324 - struct cpumask mask; 325 - 326 309 if (update_flash_data.status != FLASH_IMG_READY) 327 310 return; 328 311 ··· 316 333 317 334 /* Small delay to help getting the above message out */ 318 335 msleep(500); 319 - 320 - /* Return secondary CPUs to firmware */ 321 - cpumask_copy(&mask, cpu_online_mask); 322 - cpumask_clear_cpu(smp_processor_id(), &mask); 323 - if (!cpumask_empty(&mask)) 324 - smp_call_function_many(&mask, 325 - flash_return_cpu, NULL, false); 326 - /* Hard disable interrupts */ 327 - hard_irq_disable(); 328 336 } 329 337 330 338 /* ··· 392 418 void *addr; 393 419 int size; 394 420 395 - if (count < sizeof(struct image_header_t)) { 421 + if (count < sizeof(image_header)) { 396 422 pr_warn("FLASH: Invalid candidate image\n"); 397 423 return -EINVAL; 398 424 } 399 425 400 - memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t)); 426 + memcpy(&image_header, (void *)buffer, sizeof(image_header)); 401 427 image_data.size = be32_to_cpu(image_header.size); 402 428 pr_debug("FLASH: Candidate image size = %u\n", image_data.size); 403 429
+1 -1
arch/powerpc/platforms/powernv/opal-hmi.c
··· 314 314 pr_err("HMI: out of memory, Opal message event not handled\n"); 315 315 return -ENOMEM; 316 316 } 317 - memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent)); 317 + memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt)); 318 318 319 319 spin_lock_irqsave(&opal_hmi_evt_lock, flags); 320 320 list_add(&msg_node->list, &opal_hmi_evt_list);
+5 -5
arch/powerpc/platforms/powernv/opal-imc.c
··· 110 110 if (nr_chips <= 0) 111 111 return -ENODEV; 112 112 113 - base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL); 113 + base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL); 114 114 if (!base_addr_arr) 115 115 return -ENOMEM; 116 116 117 - chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL); 117 + chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL); 118 118 if (!chipid_arr) 119 119 return -ENOMEM; 120 120 ··· 125 125 nr_chips)) 126 126 goto error; 127 127 128 - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info), 129 - GFP_KERNEL); 128 + pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), 129 + GFP_KERNEL); 130 130 if (!pmu_ptr->mem_info) 131 131 goto error; 132 132 ··· 161 161 u32 offset; 162 162 163 163 /* memory for pmu */ 164 - pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL); 164 + pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); 165 165 if (!pmu_ptr) 166 166 return -ENOMEM; 167 167
+1 -1
arch/powerpc/platforms/powernv/opal-memory-errors.c
··· 112 112 "handled\n"); 113 113 return -ENOMEM; 114 114 } 115 - memcpy(&msg_node->msg, msg, sizeof(struct opal_msg)); 115 + memcpy(&msg_node->msg, msg, sizeof(msg_node->msg)); 116 116 117 117 spin_lock_irqsave(&opal_mem_err_lock, flags); 118 118 list_add(&msg_node->list, &opal_memory_err_list);
+4
arch/powerpc/platforms/powernv/opal-nvram.c
··· 59 59 if (rc == OPAL_BUSY_EVENT) 60 60 opal_poll_events(NULL); 61 61 } 62 + 63 + if (rc) 64 + return -EIO; 65 + 62 66 *index += count; 63 67 return count; 64 68 }
+1 -1
arch/powerpc/platforms/powernv/opal-psr.c
··· 136 136 return; 137 137 } 138 138 139 - psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr), 139 + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs), 140 140 GFP_KERNEL); 141 141 if (!psr_attrs) 142 142 return;
+2 -2
arch/powerpc/platforms/powernv/opal-sensor-groups.c
··· 166 166 if (!nr_attrs) 167 167 continue; 168 168 169 - sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr), 169 + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs), 170 170 GFP_KERNEL); 171 171 if (!sgs[i].sgattrs) 172 172 goto out_sgs_sgattrs; 173 173 174 174 sgs[i].sg.attrs = kcalloc(nr_attrs + 1, 175 - sizeof(struct attribute *), 175 + sizeof(*sgs[i].sg.attrs), 176 176 GFP_KERNEL); 177 177 178 178 if (!sgs[i].sg.attrs) {
+2
arch/powerpc/platforms/powernv/opal-wrappers.S
··· 323 323 OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP); 324 324 OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE); 325 325 OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET); 326 + OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR); 327 + OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+1 -1
arch/powerpc/platforms/powernv/opal-xscom.c
··· 46 46 __func__, dev); 47 47 return SCOM_MAP_INVALID; 48 48 } 49 - m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); 49 + m = kmalloc(sizeof(*m), GFP_KERNEL); 50 50 if (!m) 51 51 return NULL; 52 52 m->chip = be32_to_cpup(gcid);
+4 -1
arch/powerpc/platforms/powernv/opal.c
··· 490 490 * opal to trigger checkstop explicitly for error analysis. 491 491 * The FSP PRD component would have already got notified 492 492 * about this error through other channels. 493 + * 4. We are running on a newer skiboot that by default does 494 + * not cause a checkstop, drops us back to the kernel to 495 + * extract context and state at the time of the error. 493 496 */ 494 497 495 - ppc_md.restart(NULL); 498 + panic(msg); 496 499 } 497 500 498 501 int opal_machine_check(struct pt_regs *regs)
-8
arch/powerpc/platforms/powernv/pci-cxl.c
··· 16 16 17 17 #include "pci.h" 18 18 19 - struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) 20 - { 21 - struct pci_controller *hose = pci_bus_to_host(dev->bus); 22 - 23 - return of_node_get(hose->dn); 24 - } 25 - EXPORT_SYMBOL(pnv_pci_get_phb_node); 26 - 27 19 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) 28 20 { 29 21 struct pci_controller *hose = pci_bus_to_host(dev->bus);
+25 -4
arch/powerpc/platforms/powernv/pci-ioda.c
··· 2681 2681 static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group, 2682 2682 int num, struct iommu_table *tbl) 2683 2683 { 2684 + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); 2685 + int num2 = (num == 0) ? 1 : 0; 2684 2686 long ret = pnv_pci_ioda2_set_window(table_group, num, tbl); 2685 2687 2686 2688 if (ret) 2687 2689 return ret; 2688 2690 2689 - ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl); 2690 - if (ret) 2691 + if (table_group->tables[num2]) 2692 + pnv_npu_unset_window(npe, num2); 2693 + 2694 + ret = pnv_npu_set_window(npe, num, tbl); 2695 + if (ret) { 2691 2696 pnv_pci_ioda2_unset_window(table_group, num); 2697 + if (table_group->tables[num2]) 2698 + pnv_npu_set_window(npe, num2, 2699 + table_group->tables[num2]); 2700 + } 2692 2701 2693 2702 return ret; 2694 2703 } ··· 2706 2697 struct iommu_table_group *table_group, 2707 2698 int num) 2708 2699 { 2700 + struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group); 2701 + int num2 = (num == 0) ? 1 : 0; 2709 2702 long ret = pnv_pci_ioda2_unset_window(table_group, num); 2710 2703 2711 2704 if (ret) 2712 2705 return ret; 2713 2706 2714 - return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num); 2707 + if (!npe->table_group.tables[num]) 2708 + return 0; 2709 + 2710 + ret = pnv_npu_unset_window(npe, num); 2711 + if (ret) 2712 + return ret; 2713 + 2714 + if (table_group->tables[num2]) 2715 + ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]); 2716 + 2717 + return ret; 2715 2718 } 2716 2719 2717 2720 static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group) ··· 3864 3843 phb_id = be64_to_cpup(prop64); 3865 3844 pr_debug(" PHB-ID : 0x%016llx\n", phb_id); 3866 3845 3867 - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); 3846 + phb = memblock_virt_alloc(sizeof(*phb), 0); 3868 3847 3869 3848 /* Allocate PCI controller */ 3870 3849 phb->hose = hose = pcibios_alloc_controller(np);
+135
arch/powerpc/platforms/powernv/pci.c
··· 18 18 #include <linux/io.h> 19 19 #include <linux/msi.h> 20 20 #include <linux/iommu.h> 21 + #include <linux/sched/mm.h> 21 22 22 23 #include <asm/sections.h> 23 24 #include <asm/io.h> ··· 39 38 #include "pci.h" 40 39 41 40 static DEFINE_MUTEX(p2p_mutex); 41 + static DEFINE_MUTEX(tunnel_mutex); 42 42 43 43 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) 44 44 { ··· 1093 1091 return rc; 1094 1092 } 1095 1093 EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); 1094 + 1095 + struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) 1096 + { 1097 + struct pci_controller *hose = pci_bus_to_host(dev->bus); 1098 + 1099 + return of_node_get(hose->dn); 1100 + } 1101 + EXPORT_SYMBOL(pnv_pci_get_phb_node); 1102 + 1103 + int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind) 1104 + { 1105 + struct device_node *np; 1106 + const __be32 *prop; 1107 + struct pnv_ioda_pe *pe; 1108 + uint16_t window_id; 1109 + int rc; 1110 + 1111 + if (!radix_enabled()) 1112 + return -ENXIO; 1113 + 1114 + if (!(np = pnv_pci_get_phb_node(dev))) 1115 + return -ENXIO; 1116 + 1117 + prop = of_get_property(np, "ibm,phb-indications", NULL); 1118 + of_node_put(np); 1119 + 1120 + if (!prop || !prop[1]) 1121 + return -ENXIO; 1122 + 1123 + *asnind = (u64)be32_to_cpu(prop[1]); 1124 + pe = pnv_ioda_get_pe(dev); 1125 + if (!pe) 1126 + return -ENODEV; 1127 + 1128 + /* Increase real window size to accept as_notify messages. */ 1129 + window_id = (pe->pe_number << 1 ) + 1; 1130 + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number, 1131 + window_id, pe->tce_bypass_base, 1132 + (uint64_t)1 << 48); 1133 + return opal_error_code(rc); 1134 + } 1135 + EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel); 1136 + 1137 + int pnv_pci_disable_tunnel(struct pci_dev *dev) 1138 + { 1139 + struct pnv_ioda_pe *pe; 1140 + 1141 + pe = pnv_ioda_get_pe(dev); 1142 + if (!pe) 1143 + return -ENODEV; 1144 + 1145 + /* Restore default real window size. 
*/ 1146 + pnv_pci_ioda2_set_bypass(pe, true); 1147 + return 0; 1148 + } 1149 + EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel); 1150 + 1151 + int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) 1152 + { 1153 + __be64 val; 1154 + struct pci_controller *hose; 1155 + struct pnv_phb *phb; 1156 + u64 tunnel_bar; 1157 + int rc; 1158 + 1159 + if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR)) 1160 + return -ENXIO; 1161 + if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR)) 1162 + return -ENXIO; 1163 + 1164 + hose = pci_bus_to_host(dev->bus); 1165 + phb = hose->private_data; 1166 + 1167 + mutex_lock(&tunnel_mutex); 1168 + rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val); 1169 + if (rc != OPAL_SUCCESS) { 1170 + rc = -EIO; 1171 + goto out; 1172 + } 1173 + tunnel_bar = be64_to_cpu(val); 1174 + if (enable) { 1175 + /* 1176 + * Only one device per PHB can use atomics. 1177 + * Our policy is first-come, first-served. 1178 + */ 1179 + if (tunnel_bar) { 1180 + if (tunnel_bar != addr) 1181 + rc = -EBUSY; 1182 + else 1183 + rc = 0; /* Setting same address twice is ok */ 1184 + goto out; 1185 + } 1186 + } else { 1187 + /* 1188 + * The device that owns atomics and wants to release 1189 + * them must pass the same address with enable == 0. 
1190 + */ 1191 + if (tunnel_bar != addr) { 1192 + rc = -EPERM; 1193 + goto out; 1194 + } 1195 + addr = 0x0ULL; 1196 + } 1197 + rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr); 1198 + rc = opal_error_code(rc); 1199 + out: 1200 + mutex_unlock(&tunnel_mutex); 1201 + return rc; 1202 + } 1203 + EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); 1204 + 1205 + #ifdef CONFIG_PPC64 /* for thread.tidr */ 1206 + int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid, 1207 + u32 *tid) 1208 + { 1209 + struct mm_struct *mm = NULL; 1210 + 1211 + if (task == NULL) 1212 + return -EINVAL; 1213 + 1214 + mm = get_task_mm(task); 1215 + if (mm == NULL) 1216 + return -EINVAL; 1217 + 1218 + *pid = mm->context.id; 1219 + mmput(mm); 1220 + 1221 + *tid = task->thread.tidr; 1222 + *lpid = mfspr(SPRN_LPID); 1223 + return 0; 1224 + } 1225 + EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info); 1226 + #endif 1096 1227 1097 1228 void pnv_pci_shutdown(void) 1098 1229 {
+72 -42
arch/powerpc/platforms/powernv/setup.c
··· 38 38 #include <asm/smp.h> 39 39 #include <asm/tm.h> 40 40 #include <asm/setup.h> 41 + #include <asm/security_features.h> 41 42 42 43 #include "powernv.h" 44 + 45 + 46 + static bool fw_feature_is(const char *state, const char *name, 47 + struct device_node *fw_features) 48 + { 49 + struct device_node *np; 50 + bool rc = false; 51 + 52 + np = of_get_child_by_name(fw_features, name); 53 + if (np) { 54 + rc = of_property_read_bool(np, state); 55 + of_node_put(np); 56 + } 57 + 58 + return rc; 59 + } 60 + 61 + static void init_fw_feat_flags(struct device_node *np) 62 + { 63 + if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) 64 + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); 65 + 66 + if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) 67 + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); 68 + 69 + if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np)) 70 + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); 71 + 72 + if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) 73 + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); 74 + 75 + if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) 76 + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); 77 + 78 + if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) 79 + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); 80 + 81 + /* 82 + * The features below are enabled by default, so we instead look to see 83 + * if firmware has *disabled* them, and clear them if so. 
84 + */ 85 + if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) 86 + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); 87 + 88 + if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) 89 + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); 90 + 91 + if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) 92 + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); 93 + 94 + if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) 95 + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); 96 + } 43 97 44 98 static void pnv_setup_rfi_flush(void) 45 99 { 46 100 struct device_node *np, *fw_features; 47 101 enum l1d_flush_type type; 48 - int enable; 102 + bool enable; 49 103 50 104 /* Default to fallback in case fw-features are not available */ 51 105 type = L1D_FLUSH_FALLBACK; 52 - enable = 1; 53 106 54 107 np = of_find_node_by_name(NULL, "ibm,opal"); 55 108 fw_features = of_get_child_by_name(np, "fw-features"); 56 109 of_node_put(np); 57 110 58 111 if (fw_features) { 59 - np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); 60 - if (np && of_property_read_bool(np, "enabled")) 112 + init_fw_feat_flags(fw_features); 113 + of_node_put(fw_features); 114 + 115 + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) 61 116 type = L1D_FLUSH_MTTRIG; 62 117 63 - of_node_put(np); 64 - 65 - np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); 66 - if (np && of_property_read_bool(np, "enabled")) 118 + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) 67 119 type = L1D_FLUSH_ORI; 68 - 69 - of_node_put(np); 70 - 71 - /* Enable unless firmware says NOT to */ 72 - enable = 2; 73 - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); 74 - if (np && of_property_read_bool(np, "disabled")) 75 - enable--; 76 - 77 - of_node_put(np); 78 - 79 - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); 80 - if (np && of_property_read_bool(np, "disabled")) 81 - enable--; 82 - 83 - np = 
of_get_child_by_name(fw_features, "speculation-policy-favor-security"); 84 - if (np && of_property_read_bool(np, "disabled")) 85 - enable = 0; 86 - 87 - of_node_put(np); 88 - of_node_put(fw_features); 89 120 } 90 121 91 - setup_rfi_flush(type, enable > 0); 122 + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ 123 + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ 124 + security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); 125 + 126 + setup_rfi_flush(type, enable); 92 127 } 93 128 94 129 static void __init pnv_setup_arch(void) ··· 201 166 */ 202 167 opal_event_shutdown(); 203 168 204 - /* Soft disable interrupts */ 205 - local_irq_disable(); 169 + /* Print flash update message if one is scheduled. */ 170 + opal_flash_update_print_message(); 206 171 207 - /* 208 - * Return secondary CPUs to firwmare if a flash update 209 - * is pending otherwise we will get all sort of error 210 - * messages about CPU being stuck etc.. This will also 211 - * have the side effect of hard disabling interrupts so 212 - * past this point, the kernel is effectively dead. 213 - */ 214 - opal_flash_term_callback(); 172 + smp_send_stop(); 173 + 174 + hard_irq_disable(); 215 175 } 216 176 217 177 static void __noreturn pnv_restart(char *cmd) ··· 288 258 if (i != notified) { 289 259 printk(KERN_INFO "kexec: waiting for cpu %d " 290 260 "(physical %d) to enter OPAL\n", 291 - i, paca[i].hw_cpu_id); 261 + i, paca_ptrs[i]->hw_cpu_id); 292 262 notified = i; 293 263 } 294 264 ··· 300 270 if (timeout-- == 0) { 301 271 printk(KERN_ERR "kexec: timed out waiting for " 302 272 "cpu %d (physical %d) to enter OPAL\n", 303 - i, paca[i].hw_cpu_id); 273 + i, paca_ptrs[i]->hw_cpu_id); 304 274 break; 305 275 } 306 276 }
+1 -1
arch/powerpc/platforms/powernv/smp.c
··· 80 80 * If we already started or OPAL is not supported, we just 81 81 * kick the CPU via the PACA 82 82 */ 83 - if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) 83 + if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL)) 84 84 goto kick; 85 85 86 86 /*
+1 -1
arch/powerpc/platforms/powernv/subcore.c
··· 280 280 int offset = (tid / threads_per_subcore) * threads_per_subcore; 281 281 int mask = sibling_mask_first_cpu << offset; 282 282 283 - paca[cpu].subcore_sibling_mask = mask; 283 + paca_ptrs[cpu]->subcore_sibling_mask = mask; 284 284 285 285 } 286 286 }
+15 -4
arch/powerpc/platforms/powernv/vas-debug.c
··· 166 166 167 167 return; 168 168 169 - free_name: 170 - kfree(window->dbgname); 171 - window->dbgname = NULL; 172 - 173 169 remove_dir: 174 170 debugfs_remove_recursive(window->dbgdir); 175 171 window->dbgdir = NULL; 172 + 173 + free_name: 174 + kfree(window->dbgname); 175 + window->dbgname = NULL; 176 176 } 177 177 178 178 void vas_instance_init_dbgdir(struct vas_instance *vinst) 179 179 { 180 180 struct dentry *d; 181 181 182 + vas_init_dbgdir(); 182 183 if (!vas_debugfs) 183 184 return; 184 185 ··· 202 201 vinst->dbgdir = NULL; 203 202 } 204 203 204 + /* 205 + * Set up the "root" VAS debugfs dir. Return if we already set it up 206 + * (or failed to) in an earlier instance of VAS. 207 + */ 205 208 void vas_init_dbgdir(void) 206 209 { 210 + static bool first_time = true; 211 + 212 + if (!first_time) 213 + return; 214 + 215 + first_time = false; 207 216 vas_debugfs = debugfs_create_dir("vas", NULL); 208 217 if (IS_ERR(vas_debugfs)) 209 218 vas_debugfs = NULL;
+113
arch/powerpc/platforms/powernv/vas-trace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + 3 + #undef TRACE_SYSTEM 4 + #define TRACE_SYSTEM vas 5 + 6 + #if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) 7 + 8 + #define _VAS_TRACE_H 9 + #include <linux/tracepoint.h> 10 + #include <linux/sched.h> 11 + #include <asm/vas.h> 12 + 13 + TRACE_EVENT( vas_rx_win_open, 14 + 15 + TP_PROTO(struct task_struct *tsk, 16 + int vasid, 17 + int cop, 18 + struct vas_rx_win_attr *rxattr), 19 + 20 + TP_ARGS(tsk, vasid, cop, rxattr), 21 + 22 + TP_STRUCT__entry( 23 + __field(struct task_struct *, tsk) 24 + __field(int, pid) 25 + __field(int, cop) 26 + __field(int, vasid) 27 + __field(struct vas_rx_win_attr *, rxattr) 28 + __field(int, lnotify_lpid) 29 + __field(int, lnotify_pid) 30 + __field(int, lnotify_tid) 31 + ), 32 + 33 + TP_fast_assign( 34 + __entry->pid = tsk->pid; 35 + __entry->vasid = vasid; 36 + __entry->cop = cop; 37 + __entry->lnotify_lpid = rxattr->lnotify_lpid; 38 + __entry->lnotify_pid = rxattr->lnotify_pid; 39 + __entry->lnotify_tid = rxattr->lnotify_tid; 40 + ), 41 + 42 + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d", 43 + __entry->pid, __entry->vasid, __entry->cop, 44 + __entry->lnotify_lpid, __entry->lnotify_pid, 45 + __entry->lnotify_tid) 46 + ); 47 + 48 + TRACE_EVENT( vas_tx_win_open, 49 + 50 + TP_PROTO(struct task_struct *tsk, 51 + int vasid, 52 + int cop, 53 + struct vas_tx_win_attr *txattr), 54 + 55 + TP_ARGS(tsk, vasid, cop, txattr), 56 + 57 + TP_STRUCT__entry( 58 + __field(struct task_struct *, tsk) 59 + __field(int, pid) 60 + __field(int, cop) 61 + __field(int, vasid) 62 + __field(struct vas_tx_win_attr *, txattr) 63 + __field(int, lpid) 64 + __field(int, pidr) 65 + ), 66 + 67 + TP_fast_assign( 68 + __entry->pid = tsk->pid; 69 + __entry->vasid = vasid; 70 + __entry->cop = cop; 71 + __entry->lpid = txattr->lpid; 72 + __entry->pidr = txattr->pidr; 73 + ), 74 + 75 + TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d", 76 + __entry->pid, __entry->vasid, __entry->cop, 77 + 
__entry->lpid, __entry->pidr) 78 + ); 79 + 80 + TRACE_EVENT( vas_paste_crb, 81 + 82 + TP_PROTO(struct task_struct *tsk, 83 + struct vas_window *win), 84 + 85 + TP_ARGS(tsk, win), 86 + 87 + TP_STRUCT__entry( 88 + __field(struct task_struct *, tsk) 89 + __field(struct vas_window *, win) 90 + __field(int, pid) 91 + __field(int, vasid) 92 + __field(int, winid) 93 + __field(unsigned long, paste_kaddr) 94 + ), 95 + 96 + TP_fast_assign( 97 + __entry->pid = tsk->pid; 98 + __entry->vasid = win->vinst->vas_id; 99 + __entry->winid = win->winid; 100 + __entry->paste_kaddr = (unsigned long)win->paste_kaddr 101 + ), 102 + 103 + TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n", 104 + __entry->pid, __entry->vasid, __entry->winid, 105 + __entry->paste_kaddr) 106 + ); 107 + 108 + #endif /* _VAS_TRACE_H */ 109 + 110 + #undef TRACE_INCLUDE_PATH 111 + #define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv 112 + #define TRACE_INCLUDE_FILE vas-trace 113 + #include <trace/define_trace.h>
+9
arch/powerpc/platforms/powernv/vas-window.c
··· 21 21 #include "vas.h" 22 22 #include "copy-paste.h" 23 23 24 + #define CREATE_TRACE_POINTS 25 + #include "vas-trace.h" 26 + 24 27 /* 25 28 * Compute the paste address region for the window @window using the 26 29 * ->paste_base_addr and ->paste_win_id_shift we got from device tree. ··· 883 880 struct vas_winctx winctx; 884 881 struct vas_instance *vinst; 885 882 883 + trace_vas_rx_win_open(current, vasid, cop, rxattr); 884 + 886 885 if (!rx_win_args_valid(cop, rxattr)) 887 886 return ERR_PTR(-EINVAL); 888 887 ··· 1013 1008 struct vas_winctx winctx; 1014 1009 struct vas_instance *vinst; 1015 1010 1011 + trace_vas_tx_win_open(current, vasid, cop, attr); 1012 + 1016 1013 if (!tx_win_args_valid(cop, attr)) 1017 1014 return ERR_PTR(-EINVAL); 1018 1015 ··· 1106 1099 int rc; 1107 1100 void *addr; 1108 1101 uint64_t val; 1102 + 1103 + trace_vas_paste_crb(current, txwin); 1109 1104 1110 1105 /* 1111 1106 * Only NX windows are supported for now and hardware assumes
+3 -3
arch/powerpc/platforms/powernv/vas.c
··· 160 160 int found = 0; 161 161 struct device_node *dn; 162 162 163 - vas_init_dbgdir(); 164 - 165 163 platform_driver_register(&vas_driver); 166 164 167 165 for_each_compatible_node(dn, NULL, "ibm,vas") { ··· 167 169 found++; 168 170 } 169 171 170 - if (!found) 172 + if (!found) { 173 + platform_driver_unregister(&vas_driver); 171 174 return -ENODEV; 175 + } 172 176 173 177 pr_devel("Found %d instances\n", found); 174 178
+2 -4
arch/powerpc/platforms/ps3/mm.c
··· 524 524 int result; 525 525 struct dma_chunk *c; 526 526 527 - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); 528 - 527 + c = kzalloc(sizeof(*c), GFP_ATOMIC); 529 528 if (!c) { 530 529 result = -ENOMEM; 531 530 goto fail_alloc; ··· 569 570 570 571 DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__, 571 572 phys_addr, ps3_mm_phys_to_lpar(phys_addr), len); 572 - c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC); 573 - 573 + c = kzalloc(sizeof(*c), GFP_ATOMIC); 574 574 if (!c) { 575 575 result = -ENOMEM; 576 576 goto fail_alloc;
+1 -1
arch/powerpc/platforms/pseries/hotplug-cpu.c
··· 234 234 * done here. Change isolate state to Isolate and 235 235 * change allocation-state to Unusable. 236 236 */ 237 - paca[cpu].cpu_start = 0; 237 + paca_ptrs[cpu]->cpu_start = 0; 238 238 } 239 239 240 240 /*
+6 -1
arch/powerpc/platforms/pseries/kexec.c
··· 23 23 24 24 void pseries_kexec_cpu_down(int crash_shutdown, int secondary) 25 25 { 26 - /* Don't risk a hypervisor call if we're crashing */ 26 + /* 27 + * Don't risk a hypervisor call if we're crashing 28 + * XXX: Why? The hypervisor is not crashing. It might be better 29 + * to at least attempt unregister to avoid the hypervisor stepping 30 + * on our memory. 31 + */ 27 32 if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { 28 33 int ret; 29 34 int cpu = smp_processor_id();
+11 -7
arch/powerpc/platforms/pseries/lpar.c
··· 99 99 * reports that. All SPLPAR support SLB shadow buffer. 100 100 */ 101 101 if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { 102 - addr = __pa(paca[cpu].slb_shadow_ptr); 102 + addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr); 103 103 ret = register_slb_shadow(hwcpu, addr); 104 104 if (ret) 105 105 pr_err("WARNING: SLB shadow buffer registration for " ··· 111 111 /* 112 112 * Register dispatch trace log, if one has been allocated. 113 113 */ 114 - pp = &paca[cpu]; 114 + pp = paca_ptrs[cpu]; 115 115 dtl = pp->dispatch_log; 116 116 if (dtl) { 117 117 pp->dtl_ridx = 0; ··· 306 306 307 307 want_v = hpte_encode_avpn(vpn, psize, ssize); 308 308 309 - pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 310 - want_v, slot, flags, psize); 311 - 312 309 flags = (newpp & 7) | H_AVPN; 313 310 if (mmu_has_feature(MMU_FTR_KERNEL_RO)) 314 311 /* Move pp0 into bit 8 (IBM 55) */ 315 312 flags |= (newpp & HPTE_R_PP0) >> 55; 313 + 314 + pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 315 + want_v, slot, flags, psize); 316 316 317 317 lpar_rc = plpar_pte_protect(flags, slot, want_v); 318 318 ··· 726 726 return 0; 727 727 } 728 728 729 - /* Actually only used for radix, so far */ 730 729 static int pseries_lpar_register_process_table(unsigned long base, 731 730 unsigned long page_size, unsigned long table_size) 732 731 { 733 732 long rc; 734 - unsigned long flags = PROC_TABLE_NEW; 733 + unsigned long flags = 0; 735 734 735 + if (table_size) 736 + flags |= PROC_TABLE_NEW; 736 737 if (radix_enabled()) 737 738 flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; 739 + else 740 + flags |= PROC_TABLE_HPT_SLB; 738 741 for (;;) { 739 742 rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, 740 743 page_size, table_size); ··· 763 760 mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; 764 761 mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; 765 762 mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; 763 + 
register_process_table = pseries_lpar_register_process_table; 766 764 767 765 if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) 768 766 mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+3
arch/powerpc/platforms/pseries/mobility.c
··· 348 348 printk(KERN_ERR "Post-mobility device tree update " 349 349 "failed: %d\n", rc); 350 350 351 + /* Possibly switch to a new RFI flush type */ 352 + pseries_setup_rfi_flush(); 353 + 351 354 return; 352 355 } 353 356
+10
arch/powerpc/platforms/pseries/pseries.h
··· 27 27 28 28 #ifdef CONFIG_SMP 29 29 extern void smp_init_pseries(void); 30 + 31 + /* Get state of physical CPU from query_cpu_stopped */ 32 + int smp_query_cpu_stopped(unsigned int pcpu); 33 + #define QCSS_STOPPED 0 34 + #define QCSS_STOPPING 1 35 + #define QCSS_NOT_STOPPED 2 36 + #define QCSS_HARDWARE_ERROR -1 37 + #define QCSS_HARDWARE_BUSY -2 30 38 #else 31 39 static inline void smp_init_pseries(void) { }; 32 40 #endif ··· 107 99 } 108 100 109 101 int dlpar_workqueue_init(void); 102 + 103 + void pseries_setup_rfi_flush(void); 110 104 111 105 #endif /* _PSERIES_PSERIES_H */
+64 -21
arch/powerpc/platforms/pseries/setup.c
··· 68 68 #include <asm/plpar_wrappers.h> 69 69 #include <asm/kexec.h> 70 70 #include <asm/isa-bridge.h> 71 + #include <asm/security_features.h> 71 72 72 73 #include "pseries.h" 73 74 ··· 247 246 return 0; 248 247 249 248 for_each_possible_cpu(cpu) { 250 - pp = &paca[cpu]; 249 + pp = paca_ptrs[cpu]; 251 250 dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); 252 251 if (!dtl) { 253 252 pr_warn("Failed to allocate dispatch trace log for cpu %d\n", ··· 460 459 of_pci_check_probe_only(); 461 460 } 462 461 463 - static void pseries_setup_rfi_flush(void) 462 + static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) 463 + { 464 + /* 465 + * The features below are disabled by default, so we instead look to see 466 + * if firmware has *enabled* them, and set them if so. 467 + */ 468 + if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31) 469 + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); 470 + 471 + if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED) 472 + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); 473 + 474 + if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30) 475 + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); 476 + 477 + if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2) 478 + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); 479 + 480 + if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV) 481 + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); 482 + 483 + if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) 484 + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); 485 + 486 + /* 487 + * The features below are enabled by default, so we instead look to see 488 + * if firmware has *disabled* them, and clear them if so. 
489 + */ 490 + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) 491 + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); 492 + 493 + if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) 494 + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); 495 + 496 + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) 497 + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); 498 + } 499 + 500 + void pseries_setup_rfi_flush(void) 464 501 { 465 502 struct h_cpu_char_result result; 466 503 enum l1d_flush_type types; 467 504 bool enable; 468 505 long rc; 469 506 470 - /* Enable by default */ 471 - enable = true; 507 + /* 508 + * Set features to the defaults assumed by init_cpu_char_feature_flags() 509 + * so it can set/clear again any features that might have changed after 510 + * migration, and in case the hypercall fails and it is not even called. 511 + */ 512 + powerpc_security_features = SEC_FTR_DEFAULT; 472 513 473 514 rc = plpar_get_cpu_characteristics(&result); 474 - if (rc == H_SUCCESS) { 475 - types = L1D_FLUSH_NONE; 515 + if (rc == H_SUCCESS) 516 + init_cpu_char_feature_flags(&result); 476 517 477 - if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) 478 - types |= L1D_FLUSH_MTTRIG; 479 - if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) 480 - types |= L1D_FLUSH_ORI; 518 + /* 519 + * We're the guest so this doesn't apply to us, clear it to simplify 520 + * handling of it elsewhere. 
521 + */ 522 + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); 481 523 482 - /* Use fallback if nothing set in hcall */ 483 - if (types == L1D_FLUSH_NONE) 484 - types = L1D_FLUSH_FALLBACK; 524 + types = L1D_FLUSH_FALLBACK; 485 525 486 - if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) || 487 - (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))) 488 - enable = false; 489 - } else { 490 - /* Default to fallback if case hcall is not available */ 491 - types = L1D_FLUSH_FALLBACK; 492 - } 526 + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) 527 + types |= L1D_FLUSH_MTTRIG; 528 + 529 + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) 530 + types |= L1D_FLUSH_ORI; 531 + 532 + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ 533 + security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); 493 534 494 535 setup_rfi_flush(types, enable); 495 536 } ··· 782 739 /* PAPR says we can't set HYP */ 783 740 dawrx &= ~DAWRX_HYP; 784 741 785 - return plapr_set_watchpoint0(dawr, dawrx); 742 + return plpar_set_watchpoint0(dawr, dawrx); 786 743 } 787 744 788 745 #define CMO_CHARACTERISTICS_TOKEN 44
+3 -3
arch/powerpc/platforms/pseries/smp.c
··· 110 110 } 111 111 112 112 /* Fixup atomic count: it exited inside IRQ handler. */ 113 - task_thread_info(paca[lcpu].__current)->preempt_count = 0; 113 + task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; 114 114 #ifdef CONFIG_HOTPLUG_CPU 115 115 if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) 116 116 goto out; ··· 165 165 * cpu_start field to become non-zero After we set cpu_start, 166 166 * the processor will continue on to secondary_start 167 167 */ 168 - paca[nr].cpu_start = 1; 168 + paca_ptrs[nr]->cpu_start = 1; 169 169 #ifdef CONFIG_HOTPLUG_CPU 170 170 set_preferred_offline_state(nr, CPU_STATE_ONLINE); 171 171 ··· 215 215 hwcpu = get_hard_smp_processor_id(cpu); 216 216 } 217 217 218 - if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS) 218 + if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS) 219 219 return 1; 220 220 221 221 return 0;
+1 -1
arch/powerpc/sysdev/mpic.c
··· 626 626 int i; 627 627 u32 mask = 0; 628 628 629 - for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1) 629 + for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1) 630 630 mask |= (cpumask & 1) << get_hard_smp_processor_id(i); 631 631 return mask; 632 632 }
+1 -1
arch/powerpc/sysdev/xics/icp-native.c
··· 164 164 * Just like the cause_ipi functions, it is required to 165 165 * include a full barrier before causing the IPI. 166 166 */ 167 - xics_phys = paca[cpu].kvm_hstate.xics_phys; 167 + xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys; 168 168 mb(); 169 169 __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR); 170 170 }
+1 -1
arch/powerpc/sysdev/xive/common.c
··· 246 246 u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); 247 247 xmon_printf(" IPI state: %x:%c%c\n", xc->hw_ipi, 248 248 val & XIVE_ESB_VAL_P ? 'P' : 'p', 249 - val & XIVE_ESB_VAL_P ? 'Q' : 'q'); 249 + val & XIVE_ESB_VAL_Q ? 'Q' : 'q'); 250 250 } 251 251 #endif 252 252 }
+47 -9
arch/powerpc/xmon/xmon.c
··· 41 41 #include <asm/pgtable.h> 42 42 #include <asm/mmu.h> 43 43 #include <asm/mmu_context.h> 44 + #include <asm/plpar_wrappers.h> 44 45 #include <asm/cputable.h> 45 46 #include <asm/rtas.h> 46 47 #include <asm/sstep.h> ··· 60 59 #ifdef CONFIG_PPC64 61 60 #include <asm/hvcall.h> 62 61 #include <asm/paca.h> 63 - #endif 64 - 65 - #if defined(CONFIG_PPC_SPLPAR) 66 - #include <asm/plpar_wrappers.h> 67 - #else 68 - static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; }; 69 62 #endif 70 63 71 64 #include "nonstdio.h" ··· 323 328 mtspr(SPRN_CIABR, ciabr); 324 329 return; 325 330 } 326 - plapr_set_ciabr(ciabr); 331 + plpar_set_ciabr(ciabr); 327 332 } 328 333 329 334 /** ··· 1268 1273 return 1; 1269 1274 } 1270 1275 1276 + /* Force enable xmon if not already enabled */ 1277 + static inline void force_enable_xmon(void) 1278 + { 1279 + /* Enable xmon hooks if needed */ 1280 + if (!xmon_on) { 1281 + printf("xmon: Enabling debugger hooks\n"); 1282 + xmon_on = 1; 1283 + } 1284 + } 1285 + 1271 1286 static char *breakpoint_help_string = 1272 1287 "Breakpoint command usage:\n" 1273 1288 "b show breakpoints\n" ··· 1302 1297 static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n"; 1303 1298 int mode; 1304 1299 case 'd': /* bd - hardware data breakpoint */ 1300 + if (!ppc_breakpoint_available()) { 1301 + printf("Hardware data breakpoint not supported on this cpu\n"); 1302 + break; 1303 + } 1305 1304 mode = 7; 1306 1305 cmd = inchar(); 1307 1306 if (cmd == 'r') ··· 1324 1315 dabr.address &= ~HW_BRK_TYPE_DABR; 1325 1316 dabr.enabled = mode | BP_DABR; 1326 1317 } 1318 + 1319 + force_enable_xmon(); 1327 1320 break; 1328 1321 1329 1322 case 'i': /* bi - hardware instr breakpoint */ ··· 1346 1335 if (bp != NULL) { 1347 1336 bp->enabled |= BP_CIABR; 1348 1337 iabr = bp; 1338 + force_enable_xmon(); 1349 1339 } 1350 1340 break; 1351 1341 #endif ··· 1411 1399 if (!check_bp_loc(a)) 1412 1400 break; 1413 1401 bp = new_breakpoint(a); 1414 - if (bp != 
NULL) 1402 + if (bp != NULL) { 1415 1403 bp->enabled |= BP_TRAP; 1404 + force_enable_xmon(); 1405 + } 1416 1406 break; 1417 1407 } 1418 1408 } ··· 2341 2327 catch_memory_errors = 1; 2342 2328 sync(); 2343 2329 2344 - p = &paca[cpu]; 2330 + p = paca_ptrs[cpu]; 2345 2331 2346 2332 printf("paca for cpu 0x%x @ %px:\n", cpu, p); 2347 2333 ··· 3663 3649 #endif /* CONFIG_MAGIC_SYSRQ */ 3664 3650 3665 3651 #ifdef CONFIG_DEBUG_FS 3652 + static void clear_all_bpt(void) 3653 + { 3654 + int i; 3655 + 3656 + /* clear/unpatch all breakpoints */ 3657 + remove_bpts(); 3658 + remove_cpu_bpts(); 3659 + 3660 + /* Disable all breakpoints */ 3661 + for (i = 0; i < NBPTS; ++i) 3662 + bpts[i].enabled = 0; 3663 + 3664 + /* Clear any data or iabr breakpoints */ 3665 + if (iabr || dabr.enabled) { 3666 + iabr = NULL; 3667 + dabr.enabled = 0; 3668 + } 3669 + 3670 + printf("xmon: All breakpoints cleared\n"); 3671 + } 3672 + 3666 3673 static int xmon_dbgfs_set(void *data, u64 val) 3667 3674 { 3668 3675 xmon_on = !!val; 3669 3676 xmon_init(xmon_on); 3670 3677 3678 + /* make sure all breakpoints removed when disabling */ 3679 + if (!xmon_on) 3680 + clear_all_bpt(); 3671 3681 return 0; 3672 3682 } 3673 3683
+7 -7
drivers/macintosh/adb-iop.c
··· 53 53 static int adb_iop_reset_bus(void); 54 54 55 55 struct adb_driver adb_iop_driver = { 56 - "ISM IOP", 57 - adb_iop_probe, 58 - adb_iop_init, 59 - adb_iop_send_request, 60 - adb_iop_autopoll, 61 - adb_iop_poll, 62 - adb_iop_reset_bus 56 + .name = "ISM IOP", 57 + .probe = adb_iop_probe, 58 + .init = adb_iop_init, 59 + .send_request = adb_iop_send_request, 60 + .autopoll = adb_iop_autopoll, 61 + .poll = adb_iop_poll, 62 + .reset_bus = adb_iop_reset_bus 63 63 }; 64 64 65 65 static void adb_iop_end_req(struct adb_request *req, int state)
+1
drivers/macintosh/ans-lcd.c
··· 201 201 202 202 module_init(anslcd_init); 203 203 module_exit(anslcd_exit); 204 + MODULE_LICENSE("GPL v2");
+7 -8
drivers/macintosh/macio-adb.c
··· 70 70 static int macio_adb_reset_bus(void); 71 71 72 72 struct adb_driver macio_adb_driver = { 73 - "MACIO", 74 - macio_probe, 75 - macio_init, 76 - macio_send_request, 77 - /*macio_write,*/ 78 - macio_adb_autopoll, 79 - macio_adb_poll, 80 - macio_adb_reset_bus 73 + .name = "MACIO", 74 + .probe = macio_probe, 75 + .init = macio_init, 76 + .send_request = macio_send_request, 77 + .autopoll = macio_adb_autopoll, 78 + .poll = macio_adb_poll, 79 + .reset_bus = macio_adb_reset_bus, 81 80 }; 82 81 83 82 int macio_probe(void)
+3 -3
drivers/macintosh/rack-meter.c
··· 154 154 DBDMA_DO_STOP(rm->dma_regs); 155 155 return; 156 156 } 157 - memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1)); 158 - memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2)); 157 + memset(rdma->buf1, 0, sizeof(rdma->buf1)); 158 + memset(rdma->buf2, 0, sizeof(rdma->buf2)); 159 159 160 160 rm->dma_buf_v->mark = 0; 161 161 ··· 397 397 } 398 398 399 399 /* Create and initialize our instance data */ 400 - rm = kzalloc(sizeof(struct rackmeter), GFP_KERNEL); 400 + rm = kzalloc(sizeof(*rm), GFP_KERNEL); 401 401 if (rm == NULL) { 402 402 printk(KERN_ERR "rackmeter: failed to allocate memory !\n"); 403 403 rc = -ENOMEM;
+7 -7
drivers/macintosh/via-macii.c
··· 91 91 static int macii_reset_bus(void); 92 92 93 93 struct adb_driver via_macii_driver = { 94 - "Mac II", 95 - macii_probe, 96 - macii_init, 97 - macii_send_request, 98 - macii_autopoll, 99 - macii_poll, 100 - macii_reset_bus 94 + .name = "Mac II", 95 + .probe = macii_probe, 96 + .init = macii_init, 97 + .send_request = macii_send_request, 98 + .autopoll = macii_autopoll, 99 + .poll = macii_poll, 100 + .reset_bus = macii_reset_bus, 101 101 }; 102 102 103 103 static enum macii_state {
+8 -8
drivers/macintosh/via-pmu.c
··· 198 198 static const struct file_operations pmu_options_proc_fops; 199 199 200 200 #ifdef CONFIG_ADB 201 - struct adb_driver via_pmu_driver = { 202 - "PMU", 203 - pmu_probe, 204 - pmu_init, 205 - pmu_send_request, 206 - pmu_adb_autopoll, 207 - pmu_poll_adb, 208 - pmu_adb_reset_bus 201 + const struct adb_driver via_pmu_driver = { 202 + .name = "PMU", 203 + .probe = pmu_probe, 204 + .init = pmu_init, 205 + .send_request = pmu_send_request, 206 + .autopoll = pmu_adb_autopoll, 207 + .poll = pmu_poll_adb, 208 + .reset_bus = pmu_adb_reset_bus, 209 209 }; 210 210 #endif /* CONFIG_ADB */ 211 211
+7 -7
drivers/macintosh/via-pmu68k.c
··· 120 120 static void pmu_set_brightness(int level); 121 121 122 122 struct adb_driver via_pmu_driver = { 123 - "68K PMU", 124 - pmu_probe, 125 - pmu_init, 126 - pmu_send_request, 127 - pmu_autopoll, 128 - pmu_poll, 129 - pmu_reset_bus 123 + .name = "68K PMU", 124 + .probe = pmu_probe, 125 + .init = pmu_init, 126 + .send_request = pmu_send_request, 127 + .autopoll = pmu_autopoll, 128 + .poll = pmu_poll, 129 + .reset_bus = pmu_reset_bus, 130 130 }; 131 131 132 132 /*
+5 -1
drivers/misc/cxl/cxl.h
··· 369 369 #define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */ 370 370 #define CXL_PSL_TFC_An_R (1ull << (63-31)) /* Restart PSL transaction */ 371 371 372 + /****** CXL_PSL_DEBUG *****************************************************/ 373 + #define CXL_PSL_DEBUG_CDC (1ull << (63-27)) /* Coherent Data cache support */ 374 + 372 375 /****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/ 373 376 #define CXL_XSL9_IERAT_MLPID (1ull << (63-0)) /* Match LPID */ 374 377 #define CXL_XSL9_IERAT_MPID (1ull << (63-1)) /* Match PID */ ··· 672 669 irq_hw_number_t err_hwirq; 673 670 unsigned int err_virq; 674 671 u64 ps_off; 672 + bool no_data_cache; /* set if no data cache on the card */ 675 673 const struct cxl_service_layer_ops *sl_ops; 676 674 }; 677 675 ··· 1069 1065 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, 1070 1066 u32 *phb_index, u64 *capp_unit_id); 1071 1067 int cxl_slot_is_switched(struct pci_dev *dev); 1072 - int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg); 1068 + int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg); 1073 1069 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9); 1074 1070 1075 1071 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
+57 -32
drivers/misc/cxl/cxllib.c
··· 99 99 if (rc) 100 100 return rc; 101 101 102 - rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl); 102 + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); 103 103 if (rc) 104 104 return rc; 105 105 if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { ··· 208 208 } 209 209 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); 210 210 211 - int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) 211 + static int get_vma_info(struct mm_struct *mm, u64 addr, 212 + u64 *vma_start, u64 *vma_end, 213 + unsigned long *page_size) 212 214 { 213 - int rc; 214 - u64 dar; 215 215 struct vm_area_struct *vma = NULL; 216 - unsigned long page_size; 217 - 218 - if (mm == NULL) 219 - return -EFAULT; 216 + int rc = 0; 220 217 221 218 down_read(&mm->mmap_sem); 222 219 223 220 vma = find_vma(mm, addr); 224 221 if (!vma) { 225 - pr_err("Can't find vma for addr %016llx\n", addr); 226 222 rc = -EFAULT; 227 223 goto out; 228 224 } 229 - /* get the size of the pages allocated */ 230 - page_size = vma_kernel_pagesize(vma); 231 - 232 - for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) { 233 - if (dar < vma->vm_start || dar >= vma->vm_end) { 234 - vma = find_vma(mm, addr); 235 - if (!vma) { 236 - pr_err("Can't find vma for addr %016llx\n", addr); 237 - rc = -EFAULT; 238 - goto out; 239 - } 240 - /* get the size of the pages allocated */ 241 - page_size = vma_kernel_pagesize(vma); 242 - } 243 - 244 - rc = cxl_handle_mm_fault(mm, flags, dar); 245 - if (rc) { 246 - pr_err("cxl_handle_mm_fault failed %d", rc); 247 - rc = -EFAULT; 248 - goto out; 249 - } 250 - } 251 - rc = 0; 225 + *page_size = vma_kernel_pagesize(vma); 226 + *vma_start = vma->vm_start; 227 + *vma_end = vma->vm_end; 252 228 out: 253 229 up_read(&mm->mmap_sem); 254 230 return rc; 231 + } 232 + 233 + int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) 234 + { 235 + int rc; 236 + u64 dar, vma_start, vma_end; 237 + unsigned long page_size; 238 + 239 + if (mm == NULL) 
240 + return -EFAULT; 241 + 242 + /* 243 + * The buffer we have to process can extend over several pages 244 + * and may also cover several VMAs. 245 + * We iterate over all the pages. The page size could vary 246 + * between VMAs. 247 + */ 248 + rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size); 249 + if (rc) 250 + return rc; 251 + 252 + for (dar = (addr & ~(page_size - 1)); dar < (addr + size); 253 + dar += page_size) { 254 + if (dar < vma_start || dar >= vma_end) { 255 + /* 256 + * We don't hold the mm->mmap_sem semaphore 257 + * while iterating, since the semaphore is 258 + * required by one of the lower-level page 259 + * fault processing functions and it could 260 + * create a deadlock. 261 + * 262 + * It means the VMAs can be altered between 2 263 + * loop iterations and we could theoretically 264 + * miss a page (however unlikely). But that's 265 + * not really a problem, as the driver will 266 + * retry access, get another page fault on the 267 + * missing page and call us again. 268 + */ 269 + rc = get_vma_info(mm, dar, &vma_start, &vma_end, 270 + &page_size); 271 + if (rc) 272 + return rc; 273 + } 274 + 275 + rc = cxl_handle_mm_fault(mm, flags, dar); 276 + if (rc) 277 + return -EFAULT; 278 + } 279 + return 0; 255 280 } 256 281 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
+10 -1
drivers/misc/cxl/native.c
··· 353 353 u64 reg; 354 354 unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); 355 355 356 - pr_devel("Flushing data cache\n"); 356 + /* 357 + * Do a datacache flush only if datacache is available. 358 + * In case of PSL9D datacache absent hence flush operation. 359 + * would timeout. 360 + */ 361 + if (adapter->native->no_data_cache) { 362 + pr_devel("No PSL data cache. Ignoring cache flush req.\n"); 363 + return 0; 364 + } 357 365 366 + pr_devel("Flushing data cache\n"); 358 367 reg = cxl_p1_read(adapter, CXL_PSL_Control); 359 368 reg |= CXL_PSL_Control_Fr; 360 369 cxl_p1_write(adapter, CXL_PSL_Control, reg);
+64 -38
drivers/misc/cxl/pci.c
··· 407 407 return 0; 408 408 } 409 409 410 - int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg) 410 + static DEFINE_MUTEX(indications_mutex); 411 + 412 + static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind, 413 + u64 *nbwind) 414 + { 415 + static u64 nbw, asn, capi = 0; 416 + struct device_node *np; 417 + const __be32 *prop; 418 + 419 + mutex_lock(&indications_mutex); 420 + if (!capi) { 421 + if (!(np = pnv_pci_get_phb_node(dev))) { 422 + mutex_unlock(&indications_mutex); 423 + return -ENODEV; 424 + } 425 + 426 + prop = of_get_property(np, "ibm,phb-indications", NULL); 427 + if (!prop) { 428 + nbw = 0x0300UL; /* legacy values */ 429 + asn = 0x0400UL; 430 + capi = 0x0200UL; 431 + } else { 432 + nbw = (u64)be32_to_cpu(prop[2]); 433 + asn = (u64)be32_to_cpu(prop[1]); 434 + capi = (u64)be32_to_cpu(prop[0]); 435 + } 436 + of_node_put(np); 437 + } 438 + *capiind = capi; 439 + *asnind = asn; 440 + *nbwind = nbw; 441 + mutex_unlock(&indications_mutex); 442 + return 0; 443 + } 444 + 445 + int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg) 411 446 { 412 447 u64 xsl_dsnctl; 448 + u64 capiind, asnind, nbwind; 413 449 414 450 /* 415 451 * CAPI Identifier bits [0:7] 416 452 * bit 61:60 MSI bits --> 0 417 453 * bit 59 TVT selector --> 0 418 454 */ 455 + if (get_phb_indications(dev, &capiind, &asnind, &nbwind)) 456 + return -ENODEV; 419 457 420 458 /* 421 459 * Tell XSL where to route data to. 422 460 * The field chipid should match the PHB CAPI_CMPM register 423 461 */ 424 - xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */ 462 + xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */ 425 463 xsl_dsnctl |= (capp_unit_id << (63-15)); 426 464 427 465 /* nMMU_ID Defaults to: b’000001001’*/ ··· 473 435 * nbwind=0x03, bits [57:58], must include capi indicator. 474 436 * Not supported on P9 DD1. 
475 437 */ 476 - xsl_dsnctl |= ((u64)0x03 << (63-47)); 438 + xsl_dsnctl |= (nbwind << (63-55)); 477 439 478 440 /* 479 441 * Upper 16b address bits of ASB_Notify messages sent to the 480 442 * system. Need to match the PHB’s ASN Compare/Mask Register. 481 443 * Not supported on P9 DD1. 482 444 */ 483 - xsl_dsnctl |= ((u64)0x04 << (63-55)); 445 + xsl_dsnctl |= asnind; 484 446 } 485 447 486 448 *reg = xsl_dsnctl; ··· 494 456 u64 chipid; 495 457 u32 phb_index; 496 458 u64 capp_unit_id; 459 + u64 psl_debug; 497 460 int rc; 498 461 499 462 rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id); 500 463 if (rc) 501 464 return rc; 502 465 503 - rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl); 466 + rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl); 504 467 if (rc) 505 468 return rc; 506 469 ··· 542 503 if (cxl_is_power9_dd1()) { 543 504 /* Disabling deadlock counter CAR */ 544 505 cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL); 545 - } else 546 - cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL); 506 + /* Enable NORST */ 507 + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL); 508 + } else { 509 + /* Enable NORST and DD2 features */ 510 + cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL); 511 + } 512 + 513 + /* 514 + * Check if PSL has data-cache. We need to flush adapter datacache 515 + * when as its about to be removed. 
516 + */ 517 + psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG); 518 + if (psl_debug & CXL_PSL_DEBUG_CDC) { 519 + dev_dbg(&dev->dev, "No data-cache present\n"); 520 + adapter->native->no_data_cache = true; 521 + } 547 522 548 523 return 0; 549 524 } ··· 621 568 /* For the PSL this is a multiple for 0 < n <= 7: */ 622 569 #define PSL_2048_250MHZ_CYCLES 1 623 570 624 - static void write_timebase_ctrl_psl9(struct cxl *adapter) 625 - { 626 - cxl_p1_write(adapter, CXL_PSL9_TB_CTLSTAT, 627 - TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); 628 - } 629 - 630 571 static void write_timebase_ctrl_psl8(struct cxl *adapter) 631 572 { 632 573 cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, ··· 659 612 660 613 static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) 661 614 { 662 - u64 psl_tb; 663 - int delta; 664 - unsigned int retry = 0; 665 615 struct device_node *np; 666 616 667 617 adapter->psl_timebase_synced = false; ··· 679 635 * Setup PSL Timebase Control and Status register 680 636 * with the recommended Timebase Sync Count value 681 637 */ 682 - adapter->native->sl_ops->write_timebase_ctrl(adapter); 638 + if (adapter->native->sl_ops->write_timebase_ctrl) 639 + adapter->native->sl_ops->write_timebase_ctrl(adapter); 683 640 684 641 /* Enable PSL Timebase */ 685 642 cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); 686 643 cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb); 687 644 688 - /* Wait until CORE TB and PSL TB difference <= 16usecs */ 689 - do { 690 - msleep(1); 691 - if (retry++ > 5) { 692 - dev_info(&dev->dev, "PSL timebase can't synchronize\n"); 693 - return; 694 - } 695 - psl_tb = adapter->native->sl_ops->timebase_read(adapter); 696 - delta = mftb() - psl_tb; 697 - if (delta < 0) 698 - delta = -delta; 699 - } while (tb_to_ns(delta) > 16000); 700 - 701 - adapter->psl_timebase_synced = true; 702 645 return; 703 646 } 704 647 ··· 1480 1449 1481 1450 /* 1482 1451 * The adapter is about to be reset, so ignore errors. 
1483 - * Not supported on P9 DD1 1484 1452 */ 1485 - if ((cxl_is_power8()) || (!(cxl_is_power9_dd1()))) 1486 - cxl_data_cache_flush(adapter); 1453 + cxl_data_cache_flush(adapter); 1487 1454 1488 1455 /* pcie_warm_reset requests a fundamental pci reset which includes a 1489 1456 * PERST assert/deassert. PERST triggers a loading of the image ··· 1830 1801 .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9, 1831 1802 .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9, 1832 1803 .debugfs_stop_trace = cxl_stop_trace_psl9, 1833 - .write_timebase_ctrl = write_timebase_ctrl_psl9, 1834 1804 .timebase_read = timebase_read_psl9, 1835 1805 .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, 1836 1806 .needs_reset_before_disable = true, ··· 1964 1936 1965 1937 /* 1966 1938 * Flush adapter datacache as its about to be removed. 1967 - * Not supported on P9 DD1. 1968 1939 */ 1969 - if ((cxl_is_power8()) || (!(cxl_is_power9_dd1()))) 1970 - cxl_data_cache_flush(adapter); 1940 + cxl_data_cache_flush(adapter); 1971 1941 1972 1942 cxl_deconfigure_adapter(adapter); 1973 1943
+12
drivers/misc/cxl/sysfs.c
··· 62 62 char *buf) 63 63 { 64 64 struct cxl *adapter = to_cxl_adapter(device); 65 + u64 psl_tb, delta; 65 66 67 + /* Recompute the status only in native mode */ 68 + if (cpu_has_feature(CPU_FTR_HVMODE)) { 69 + psl_tb = adapter->native->sl_ops->timebase_read(adapter); 70 + delta = abs(mftb() - psl_tb); 71 + 72 + /* CORE TB and PSL TB difference <= 16usecs ? */ 73 + adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false; 74 + pr_devel("PSL timebase %s - delta: 0x%016llx\n", 75 + (tb_to_ns(delta) < 16000) ? "synchronized" : 76 + "not synchronized", tb_to_ns(delta)); 77 + } 66 78 return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); 67 79 } 68 80
+1 -1
drivers/pci/hotplug/pnv_php.c
··· 919 919 return; 920 920 921 921 php_slot->state = PNV_PHP_STATE_OFFLINE; 922 - pnv_php_put_slot(php_slot); 923 922 pci_hp_deregister(&php_slot->slot); 923 + pnv_php_put_slot(php_slot); 924 924 } 925 925 926 926 static void pnv_php_unregister(struct device_node *dn)
+3
include/linux/memblock.h
··· 318 318 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, 319 319 phys_addr_t start, phys_addr_t end, 320 320 ulong flags); 321 + phys_addr_t memblock_alloc_base_nid(phys_addr_t size, 322 + phys_addr_t align, phys_addr_t max_addr, 323 + int nid, ulong flags); 321 324 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, 322 325 phys_addr_t max_addr); 323 326 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
+4
include/linux/raid/pq.h
··· 106 106 extern const struct raid6_calls raid6_avx512x2; 107 107 extern const struct raid6_calls raid6_avx512x4; 108 108 extern const struct raid6_calls raid6_s390vx8; 109 + extern const struct raid6_calls raid6_vpermxor1; 110 + extern const struct raid6_calls raid6_vpermxor2; 111 + extern const struct raid6_calls raid6_vpermxor4; 112 + extern const struct raid6_calls raid6_vpermxor8; 109 113 110 114 struct raid6_recov_calls { 111 115 void (*data2)(int, size_t, int, int, void **);
+1
lib/raid6/.gitignore
··· 4 4 tables.c 5 5 neon?.c 6 6 s390vx?.c 7 + vpermxor*.c
+26 -1
lib/raid6/Makefile
··· 5 5 int8.o int16.o int32.o 6 6 7 7 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o 8 - raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 8 + raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ 9 + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o 9 10 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o 10 11 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o 11 12 ··· 89 88 targets += altivec8.c 90 89 $(obj)/altivec8.c: UNROLL := 8 91 90 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE 91 + $(call if_changed,unroll) 92 + 93 + CFLAGS_vpermxor1.o += $(altivec_flags) 94 + targets += vpermxor1.c 95 + $(obj)/vpermxor1.c: UNROLL := 1 96 + $(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE 97 + $(call if_changed,unroll) 98 + 99 + CFLAGS_vpermxor2.o += $(altivec_flags) 100 + targets += vpermxor2.c 101 + $(obj)/vpermxor2.c: UNROLL := 2 102 + $(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE 103 + $(call if_changed,unroll) 104 + 105 + CFLAGS_vpermxor4.o += $(altivec_flags) 106 + targets += vpermxor4.c 107 + $(obj)/vpermxor4.c: UNROLL := 4 108 + $(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE 109 + $(call if_changed,unroll) 110 + 111 + CFLAGS_vpermxor8.o += $(altivec_flags) 112 + targets += vpermxor8.c 113 + $(obj)/vpermxor8.c: UNROLL := 8 114 + $(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE 92 115 $(call if_changed,unroll) 93 116 94 117 CFLAGS_neon1.o += $(NEON_FLAGS)
+4
lib/raid6/algos.c
··· 74 74 &raid6_altivec2, 75 75 &raid6_altivec4, 76 76 &raid6_altivec8, 77 + &raid6_vpermxor1, 78 + &raid6_vpermxor2, 79 + &raid6_vpermxor4, 80 + &raid6_vpermxor8, 77 81 #endif 78 82 #if defined(CONFIG_S390) 79 83 &raid6_s390vx8,
+3
lib/raid6/altivec.uc
··· 24 24 25 25 #include <linux/raid/pq.h> 26 26 27 + #ifdef CONFIG_ALTIVEC 28 + 27 29 #include <altivec.h> 28 30 #ifdef __KERNEL__ 29 31 # include <asm/cputable.h> 30 32 # include <asm/switch_to.h> 33 + #endif /* __KERNEL__ */ 31 34 32 35 /* 33 36 * This is the C data type to use. We use a vector of
+18 -4
lib/raid6/test/Makefile
··· 45 45 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 46 46 else 47 47 HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ 48 - gcc -c -x c - >&/dev/null && \ 49 - rm ./-.o && echo yes) 48 + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) 50 49 ifeq ($(HAS_ALTIVEC),yes) 51 - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o 50 + CFLAGS += -I../../../arch/powerpc/include 51 + CFLAGS += -DCONFIG_ALTIVEC 52 + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ 53 + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o 52 54 endif 53 55 endif 54 56 ··· 97 95 altivec8.c: altivec.uc ../unroll.awk 98 96 $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ 99 97 98 + vpermxor1.c: vpermxor.uc ../unroll.awk 99 + $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@ 100 + 101 + vpermxor2.c: vpermxor.uc ../unroll.awk 102 + $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@ 103 + 104 + vpermxor4.c: vpermxor.uc ../unroll.awk 105 + $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@ 106 + 107 + vpermxor8.c: vpermxor.uc ../unroll.awk 108 + $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@ 109 + 100 110 int1.c: int.uc ../unroll.awk 101 111 $(AWK) ../unroll.awk -vN=1 < int.uc > $@ 102 112 ··· 131 117 ./mktables > tables.c 132 118 133 119 clean: 134 - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test 120 + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test 135 121 136 122 spotless: clean 137 123 rm -f *~
+105
lib/raid6/vpermxor.uc
··· 1 + /* 2 + * Copyright 2017, Matt Brown, IBM Corp. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation; either version 7 + * 2 of the License, or (at your option) any later version. 8 + * 9 + * vpermxor$#.c 10 + * 11 + * Based on H. Peter Anvin's paper - The mathematics of RAID-6 12 + * 13 + * $#-way unrolled portable integer math RAID-6 instruction set 14 + * This file is postprocessed using unroll.awk 15 + * 16 + * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q 17 + * syndrome calculations. 18 + * This can be run on systems which have both Altivec and vpermxor instruction. 19 + * 20 + * This instruction was introduced in POWER8 - ISA v2.07. 21 + */ 22 + 23 + #include <linux/raid/pq.h> 24 + #ifdef CONFIG_ALTIVEC 25 + 26 + #include <altivec.h> 27 + #ifdef __KERNEL__ 28 + #include <asm/cputable.h> 29 + #include <asm/ppc-opcode.h> 30 + #include <asm/switch_to.h> 31 + #endif 32 + 33 + typedef vector unsigned char unative_t; 34 + #define NSIZE sizeof(unative_t) 35 + 36 + static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, 37 + 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, 38 + 0x06, 0x04, 0x02,0x00}; 39 + static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, 40 + 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80, 41 + 0x60, 0x40, 0x20, 0x00}; 42 + 43 + static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes, 44 + void **ptrs) 45 + { 46 + u8 **dptr = (u8 **)ptrs; 47 + u8 *p, *q; 48 + int d, z, z0; 49 + unative_t wp$$, wq$$, wd$$; 50 + 51 + z0 = disks - 3; /* Highest data disk */ 52 + p = dptr[z0+1]; /* XOR parity */ 53 + q = dptr[z0+2]; /* RS syndrome */ 54 + 55 + for (d = 0; d < bytes; d += NSIZE*$#) { 56 + wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; 57 + 58 + for (z = z0-1; z>=0; z--) { 59 + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 60 + 
/* P syndrome */ 61 + wp$$ = vec_xor(wp$$, wd$$); 62 + 63 + /* Q syndrome */ 64 + asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$)); 65 + wq$$ = vec_xor(wq$$, wd$$); 66 + } 67 + *(unative_t *)&p[d+NSIZE*$$] = wp$$; 68 + *(unative_t *)&q[d+NSIZE*$$] = wq$$; 69 + } 70 + } 71 + 72 + static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs) 73 + { 74 + preempt_disable(); 75 + enable_kernel_altivec(); 76 + 77 + raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs); 78 + 79 + disable_kernel_altivec(); 80 + preempt_enable(); 81 + } 82 + 83 + int raid6_have_altivec_vpermxor(void); 84 + #if $# == 1 85 + int raid6_have_altivec_vpermxor(void) 86 + { 87 + /* Check if arch has both altivec and the vpermxor instructions */ 88 + # ifdef __KERNEL__ 89 + return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) && 90 + cpu_has_feature(CPU_FTR_ARCH_207S)); 91 + # else 92 + return 1; 93 + #endif 94 + 95 + } 96 + #endif 97 + 98 + const struct raid6_calls raid6_vpermxor$# = { 99 + raid6_vpermxor$#_gen_syndrome, 100 + NULL, 101 + raid6_have_altivec_vpermxor, 102 + "vpermxor$#", 103 + 0 104 + }; 105 + #endif
+1 -1
mm/memblock.c
··· 1163 1163 flags); 1164 1164 } 1165 1165 1166 - static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, 1166 + phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, 1167 1167 phys_addr_t align, phys_addr_t max_addr, 1168 1168 int nid, ulong flags) 1169 1169 {
+2
tools/testing/selftests/powerpc/benchmarks/.gitignore
··· 1 1 gettimeofday 2 2 context_switch 3 + fork 4 + exec_target 3 5 mmap_bench 4 6 futex_bench 5 7 null_syscall
+6 -1
tools/testing/selftests/powerpc/benchmarks/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 - TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall 2 + TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall 3 + TEST_GEN_FILES := exec_target 3 4 4 5 CFLAGS += -O2 5 6 ··· 11 10 $(OUTPUT)/context_switch: ../utils.c 12 11 $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec 13 12 $(OUTPUT)/context_switch: LDLIBS += -lpthread 13 + 14 + $(OUTPUT)/fork: LDLIBS += -lpthread 15 + 16 + $(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
+13
tools/testing/selftests/powerpc/benchmarks/exec_target.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + 3 + /* 4 + * Part of fork context switch microbenchmark. 5 + * 6 + * Copyright 2018, Anton Blanchard, IBM Corp. 7 + */ 8 + 9 + void _exit(int); 10 + void _start(void) 11 + { 12 + _exit(0); 13 + }
+325
tools/testing/selftests/powerpc/benchmarks/fork.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + 3 + /* 4 + * Context switch microbenchmark. 5 + * 6 + * Copyright 2018, Anton Blanchard, IBM Corp. 7 + */ 8 + 9 + #define _GNU_SOURCE 10 + #include <assert.h> 11 + #include <errno.h> 12 + #include <getopt.h> 13 + #include <limits.h> 14 + #include <linux/futex.h> 15 + #include <pthread.h> 16 + #include <sched.h> 17 + #include <signal.h> 18 + #include <stdio.h> 19 + #include <stdlib.h> 20 + #include <string.h> 21 + #include <sys/shm.h> 22 + #include <sys/syscall.h> 23 + #include <sys/time.h> 24 + #include <sys/types.h> 25 + #include <sys/wait.h> 26 + #include <unistd.h> 27 + 28 + static unsigned int timeout = 30; 29 + 30 + static void set_cpu(int cpu) 31 + { 32 + cpu_set_t cpuset; 33 + 34 + if (cpu == -1) 35 + return; 36 + 37 + CPU_ZERO(&cpuset); 38 + CPU_SET(cpu, &cpuset); 39 + 40 + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { 41 + perror("sched_setaffinity"); 42 + exit(1); 43 + } 44 + } 45 + 46 + static void start_process_on(void *(*fn)(void *), void *arg, int cpu) 47 + { 48 + int pid; 49 + 50 + pid = fork(); 51 + if (pid == -1) { 52 + perror("fork"); 53 + exit(1); 54 + } 55 + 56 + if (pid) 57 + return; 58 + 59 + set_cpu(cpu); 60 + 61 + fn(arg); 62 + 63 + exit(0); 64 + } 65 + 66 + static int cpu; 67 + static int do_fork = 0; 68 + static int do_vfork = 0; 69 + static int do_exec = 0; 70 + static char *exec_file; 71 + static int exec_target = 0; 72 + static unsigned long iterations; 73 + static unsigned long iterations_prev; 74 + 75 + static void run_exec(void) 76 + { 77 + char *const argv[] = { "./exec_target", NULL }; 78 + 79 + if (execve("./exec_target", argv, NULL) == -1) { 80 + perror("execve"); 81 + exit(1); 82 + } 83 + } 84 + 85 + static void bench_fork(void) 86 + { 87 + while (1) { 88 + pid_t pid = fork(); 89 + if (pid == -1) { 90 + perror("fork"); 91 + exit(1); 92 + } 93 + if (pid == 0) { 94 + if (do_exec) 95 + run_exec(); 96 + _exit(0); 97 + } 98 + pid = waitpid(pid, NULL, 0); 99 + if (pid == -1) { 
100 + perror("waitpid"); 101 + exit(1); 102 + } 103 + iterations++; 104 + } 105 + } 106 + 107 + static void bench_vfork(void) 108 + { 109 + while (1) { 110 + pid_t pid = vfork(); 111 + if (pid == -1) { 112 + perror("fork"); 113 + exit(1); 114 + } 115 + if (pid == 0) { 116 + if (do_exec) 117 + run_exec(); 118 + _exit(0); 119 + } 120 + pid = waitpid(pid, NULL, 0); 121 + if (pid == -1) { 122 + perror("waitpid"); 123 + exit(1); 124 + } 125 + iterations++; 126 + } 127 + } 128 + 129 + static void *null_fn(void *arg) 130 + { 131 + pthread_exit(NULL); 132 + } 133 + 134 + static void bench_thread(void) 135 + { 136 + pthread_t tid; 137 + cpu_set_t cpuset; 138 + pthread_attr_t attr; 139 + int rc; 140 + 141 + rc = pthread_attr_init(&attr); 142 + if (rc) { 143 + errno = rc; 144 + perror("pthread_attr_init"); 145 + exit(1); 146 + } 147 + 148 + if (cpu != -1) { 149 + CPU_ZERO(&cpuset); 150 + CPU_SET(cpu, &cpuset); 151 + 152 + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); 153 + if (rc) { 154 + errno = rc; 155 + perror("pthread_attr_setaffinity_np"); 156 + exit(1); 157 + } 158 + } 159 + 160 + while (1) { 161 + rc = pthread_create(&tid, &attr, null_fn, NULL); 162 + if (rc) { 163 + errno = rc; 164 + perror("pthread_create"); 165 + exit(1); 166 + } 167 + rc = pthread_join(tid, NULL); 168 + if (rc) { 169 + errno = rc; 170 + perror("pthread_join"); 171 + exit(1); 172 + } 173 + iterations++; 174 + } 175 + } 176 + 177 + static void sigalrm_handler(int junk) 178 + { 179 + unsigned long i = iterations; 180 + 181 + printf("%ld\n", i - iterations_prev); 182 + iterations_prev = i; 183 + 184 + if (--timeout == 0) 185 + kill(0, SIGUSR1); 186 + 187 + alarm(1); 188 + } 189 + 190 + static void sigusr1_handler(int junk) 191 + { 192 + exit(0); 193 + } 194 + 195 + static void *bench_proc(void *arg) 196 + { 197 + signal(SIGALRM, sigalrm_handler); 198 + alarm(1); 199 + 200 + if (do_fork) 201 + bench_fork(); 202 + else if (do_vfork) 203 + bench_vfork(); 204 + else 205 + 
bench_thread(); 206 + 207 + return NULL; 208 + } 209 + 210 + static struct option options[] = { 211 + { "fork", no_argument, &do_fork, 1 }, 212 + { "vfork", no_argument, &do_vfork, 1 }, 213 + { "exec", no_argument, &do_exec, 1 }, 214 + { "timeout", required_argument, 0, 's' }, 215 + { "exec-target", no_argument, &exec_target, 1 }, 216 + { NULL }, 217 + }; 218 + 219 + static void usage(void) 220 + { 221 + fprintf(stderr, "Usage: fork <options> CPU\n\n"); 222 + fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n"); 223 + fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n"); 224 + fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n"); 225 + fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"); 226 + fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n"); 227 + } 228 + 229 + int main(int argc, char *argv[]) 230 + { 231 + signed char c; 232 + 233 + while (1) { 234 + int option_index = 0; 235 + 236 + c = getopt_long(argc, argv, "", options, &option_index); 237 + 238 + if (c == -1) 239 + break; 240 + 241 + switch (c) { 242 + case 0: 243 + if (options[option_index].flag != 0) 244 + break; 245 + 246 + usage(); 247 + exit(1); 248 + break; 249 + 250 + case 's': 251 + timeout = atoi(optarg); 252 + break; 253 + 254 + default: 255 + usage(); 256 + exit(1); 257 + } 258 + } 259 + 260 + if (do_fork && do_vfork) { 261 + usage(); 262 + exit(1); 263 + } 264 + if (do_exec && !do_fork && !do_vfork) { 265 + usage(); 266 + exit(1); 267 + } 268 + 269 + if (do_exec) { 270 + char *dirname = strdup(argv[0]); 271 + int i; 272 + i = strlen(dirname) - 1; 273 + while (i) { 274 + if (dirname[i] == '/') { 275 + dirname[i] = '\0'; 276 + if (chdir(dirname) == -1) { 277 + perror("chdir"); 278 + exit(1); 279 + } 280 + break; 281 + } 282 + i--; 283 + } 284 + } 285 + 286 + if (exec_target) { 287 + exit(0); 288 + } 289 + 290 + if (((argc - optind) != 1)) { 291 + cpu = -1; 292 + } else { 293 + cpu = atoi(argv[optind++]); 294 + } 295 + 
296 + if (do_exec) 297 + exec_file = argv[0]; 298 + 299 + set_cpu(cpu); 300 + 301 + printf("Using "); 302 + if (do_fork) 303 + printf("fork"); 304 + else if (do_vfork) 305 + printf("vfork"); 306 + else 307 + printf("clone"); 308 + 309 + if (do_exec) 310 + printf(" + exec"); 311 + 312 + printf(" on cpu %d\n", cpu); 313 + 314 + /* Create a new process group so we can signal everyone for exit */ 315 + setpgid(getpid(), getpid()); 316 + 317 + signal(SIGUSR1, sigusr1_handler); 318 + 319 + start_process_on(bench_proc, NULL, cpu); 320 + 321 + while (1) 322 + sleep(3600); 323 + 324 + return 0; 325 + }
+2 -2
tools/testing/selftests/powerpc/copyloops/Makefile
··· 5 5 CFLAGS += -D SELFTEST 6 6 CFLAGS += -maltivec 7 7 8 - # Use our CFLAGS for the implicit .S rule 9 - ASFLAGS = $(CFLAGS) 8 + # Use our CFLAGS for the implicit .S rule & set the asm machine type 9 + ASFLAGS = $(CFLAGS) -Wa,-mpower4 10 10 11 11 TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 12 12 EXTRA_SOURCES := validate.c ../harness.c
+1 -1
tools/testing/selftests/powerpc/tm/Makefile
··· 4 4 5 5 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ 6 6 tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ 7 - $(SIGNAL_CONTEXT_CHK_TESTS) 7 + $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn 8 8 9 9 include ../../lib.mk 10 10
+92
tools/testing/selftests/powerpc/tm/tm-sigreturn.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Copyright 2015, Laurent Dufour, IBM Corp. 5 + * 6 + * Test the kernel's signal returning code to check reclaim is done if the 7 + * sigreturn() is called while in a transaction (suspended since active is 8 + * already dropped trough the system call path). 9 + * 10 + * The kernel must discard the transaction when entering sigreturn, since 11 + * restoring the potential TM SPRS from the signal frame is requiring to not be 12 + * in a transaction. 13 + */ 14 + 15 + #include <signal.h> 16 + #include <stdio.h> 17 + #include <stdlib.h> 18 + #include <string.h> 19 + #include <sys/types.h> 20 + #include <sys/wait.h> 21 + #include <unistd.h> 22 + 23 + #include "tm.h" 24 + #include "utils.h" 25 + 26 + 27 + void handler(int sig) 28 + { 29 + uint64_t ret; 30 + 31 + asm __volatile__( 32 + "li 3,1 ;" 33 + "tbegin. ;" 34 + "beq 1f ;" 35 + "li 3,0 ;" 36 + "tsuspend. ;" 37 + "1: ;" 38 + "std%X[ret] 3, %[ret] ;" 39 + : [ret] "=m"(ret) 40 + : 41 + : "memory", "3", "cr0"); 42 + 43 + if (ret) 44 + exit(1); 45 + 46 + /* 47 + * We return from the signal handle while in a suspended transaction 48 + */ 49 + } 50 + 51 + 52 + int tm_sigreturn(void) 53 + { 54 + struct sigaction sa; 55 + uint64_t ret = 0; 56 + 57 + SKIP_IF(!have_htm()); 58 + 59 + memset(&sa, 0, sizeof(sa)); 60 + sa.sa_handler = handler; 61 + sigemptyset(&sa.sa_mask); 62 + 63 + if (sigaction(SIGSEGV, &sa, NULL)) 64 + exit(1); 65 + 66 + asm __volatile__( 67 + "tbegin. ;" 68 + "beq 1f ;" 69 + "li 3,0 ;" 70 + "std 3,0(3) ;" /* trigger SEGV */ 71 + "li 3,1 ;" 72 + "std%X[ret] 3,%[ret] ;" 73 + "tend. ;" 74 + "b 2f ;" 75 + "1: ;" 76 + "li 3,2 ;" 77 + "std%X[ret] 3,%[ret] ;" 78 + "2: ;" 79 + : [ret] "=m"(ret) 80 + : 81 + : "memory", "3", "cr0"); 82 + 83 + if (ret != 2) 84 + exit(1); 85 + 86 + exit(0); 87 + } 88 + 89 + int main(void) 90 + { 91 + return test_harness(tm_sigreturn, "tm_sigreturn"); 92 + }
+16 -8
tools/testing/selftests/powerpc/tm/tm-unavailable.c
··· 80 80 return ((condition_reg >> 28) & 0xa) == 0xa; 81 81 } 82 82 83 - void *ping(void *input) 83 + void *tm_una_ping(void *input) 84 84 { 85 85 86 86 /* ··· 280 280 } 281 281 282 282 /* Thread to force context switch */ 283 - void *pong(void *not_used) 283 + void *tm_una_pong(void *not_used) 284 284 { 285 285 /* Wait thread get its name "pong". */ 286 286 if (DEBUG) ··· 311 311 do { 312 312 int rc; 313 313 314 - /* Bind 'ping' to CPU 0, as specified in 'attr'. */ 315 - rc = pthread_create(&t0, attr, ping, (void *) &flags); 314 + /* Bind to CPU 0, as specified in 'attr'. */ 315 + rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags); 316 316 if (rc) 317 317 pr_err(rc, "pthread_create()"); 318 - rc = pthread_setname_np(t0, "ping"); 318 + rc = pthread_setname_np(t0, "tm_una_ping"); 319 319 if (rc) 320 320 pr_warn(rc, "pthread_setname_np"); 321 321 rc = pthread_join(t0, &ret_value); ··· 333 333 } 334 334 } 335 335 336 - int main(int argc, char **argv) 336 + int tm_unavailable_test(void) 337 337 { 338 338 int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ 339 339 pthread_t t1; 340 340 pthread_attr_t attr; 341 341 cpu_set_t cpuset; 342 + 343 + SKIP_IF(!have_htm()); 342 344 343 345 /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. 
*/ 344 346 CPU_ZERO(&cpuset); ··· 356 354 if (rc) 357 355 pr_err(rc, "pthread_attr_setaffinity_np()"); 358 356 359 - rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); 357 + rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL); 360 358 if (rc) 361 359 pr_err(rc, "pthread_create()"); 362 360 363 361 /* Name it for systemtap convenience */ 364 - rc = pthread_setname_np(t1, "pong"); 362 + rc = pthread_setname_np(t1, "tm_una_pong"); 365 363 if (rc) 366 364 pr_warn(rc, "pthread_create()"); 367 365 ··· 395 393 printf("result: success\n"); 396 394 exit(0); 397 395 } 396 + } 397 + 398 + int main(int argc, char **argv) 399 + { 400 + test_harness_set_timeout(220); 401 + return test_harness(tm_unavailable_test, "tm_unavailable_test"); 398 402 }