Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'powerpc-4.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:
"Highlights include:

- Support for direct mapped LPC on POWER9, giving Linux direct access
to devices that may be attached there, such as a UART.

- Memory hotplug support for the Power9 Radix MMU.

- Add new AUX vectors describing the processor's cache geometry, to
be used by glibc.

- The ability for a guest to ask the hypervisor to resize the guest's
hash table, and in addition support for doing so automatically when
memory is hotplugged into or out of the guest. This allows the hash
table to be sized based on the current memory usage of the guest,
rather than the maximum possible memory usage.

- Implementation of optprobes (kprobe optimisation) for powerpc.

In addition there's the topic branch shared with the KVM tree, which
includes support for guests to use the Radix MMU on Power9.

Thanks to:
Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju T, Anton
Blanchard, Benjamin Herrenschmidt, Chris Packham, Daniel Axtens,
Daniel Borkmann, David Gibson, Finn Thain, Gautham R. Shenoy, Gavin
Shan, Greg Kurz, Joel Stanley, John Allen, Madhavan Srinivasan,
Mahesh Salgaonkar, Markus Elfring, Michael Neuling, Nathan Fontenot,
Naveen N. Rao, Nicholas Piggin, Paul Mackerras, Ravi Bangoria, Reza
Arbab, Shailendra Singh, Vaibhav Jain, Wei Yongjun"

* tag 'powerpc-4.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (129 commits)
powerpc/mm/radix: Skip ptesync in pte update helpers
powerpc/mm/radix: Use ptep_get_and_clear_full when clearing pte for full mm
powerpc/mm/radix: Update pte update sequence for pte clear case
powerpc/mm: Update PROTFAULT handling in the page fault path
powerpc/xmon: Fix data-breakpoint
powerpc/mm: Fix build break with BOOK3S_64=n and MEMORY_HOTPLUG=y
powerpc/mm: Fix build break when CMA=n && SPAPR_TCE_IOMMU=y
powerpc/mm: Fix build break with RADIX=y & HUGETLBFS=n
powerpc/pseries: Fix typo in parameter description
powerpc/kprobes: Remove kprobe_exceptions_notify()
kprobes: Introduce weak variant of kprobe_exceptions_notify()
powerpc/ftrace: Fix confusing help text for DISABLE_MPROFILE_KERNEL
powerpc/powernv: Fix opal_exit tracepoint opcode
powerpc: Add a prototype for mcount() so it can be versioned
powerpc: Drop GPL from of_node_to_nid() export to match other arches
powerpc/kprobes: Optimize kprobe in kretprobe_trampoline()
powerpc/kprobes: Implement Optprobes
powerpc/kprobes: Fixes for kprobe_lookup_name() on BE
powerpc: Add helper to check if offset is within relative branch range
powerpc/bpf: Introduce __PPC_SH64()
...

+4501 -1831
+40 -2
Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
··· 5 5 6 6 Required Properties: 7 7 8 - - compatible : Should include "fsl,chip-l2-cache-controller" and "cache" 9 - where chip is the processor (bsc9132, npc8572 etc.) 8 + - compatible : Should include one of the following: 9 + "fsl,8540-l2-cache-controller" 10 + "fsl,8541-l2-cache-controller" 11 + "fsl,8544-l2-cache-controller" 12 + "fsl,8548-l2-cache-controller" 13 + "fsl,8555-l2-cache-controller" 14 + "fsl,8568-l2-cache-controller" 15 + "fsl,b4420-l2-cache-controller" 16 + "fsl,b4860-l2-cache-controller" 17 + "fsl,bsc9131-l2-cache-controller" 18 + "fsl,bsc9132-l2-cache-controller" 19 + "fsl,c293-l2-cache-controller" 20 + "fsl,mpc8536-l2-cache-controller" 21 + "fsl,mpc8540-l2-cache-controller" 22 + "fsl,mpc8541-l2-cache-controller" 23 + "fsl,mpc8544-l2-cache-controller" 24 + "fsl,mpc8548-l2-cache-controller" 25 + "fsl,mpc8555-l2-cache-controller" 26 + "fsl,mpc8560-l2-cache-controller" 27 + "fsl,mpc8568-l2-cache-controller" 28 + "fsl,mpc8569-l2-cache-controller" 29 + "fsl,mpc8572-l2-cache-controller" 30 + "fsl,p1010-l2-cache-controller" 31 + "fsl,p1011-l2-cache-controller" 32 + "fsl,p1012-l2-cache-controller" 33 + "fsl,p1013-l2-cache-controller" 34 + "fsl,p1014-l2-cache-controller" 35 + "fsl,p1015-l2-cache-controller" 36 + "fsl,p1016-l2-cache-controller" 37 + "fsl,p1020-l2-cache-controller" 38 + "fsl,p1021-l2-cache-controller" 39 + "fsl,p1022-l2-cache-controller" 40 + "fsl,p1023-l2-cache-controller" 41 + "fsl,p1024-l2-cache-controller" 42 + "fsl,p1025-l2-cache-controller" 43 + "fsl,p2010-l2-cache-controller" 44 + "fsl,p2020-l2-cache-controller" 45 + "fsl,t2080-l2-cache-controller" 46 + "fsl,t4240-l2-cache-controller" 47 + and "cache". 10 48 - reg : Address and size of L2 cache controller registers 11 49 - cache-size : Size of the entire L2 cache 12 50 - interrupts : Error interrupt of L2 controller
+118
Documentation/devicetree/bindings/powerpc/opal/power-mgt.txt
··· 1 + IBM Power-Management Bindings 2 + ============================= 3 + 4 + Linux running on baremetal POWER machines has access to the processor 5 + idle states. The description of these idle states is exposed via the 6 + node @power-mgt in the device-tree by the firmware. 7 + 8 + Definitions: 9 + ---------------- 10 + Typically each idle state has the following associated properties: 11 + 12 + - name: The name of the idle state as defined by the firmware. 13 + 14 + - flags: indicating some aspects of this idle state such as the 15 + extent of state-loss, whether timebase is stopped in this 16 + idle state and so on. The flag bits are as follows: 17 + 18 + - exit-latency: The latency involved in transitioning the state of the 19 + CPU from idle to running. 20 + 21 + - target-residency: The minimum time that the CPU needs to reside in 22 + this idle state in order to accrue power-savings 23 + benefit. 24 + 25 + Properties 26 + ---------------- 27 + The following properties provide details about the idle states. These 28 + properties are exposed as arrays. Each entry in the property array 29 + provides the value of that property for the idle state associated with 30 + the array index of that entry. 31 + 32 + If idle-states are defined, then the properties 33 + "ibm,cpu-idle-state-names" and "ibm,cpu-idle-state-flags" are 34 + required. The other properties are required unless mentioned 35 + otherwise. The length of all the property arrays must be the same. 36 + 37 + - ibm,cpu-idle-state-names: 38 + Array of strings containing the names of the idle states. 39 + 40 + - ibm,cpu-idle-state-flags: 41 + Array of unsigned 32-bit values containing the values of the 42 + flags associated with the aforementioned idle-states. 
The 43 + flag bits are as follows: 44 + 0x00000001 /* Decrementer would stop */ 45 + 0x00000002 /* Needs timebase restore */ 46 + 0x00001000 /* Restore GPRs like nap */ 47 + 0x00002000 /* Restore hypervisor resource from PACA pointer */ 48 + 0x00004000 /* Program PORE to restore PACA pointer */ 49 + 0x00010000 /* This is a nap state (POWER7,POWER8) */ 50 + 0x00020000 /* This is a fast-sleep state (POWER8)*/ 51 + 0x00040000 /* This is a winkle state (POWER8) */ 52 + 0x00080000 /* This is a fast-sleep state which requires a */ 53 + /* software workaround for restoring the */ 54 + /* timebase (POWER8) */ 55 + 0x00800000 /* This state uses SPR PMICR instruction */ 56 + /* (POWER8)*/ 57 + 0x00100000 /* This is a fast stop state (POWER9) */ 58 + 0x00200000 /* This is a deep-stop state (POWER9) */ 59 + 60 + - ibm,cpu-idle-state-latencies-ns: 61 + Array of unsigned 32-bit values containing the values of the 62 + exit-latencies (in ns) for the idle states in 63 + ibm,cpu-idle-state-names. 64 + 65 + - ibm,cpu-idle-state-residency-ns: 66 + Array of unsigned 32-bit values containing the values of the 67 + target-residency (in ns) for the idle states in 68 + ibm,cpu-idle-state-names. On POWER8 this is an optional 69 + property. If the property is absent, the target residency for 70 + the "Nap", "FastSleep" are defined to 10000 and 300000000 71 + respectively by the kernel. On POWER9 this property is required. 72 + 73 + - ibm,cpu-idle-state-psscr: 74 + Array of unsigned 64-bit values containing the values for the 75 + PSSCR for each of the idle states in ibm,cpu-idle-state-names. 76 + This property is required on POWER9 and absent on POWER8. 77 + 78 + - ibm,cpu-idle-state-psscr-mask: 79 + Array of unsigned 64-bit values containing the masks 80 + indicating which psscr fields are set in the corresponding 81 + entries of ibm,cpu-idle-state-psscr. This property is 82 + required on POWER9 and absent on POWER8. 
83 + 84 + Whenever the firmware sets an entry in 85 + ibm,cpu-idle-state-psscr-mask to 0xf, it implies that 86 + only the Requested Level (RL) field of the corresponding entry 87 + in ibm,cpu-idle-state-psscr should be considered by the 88 + kernel. For such idle states, the kernel would set the 89 + remaining fields of the psscr to the following sane-default 90 + values. 91 + 92 + - ESL and EC bits are set to 1. So wakeup from any stop 93 + state will be at vector 0x100. 94 + 95 + - MTL and PSLL are set to the maximum allowed value as 96 + per the ISA, i.e. 15. 97 + 98 + - The Transition Rate, TR is set to the maximum value, 99 + 3. 100 + 101 + For all other values of the entry in 102 + ibm,cpu-idle-state-psscr-mask, the kernel expects all the 103 + psscr fields of the corresponding entry in 104 + ibm,cpu-idle-state-psscr to be correctly set by the firmware. 105 + 106 + - ibm,cpu-idle-state-pmicr: 107 + Array of unsigned 64-bit values containing the pmicr values 108 + for the idle states in ibm,cpu-idle-state-names. This 64-bit 109 + register value is to be set in pmicr for the corresponding 110 + state if the flag indicates that the pmicr SPR should be set. This 111 + is an optional property on POWER8 and is absent on 112 + POWER9. 113 + 114 + - ibm,cpu-idle-state-pmicr-mask: 115 + Array of unsigned 64-bit values containing the mask indicating 116 + which of the fields of the PMICR are set in the corresponding 117 + entries in ibm,cpu-idle-state-pmicr. This is an optional 118 + property on POWER8 and is absent on POWER9.
+83
Documentation/virtual/kvm/api.txt
··· 3201 3201 pit_reinject = 0 (!reinject mode) is recommended, unless running an old 3202 3202 operating system that uses the PIT for timing (e.g. Linux 2.4.x). 3203 3203 3204 + 4.99 KVM_PPC_CONFIGURE_V3_MMU 3205 + 3206 + Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 3207 + Architectures: ppc 3208 + Type: vm ioctl 3209 + Parameters: struct kvm_ppc_mmuv3_cfg (in) 3210 + Returns: 0 on success, 3211 + -EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read, 3212 + -EINVAL if the configuration is invalid 3213 + 3214 + This ioctl controls whether the guest will use radix or HPT (hashed 3215 + page table) translation, and sets the pointer to the process table for 3216 + the guest. 3217 + 3218 + struct kvm_ppc_mmuv3_cfg { 3219 + __u64 flags; 3220 + __u64 process_table; 3221 + }; 3222 + 3223 + There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and 3224 + KVM_PPC_MMUV3_GTSE. KVM_PPC_MMUV3_RADIX, if set, configures the guest 3225 + to use radix tree translation, and if clear, to use HPT translation. 3226 + KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest 3227 + to be able to use the global TLB and SLB invalidation instructions; 3228 + if clear, the guest may not use these instructions. 3229 + 3230 + The process_table field specifies the address and size of the guest 3231 + process table, which is in the guest's space. This field is formatted 3232 + as the second doubleword of the partition table entry, as defined in 3233 + the Power ISA V3.00, Book III section 5.7.6.1. 
3234 + 3235 + 4.100 KVM_PPC_GET_RMMU_INFO 3236 + 3237 + Capability: KVM_CAP_PPC_RADIX_MMU 3238 + Architectures: ppc 3239 + Type: vm ioctl 3240 + Parameters: struct kvm_ppc_rmmu_info (out) 3241 + Returns: 0 on success, 3242 + -EFAULT if struct kvm_ppc_rmmu_info cannot be written, 3243 + -EINVAL if no useful information can be returned 3244 + 3245 + This ioctl returns a structure containing two things: (a) a list 3246 + containing supported radix tree geometries, and (b) a list that maps 3247 + page sizes to put in the "AP" (actual page size) field for the tlbie 3248 + (TLB invalidate entry) instruction. 3249 + 3250 + struct kvm_ppc_rmmu_info { 3251 + struct kvm_ppc_radix_geom { 3252 + __u8 page_shift; 3253 + __u8 level_bits[4]; 3254 + __u8 pad[3]; 3255 + } geometries[8]; 3256 + __u32 ap_encodings[8]; 3257 + }; 3258 + 3259 + The geometries[] field gives up to 8 supported geometries for the 3260 + radix page table, in terms of the log base 2 of the smallest page 3261 + size, and the number of bits indexed at each level of the tree, from 3262 + the PTE level up to the PGD level in that order. Any unused entries 3263 + will have 0 in the page_shift field. 3264 + 3265 + The ap_encodings gives the supported page sizes and their AP field 3266 + encodings, encoded with the AP value in the top 3 bits and the log 3267 + base 2 of the page size in the bottom 6 bits. 3268 + 3204 3269 5. The kvm_run structure 3205 3270 ------------------------ 3206 3271 ··· 4007 3942 capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this 4008 3943 will disable the use of APIC hardware virtualization even if supported 4009 3944 by the CPU, as it's incompatible with SynIC auto-EOI behavior. 
3945 + 3946 + 8.3 KVM_CAP_PPC_RADIX_MMU 3947 + 3948 + Architectures: ppc 3949 + 3950 + This capability, if KVM_CHECK_EXTENSION indicates that it is 3951 + available, means that the kernel can support guests using the 3952 + radix MMU defined in Power ISA V3.00 (as implemented in the POWER9 3953 + processor). 3954 + 3955 + 8.4 KVM_CAP_PPC_HASH_MMU_V3 3956 + 3957 + Architectures: ppc 3958 + 3959 + This capability, if KVM_CHECK_EXTENSION indicates that it is 3960 + available, means that the kernel can support guests using the 3961 + hashed page table MMU defined in Power ISA V3.00 (as implemented in 3962 + the POWER9 processor), including in-memory segment tables.
+1 -1
arch/m68k/include/asm/macintosh.h
··· 38 38 39 39 #define MAC_ADB_NONE 0 40 40 #define MAC_ADB_II 1 41 - #define MAC_ADB_IISI 2 41 + #define MAC_ADB_EGRET 2 42 42 #define MAC_ADB_CUDA 3 43 43 #define MAC_ADB_PB1 4 44 44 #define MAC_ADB_PB2 5
+9 -9
arch/m68k/mac/config.c
··· 286 286 }, { 287 287 .ident = MAC_MODEL_IISI, 288 288 .name = "IIsi", 289 - .adb_type = MAC_ADB_IISI, 289 + .adb_type = MAC_ADB_EGRET, 290 290 .via_type = MAC_VIA_IICI, 291 291 .scsi_type = MAC_SCSI_OLD, 292 292 .scc_type = MAC_SCC_II, ··· 295 295 }, { 296 296 .ident = MAC_MODEL_IIVI, 297 297 .name = "IIvi", 298 - .adb_type = MAC_ADB_IISI, 298 + .adb_type = MAC_ADB_EGRET, 299 299 .via_type = MAC_VIA_IICI, 300 300 .scsi_type = MAC_SCSI_LC, 301 301 .scc_type = MAC_SCC_II, ··· 304 304 }, { 305 305 .ident = MAC_MODEL_IIVX, 306 306 .name = "IIvx", 307 - .adb_type = MAC_ADB_IISI, 307 + .adb_type = MAC_ADB_EGRET, 308 308 .via_type = MAC_VIA_IICI, 309 309 .scsi_type = MAC_SCSI_LC, 310 310 .scc_type = MAC_SCC_II, ··· 319 319 { 320 320 .ident = MAC_MODEL_CLII, 321 321 .name = "Classic II", 322 - .adb_type = MAC_ADB_IISI, 322 + .adb_type = MAC_ADB_EGRET, 323 323 .via_type = MAC_VIA_IICI, 324 324 .scsi_type = MAC_SCSI_LC, 325 325 .scc_type = MAC_SCC_II, ··· 352 352 { 353 353 .ident = MAC_MODEL_LC, 354 354 .name = "LC", 355 - .adb_type = MAC_ADB_IISI, 355 + .adb_type = MAC_ADB_EGRET, 356 356 .via_type = MAC_VIA_IICI, 357 357 .scsi_type = MAC_SCSI_LC, 358 358 .scc_type = MAC_SCC_II, ··· 361 361 }, { 362 362 .ident = MAC_MODEL_LCII, 363 363 .name = "LC II", 364 - .adb_type = MAC_ADB_IISI, 364 + .adb_type = MAC_ADB_EGRET, 365 365 .via_type = MAC_VIA_IICI, 366 366 .scsi_type = MAC_SCSI_LC, 367 367 .scc_type = MAC_SCC_II, ··· 370 370 }, { 371 371 .ident = MAC_MODEL_LCIII, 372 372 .name = "LC III", 373 - .adb_type = MAC_ADB_IISI, 373 + .adb_type = MAC_ADB_EGRET, 374 374 .via_type = MAC_VIA_IICI, 375 375 .scsi_type = MAC_SCSI_LC, 376 376 .scc_type = MAC_SCC_II, ··· 498 498 { 499 499 .ident = MAC_MODEL_P460, 500 500 .name = "Performa 460", 501 - .adb_type = MAC_ADB_IISI, 501 + .adb_type = MAC_ADB_EGRET, 502 502 .via_type = MAC_VIA_IICI, 503 503 .scsi_type = MAC_SCSI_LC, 504 504 .scc_type = MAC_SCC_II, ··· 575 575 }, { 576 576 .ident = MAC_MODEL_P600, 577 577 .name = "Performa 600", 
578 - .adb_type = MAC_ADB_IISI, 578 + .adb_type = MAC_ADB_EGRET, 579 579 .via_type = MAC_VIA_IICI, 580 580 .scsi_type = MAC_SCSI_LC, 581 581 .scc_type = MAC_SCC_II,
+8 -64
arch/m68k/mac/misc.c
··· 141 141 #define pmu_write_pram NULL 142 142 #endif 143 143 144 - #if 0 /* def CONFIG_ADB_MACIISI */ 145 - extern int maciisi_request(struct adb_request *req, 146 - void (*done)(struct adb_request *), int nbytes, ...); 147 - 148 - static long maciisi_read_time(void) 149 - { 150 - struct adb_request req; 151 - long time; 152 - 153 - if (maciisi_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME)) 154 - return 0; 155 - 156 - time = (req.reply[3] << 24) | (req.reply[4] << 16) 157 - | (req.reply[5] << 8) | req.reply[6]; 158 - return time - RTC_OFFSET; 159 - } 160 - 161 - static void maciisi_write_time(long data) 162 - { 163 - struct adb_request req; 164 - data += RTC_OFFSET; 165 - maciisi_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, 166 - (data >> 24) & 0xFF, (data >> 16) & 0xFF, 167 - (data >> 8) & 0xFF, data & 0xFF); 168 - } 169 - 170 - static __u8 maciisi_read_pram(int offset) 171 - { 172 - struct adb_request req; 173 - if (maciisi_request(&req, NULL, 4, CUDA_PACKET, CUDA_GET_PRAM, 174 - (offset >> 8) & 0xFF, offset & 0xFF)) 175 - return 0; 176 - return req.reply[3]; 177 - } 178 - 179 - static void maciisi_write_pram(int offset, __u8 data) 180 - { 181 - struct adb_request req; 182 - maciisi_request(&req, NULL, 5, CUDA_PACKET, CUDA_SET_PRAM, 183 - (offset >> 8) & 0xFF, offset & 0xFF, data); 184 - } 185 - #else 186 - #define maciisi_read_time() 0 187 - #define maciisi_write_time(n) 188 - #define maciisi_read_pram NULL 189 - #define maciisi_write_pram NULL 190 - #endif 191 - 192 144 /* 193 145 * VIA PRAM/RTC access routines 194 146 * ··· 409 457 int i; 410 458 411 459 switch(macintosh_config->adb_type) { 412 - case MAC_ADB_IISI: 413 - func = maciisi_read_pram; break; 414 460 case MAC_ADB_PB1: 415 461 case MAC_ADB_PB2: 416 462 func = pmu_read_pram; break; 463 + case MAC_ADB_EGRET: 417 464 case MAC_ADB_CUDA: 418 465 func = cuda_read_pram; break; 419 466 default: ··· 431 480 int i; 432 481 433 482 switch(macintosh_config->adb_type) { 434 - case MAC_ADB_IISI: 435 - 
func = maciisi_write_pram; break; 436 483 case MAC_ADB_PB1: 437 484 case MAC_ADB_PB2: 438 485 func = pmu_write_pram; break; 486 + case MAC_ADB_EGRET: 439 487 case MAC_ADB_CUDA: 440 488 func = cuda_write_pram; break; 441 489 default: ··· 449 499 450 500 void mac_poweroff(void) 451 501 { 452 - /* 453 - * MAC_ADB_IISI may need to be moved up here if it doesn't actually 454 - * work using the ADB packet method. --David Kilzer 455 - */ 456 - 457 502 if (oss_present) { 458 503 oss_shutdown(); 459 504 } else if (macintosh_config->adb_type == MAC_ADB_II) { 460 505 via_shutdown(); 461 506 #ifdef CONFIG_ADB_CUDA 462 - } else if (macintosh_config->adb_type == MAC_ADB_CUDA) { 507 + } else if (macintosh_config->adb_type == MAC_ADB_EGRET || 508 + macintosh_config->adb_type == MAC_ADB_CUDA) { 463 509 cuda_shutdown(); 464 510 #endif 465 511 #ifdef CONFIG_ADB_PMU68K ··· 495 549 local_irq_restore(flags); 496 550 } 497 551 #ifdef CONFIG_ADB_CUDA 498 - } else if (macintosh_config->adb_type == MAC_ADB_CUDA) { 552 + } else if (macintosh_config->adb_type == MAC_ADB_EGRET || 553 + macintosh_config->adb_type == MAC_ADB_CUDA) { 499 554 cuda_restart(); 500 555 #endif 501 556 #ifdef CONFIG_ADB_PMU68K ··· 645 698 case MAC_ADB_IOP: 646 699 now = via_read_time(); 647 700 break; 648 - case MAC_ADB_IISI: 649 - now = maciisi_read_time(); 650 - break; 651 701 case MAC_ADB_PB1: 652 702 case MAC_ADB_PB2: 653 703 now = pmu_read_time(); 654 704 break; 705 + case MAC_ADB_EGRET: 655 706 case MAC_ADB_CUDA: 656 707 now = cuda_read_time(); 657 708 break; ··· 681 736 case MAC_ADB_IOP: 682 737 via_write_time(now); 683 738 break; 739 + case MAC_ADB_EGRET: 684 740 case MAC_ADB_CUDA: 685 741 cuda_write_time(now); 686 742 break; ··· 689 743 case MAC_ADB_PB2: 690 744 pmu_write_time(now); 691 745 break; 692 - case MAC_ADB_IISI: 693 - maciisi_write_time(now); 694 746 } 695 747 } 696 748 return 0;
+6 -3
arch/powerpc/Kconfig
··· 93 93 select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL 94 94 select HAVE_FUNCTION_TRACER 95 95 select HAVE_FUNCTION_GRAPH_TRACER 96 + select HAVE_GCC_PLUGINS 96 97 select SYSCTL_EXCEPTION_TRACE 97 98 select VIRT_TO_BUS if !PPC64 98 99 select HAVE_IDE 99 100 select HAVE_IOREMAP_PROT 100 101 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) 101 102 select HAVE_KPROBES 103 + select HAVE_OPTPROBES if PPC64 102 104 select HAVE_ARCH_KGDB 103 105 select HAVE_KRETPROBES 104 106 select HAVE_ARCH_TRACEHOOK ··· 166 164 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE 167 165 select HAVE_ARCH_HARDENED_USERCOPY 168 166 select HAVE_KERNEL_GZIP 167 + select HAVE_CONTEXT_TRACKING if PPC64 169 168 170 169 config GENERIC_CSUM 171 - def_bool CPU_LITTLE_ENDIAN 170 + def_bool n 172 171 173 172 config EARLY_PRINTK 174 173 bool ··· 393 390 be disabled also. 394 391 395 392 If you have a toolchain which supports mprofile-kernel, then you can 396 - enable this. Otherwise leave it disabled. If you're not sure, say 397 - "N". 393 + disable this. Otherwise leave it enabled. If you're not sure, say 394 + "Y". 398 395 399 396 config MPROFILE_KERNEL 400 397 depends on PPC64 && CPU_LITTLE_ENDIAN
+1 -2
arch/powerpc/Kconfig.debug
··· 356 356 357 357 config PPC_PTDUMP 358 358 bool "Export kernel pagetable layout to userspace via debugfs" 359 - depends on DEBUG_KERNEL 360 - select DEBUG_FS 359 + depends on DEBUG_KERNEL && DEBUG_FS 361 360 help 362 361 This option exports the state of the kernel pagetables to a 363 362 debugfs file. This is only useful for kernel developers who are
+4
arch/powerpc/boot/.gitignore
··· 1 1 addnote 2 + decompress_inflate.c 2 3 empty.c 3 4 hack-coff 4 5 inffast.c ··· 14 13 kernel-vmlinux.strip.c 15 14 kernel-vmlinux.strip.gz 16 15 mktree 16 + otheros.bld 17 17 uImage 18 18 cuImage.* 19 19 dtbImage.* 20 20 *.dtb 21 21 treeImage.* 22 + vmlinux.strip 22 23 zImage 23 24 zImage.initrd 24 25 zImage.bin.* ··· 29 26 zImage.epapr 30 27 zImage.holly 31 28 zImage.*lds 29 + zImage.maple 32 30 zImage.miboot 33 31 zImage.pmac 34 32 zImage.pseries
+10 -1
arch/powerpc/configs/powernv_defconfig
··· 26 26 CONFIG_CPUSETS=y 27 27 CONFIG_CGROUP_DEVICE=y 28 28 CONFIG_CGROUP_CPUACCT=y 29 + CONFIG_CGROUP_BPF=y 29 30 CONFIG_CGROUP_PERF=y 30 31 CONFIG_USER_NS=y 31 32 CONFIG_BLK_DEV_INITRD=y 33 + CONFIG_BPF_SYSCALL=y 32 34 # CONFIG_COMPAT_BRK is not set 33 35 CONFIG_PROFILING=y 34 36 CONFIG_OPROFILE=y ··· 81 79 # CONFIG_NETFILTER_ADVANCED is not set 82 80 CONFIG_BRIDGE=m 83 81 CONFIG_VLAN_8021Q=m 82 + CONFIG_NET_SCHED=y 83 + CONFIG_NET_CLS_BPF=m 84 + CONFIG_NET_CLS_ACT=y 85 + CONFIG_NET_ACT_BPF=m 86 + CONFIG_BPF_JIT=y 84 87 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 85 88 CONFIG_DEVTMPFS=y 86 89 CONFIG_DEVTMPFS_MOUNT=y ··· 220 213 CONFIG_USB_HIDDEV=y 221 214 CONFIG_USB=y 222 215 CONFIG_USB_MON=m 216 + CONFIG_USB_XHCI_HCD=y 223 217 CONFIG_USB_EHCI_HCD=y 224 218 # CONFIG_USB_EHCI_HCD_PPC_OF is not set 225 219 CONFIG_USB_OHCI_HCD=y 226 - CONFIG_USB_STORAGE=m 220 + CONFIG_USB_STORAGE=y 227 221 CONFIG_NEW_LEDS=y 228 222 CONFIG_LEDS_CLASS=m 229 223 CONFIG_LEDS_POWERNV=m ··· 297 289 CONFIG_LATENCYTOP=y 298 290 CONFIG_SCHED_TRACER=y 299 291 CONFIG_BLK_DEV_IO_TRACE=y 292 + CONFIG_UPROBE_EVENT=y 300 293 CONFIG_CODE_PATCHING_SELFTEST=y 301 294 CONFIG_FTR_FIXUP_SELFTEST=y 302 295 CONFIG_MSI_BITMAP_SELFTEST=y
+7
arch/powerpc/configs/ppc64_defconfig
··· 14 14 CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 15 15 CONFIG_CGROUPS=y 16 16 CONFIG_CPUSETS=y 17 + CONFIG_CGROUP_BPF=y 17 18 CONFIG_BLK_DEV_INITRD=y 19 + CONFIG_BPF_SYSCALL=y 18 20 # CONFIG_COMPAT_BRK is not set 19 21 CONFIG_PROFILING=y 20 22 CONFIG_OPROFILE=y ··· 78 76 CONFIG_NETFILTER=y 79 77 # CONFIG_NETFILTER_ADVANCED is not set 80 78 CONFIG_BRIDGE=m 79 + CONFIG_NET_SCHED=y 80 + CONFIG_NET_CLS_BPF=m 81 + CONFIG_NET_CLS_ACT=y 82 + CONFIG_NET_ACT_BPF=m 81 83 CONFIG_BPF_JIT=y 82 84 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 83 85 CONFIG_DEVTMPFS=y ··· 330 324 CONFIG_LATENCYTOP=y 331 325 CONFIG_SCHED_TRACER=y 332 326 CONFIG_BLK_DEV_IO_TRACE=y 327 + CONFIG_UPROBE_EVENT=y 333 328 CONFIG_CODE_PATCHING_SELFTEST=y 334 329 CONFIG_FTR_FIXUP_SELFTEST=y 335 330 CONFIG_MSI_BITMAP_SELFTEST=y
+8
arch/powerpc/configs/pseries_defconfig
··· 24 24 CONFIG_CGROUP_DEVICE=y 25 25 CONFIG_CPUSETS=y 26 26 CONFIG_CGROUP_CPUACCT=y 27 + CONFIG_CGROUP_BPF=y 27 28 CONFIG_MEMCG=y 28 29 CONFIG_MEMCG_SWAP=y 29 30 CONFIG_CGROUP_PERF=y 30 31 CONFIG_CGROUP_SCHED=y 31 32 CONFIG_USER_NS=y 32 33 CONFIG_BLK_DEV_INITRD=y 34 + CONFIG_BPF_SYSCALL=y 33 35 # CONFIG_COMPAT_BRK is not set 34 36 CONFIG_PROFILING=y 35 37 CONFIG_OPROFILE=y ··· 84 82 # CONFIG_NETFILTER_ADVANCED is not set 85 83 CONFIG_BRIDGE=m 86 84 CONFIG_VLAN_8021Q=m 85 + CONFIG_NET_SCHED=y 86 + CONFIG_NET_CLS_BPF=m 87 + CONFIG_NET_CLS_ACT=y 88 + CONFIG_NET_ACT_BPF=m 89 + CONFIG_BPF_JIT=y 87 90 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" 88 91 CONFIG_DEVTMPFS=y 89 92 CONFIG_DEVTMPFS_MOUNT=y ··· 296 289 CONFIG_LATENCYTOP=y 297 290 CONFIG_SCHED_TRACER=y 298 291 CONFIG_BLK_DEV_IO_TRACE=y 292 + CONFIG_UPROBE_EVENT=y 299 293 CONFIG_CODE_PATCHING_SELFTEST=y 300 294 CONFIG_FTR_FIXUP_SELFTEST=y 301 295 CONFIG_MSI_BITMAP_SELFTEST=y
+2
arch/powerpc/include/asm/asm-prototypes.h
··· 120 120 extern int __cmpdi2(s64, s64); 121 121 extern int __ucmpdi2(u64, u64); 122 122 123 + void _mcount(void); 124 + 123 125 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
+2 -2
arch/powerpc/include/asm/book3s/64/hash.h
··· 33 33 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) 34 34 #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) 35 35 36 - #ifdef CONFIG_TRANSPARENT_HUGEPAGE 36 + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) 37 37 /* 38 - * only with hash we need to use the second half of pmd page table 38 + * only with hash 64k we need to use the second half of pmd page table 39 39 * to store pointer to deposited pgtable_t 40 40 */ 41 41 #define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1)
+6 -2
arch/powerpc/include/asm/book3s/64/mmu-hash.h
··· 157 157 unsigned long addr, 158 158 unsigned char *hpte_slot_array, 159 159 int psize, int ssize, int local); 160 + int (*resize_hpt)(unsigned long shift); 160 161 /* 161 162 * Special for kexec. 162 163 * To be called in real mode with interrupts disabled. No locks are ··· 526 525 #define ESID_BITS 18 527 526 #define ESID_BITS_1T 6 528 527 528 + #define ESID_BITS_MASK ((1 << ESID_BITS) - 1) 529 + #define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1) 530 + 529 531 /* 530 532 * 256MB segment 531 533 * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments ··· 664 660 665 661 if (ssize == MMU_SEGSIZE_256M) 666 662 return vsid_scramble((context << ESID_BITS) 667 - | (ea >> SID_SHIFT), 256M); 663 + | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M); 668 664 return vsid_scramble((context << ESID_BITS_1T) 669 - | (ea >> SID_SHIFT_1T), 1T); 665 + | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T); 670 666 } 671 667 672 668 /*
+17 -1
arch/powerpc/include/asm/book3s/64/mmu.h
··· 44 44 }; 45 45 extern struct patb_entry *partition_tb; 46 46 47 + /* Bits in patb0 field */ 47 48 #define PATB_HR (1UL << 63) 48 - #define PATB_GR (1UL << 63) 49 49 #define RPDB_MASK 0x0ffffffffffff00fUL 50 50 #define RPDB_SHIFT (1UL << 8) 51 + #define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ 52 + #define RTS1_MASK (3UL << RTS1_SHIFT) 53 + #define RTS2_SHIFT 5 /* bottom 3 bits of radix tree size */ 54 + #define RTS2_MASK (7UL << RTS2_SHIFT) 55 + #define RPDS_MASK 0x1f /* root page dir. size field */ 56 + 57 + /* Bits in patb1 field */ 58 + #define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ 59 + #define PRTS_MASK 0x1f /* process table size field */ 60 + 51 61 /* 52 62 * Limit process table to PAGE_SIZE table. This 53 63 * also limit the max pid we can support. ··· 147 137 148 138 extern int (*register_process_table)(unsigned long base, unsigned long page_size, 149 139 unsigned long tbl_size); 140 + 141 + #ifdef CONFIG_PPC_PSERIES 142 + extern void radix_init_pseries(void); 143 + #else 144 + static inline void radix_init_pseries(void) { }; 145 + #endif 150 146 151 147 #endif /* __ASSEMBLY__ */ 152 148 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
+5
arch/powerpc/include/asm/book3s/64/pgtable-4k.h
··· 47 47 return hash__hugepd_ok(hpd); 48 48 } 49 49 #define is_hugepd(hpd) (hugepd_ok(hpd)) 50 + 51 + #else /* !CONFIG_HUGETLB_PAGE */ 52 + static inline int pmd_huge(pmd_t pmd) { return 0; } 53 + static inline int pud_huge(pud_t pud) { return 0; } 50 54 #endif /* CONFIG_HUGETLB_PAGE */ 55 + 51 56 #endif /* __ASSEMBLY__ */ 52 57 53 58 #endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_4K_H */
+3 -5
arch/powerpc/include/asm/book3s/64/pgtable-64k.h
··· 35 35 } 36 36 #define pgd_huge pgd_huge 37 37 38 - #ifdef CONFIG_DEBUG_VM 39 - extern int hugepd_ok(hugepd_t hpd); 40 - #define is_hugepd(hpd) (hugepd_ok(hpd)) 41 - #else 42 38 /* 43 39 * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't 44 40 * need to setup hugepage directory for them. Our pte and page directory format ··· 45 49 return 0; 46 50 } 47 51 #define is_hugepd(pdep) 0 48 - #endif /* CONFIG_DEBUG_VM */ 49 52 53 + #else /* !CONFIG_HUGETLB_PAGE */ 54 + static inline int pmd_huge(pmd_t pmd) { return 0; } 55 + static inline int pud_huge(pud_t pud) { return 0; } 50 56 #endif /* CONFIG_HUGETLB_PAGE */ 51 57 52 58 static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+17
arch/powerpc/include/asm/book3s/64/pgtable.h
··· 371 371 return __pte(old); 372 372 } 373 373 374 + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL 375 + static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, 376 + unsigned long addr, 377 + pte_t *ptep, int full) 378 + { 379 + if (full && radix_enabled()) { 380 + /* 381 + * Let's skip the DD1 style pte update here. We know that 382 + * this is a full mm pte clear and hence can be sure there is 383 + * no parallel set_pte. 384 + */ 385 + return radix__ptep_get_and_clear_full(mm, addr, ptep, full); 386 + } 387 + return ptep_get_and_clear(mm, addr, ptep); 388 + } 389 + 390 + 374 391 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, 375 392 pte_t * ptep) 376 393 {
+29 -12
arch/powerpc/include/asm/book3s/64/radix.h
··· 139 139 140 140 unsigned long new_pte; 141 141 142 - old_pte = __radix_pte_update(ptep, ~0, 0); 142 + old_pte = __radix_pte_update(ptep, ~0ul, 0); 143 143 /* 144 144 * new value of pte 145 145 */ 146 146 new_pte = (old_pte | set) & ~clr; 147 - /* 148 - * If we are trying to clear the pte, we can skip 149 - * the below sequence and batch the tlb flush. The 150 - * tlb flush batching is done by mmu gather code 151 - */ 152 - if (new_pte) { 153 - asm volatile("ptesync" : : : "memory"); 154 - radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); 147 + radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); 148 + if (new_pte) 155 149 __radix_pte_update(ptep, 0, new_pte); 156 - } 157 150 } else 158 151 old_pte = __radix_pte_update(ptep, clr, set); 159 - asm volatile("ptesync" : : : "memory"); 160 152 if (!huge) 161 153 assert_pte_locked(mm, addr); 162 154 163 155 return old_pte; 156 + } 157 + 158 + static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm, 159 + unsigned long addr, 160 + pte_t *ptep, int full) 161 + { 162 + unsigned long old_pte; 163 + 164 + if (full) { 165 + /* 166 + * If we are trying to clear the pte, we can skip 167 + * the DD1 pte update sequence and batch the tlb flush. The 168 + * tlb flush batching is done by mmu gather code. We 169 + * still keep the cmp_xchg update to make sure we get 170 + * correct R/C bit which might be updated via Nest MMU. 
171 + */ 172 + old_pte = __radix_pte_update(ptep, ~0ul, 0); 173 + } else 174 + old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0); 175 + 176 + return __pte(old_pte); 164 177 } 165 178 166 179 /* ··· 193 180 unsigned long old_pte, new_pte; 194 181 195 182 old_pte = __radix_pte_update(ptep, ~0, 0); 196 - asm volatile("ptesync" : : : "memory"); 197 183 /* 198 184 * new value of pte 199 185 */ ··· 303 291 } 304 292 return rts_field; 305 293 } 294 + 295 + #ifdef CONFIG_MEMORY_HOTPLUG 296 + int radix__create_section_mapping(unsigned long start, unsigned long end); 297 + int radix__remove_section_mapping(unsigned long start, unsigned long end); 298 + #endif /* CONFIG_MEMORY_HOTPLUG */ 306 299 #endif /* __ASSEMBLY__ */ 307 300 #endif
arch/powerpc/include/asm/cache.h (+15 -8)
···
 #define IFETCH_ALIGN_BYTES	(1 << IFETCH_ALIGN_SHIFT)
 
 #if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+
+struct ppc_cache_info {
+	u32 size;
+	u32 line_size;
+	u32 block_size;		/* L1 only */
+	u32 log_block_size;
+	u32 blocks_per_page;
+	u32 sets;
+	u32 assoc;
+};
+
 struct ppc64_caches {
-	u32 dsize;		/* L1 d-cache size */
-	u32 dline_size;		/* L1 d-cache line size */
-	u32 log_dline_size;
-	u32 dlines_per_page;
-	u32 isize;		/* L1 i-cache size */
-	u32 iline_size;		/* L1 i-cache line size */
-	u32 log_iline_size;
-	u32 ilines_per_page;
+	struct ppc_cache_info l1d;
+	struct ppc_cache_info l1i;
+	struct ppc_cache_info l2;
+	struct ppc_cache_info l3;
 };
 
 extern struct ppc64_caches ppc64_caches;
arch/powerpc/include/asm/checksum.h (+16 -5)
···
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
 static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 					__u8 proto, __wsum sum)
 {
 #ifdef __powerpc64__
-	unsigned long s = (__force u32)sum;
+	u64 s = (__force u32)sum;
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
-	s += (s >> 32);
-	return (__force __wsum) s;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__force __wsum) from64to32(s);
 #else
 	__asm__("\n\
 	addc	%0,%0,%1 \n\
···
 
 	for (i = 0; i < ihl - 1; i++, ptr++)
 		s += *ptr;
-	s += (s >> 32);
-	return (__force __wsum)s;
+	return (__force __wsum)from64to32(s);
 #else
 	__wsum sum, tmp;
 
arch/powerpc/include/asm/code-patching.h (+2)
···
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
+bool is_offset_in_branch_range(long offset);
 unsigned int create_branch(const unsigned int *addr,
 			   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
···
 unsigned long branch_target(const unsigned int *instr);
 unsigned int translate_branch(const unsigned int *dest,
 			      const unsigned int *src);
+extern bool is_conditional_branch(unsigned int instr);
 #ifdef CONFIG_PPC_BOOK3E_64
 void __patch_exception(int exc, unsigned long addr);
 #define patch_exception(exc, name) do { \
arch/powerpc/include/asm/cpuidle.h (+48 -1)
···
 #define PNV_CORE_IDLE_LOCK_BIT			0x100
 #define PNV_CORE_IDLE_THREAD_BITS		0x0FF
 
+/*
+ * ============================ NOTE =================================
+ * The older firmware populates only the RL field in the psscr_val and
+ * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the
+ * remaining PSSCR fields to default values as follows:
+ *
+ * - ESL and EC bits are to 1. So wakeup from any stop state will be
+ *   at vector 0x100.
+ *
+ * - MTL and PSLL are set to the maximum allowed value as per the ISA,
+ *   i.e. 15.
+ *
+ * - The Transition Rate, TR is set to the Maximum value 3.
+ */
+#define PSSCR_HV_DEFAULT_VAL    (PSSCR_ESL | PSSCR_EC |		\
+				PSSCR_PSLL_MASK | PSSCR_TR_MASK |	\
+				PSSCR_MTL_MASK)
+
+#define PSSCR_HV_DEFAULT_MASK   (PSSCR_ESL | PSSCR_EC |		\
+				PSSCR_PSLL_MASK | PSSCR_TR_MASK |	\
+				PSSCR_MTL_MASK | PSSCR_RL_MASK)
+#define PSSCR_EC_SHIFT    20
+#define PSSCR_ESL_SHIFT   21
+#define GET_PSSCR_EC(x)   (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
+#define GET_PSSCR_ESL(x)  (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
+#define GET_PSSCR_RL(x)   ((x) & PSSCR_RL_MASK)
+
+#define ERR_EC_ESL_MISMATCH		-1
+#define ERR_DEEP_STATE_ESL_MISMATCH	-2
+
 #ifndef __ASSEMBLY__
 extern u32 pnv_fastsleep_workaround_at_entry[];
 extern u32 pnv_fastsleep_workaround_at_exit[];
 
 extern u64 pnv_first_deep_stop_state;
+
+int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
+static inline void report_invalid_psscr_val(u64 psscr_val, int err)
+{
+	switch (err) {
+	case ERR_EC_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal",
+			psscr_val);
+		break;
+	case ERR_DEEP_STATE_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state",
+			psscr_val);
+	}
+}
 #endif
 
 #endif
 
 /* Idle state entry routines */
 #ifdef CONFIG_PPC_P7_NAP
-#define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
+#define	IDLE_STATE_ENTER_SEQ(IDLE_INST)                         \
 	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
 	std	r0,0(r1);					\
 	ptesync;						\
···
 1:	cmpd	cr0,r0,r0;					\
 	bne	1b;						\
 	IDLE_INST;						\
+
+#define	IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)			\
+	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
 	b	.
 #endif /* CONFIG_PPC_P7_NAP */
arch/powerpc/include/asm/elf.h (+42)
···
 
 #endif /* CONFIG_SPU_BASE */
 
+#ifdef CONFIG_PPC64
+
+#define get_cache_geometry(level) \
+	(ppc64_caches.level.assoc << 16 | ppc64_caches.level.line_size)
+
+#define ARCH_DLINFO_CACHE_GEOMETRY \
+	NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \
+	NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \
+	NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \
+	NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \
+	NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \
+	NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \
+	NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \
+	NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, get_cache_geometry(l3))
+
+#else
+#define ARCH_DLINFO_CACHE_GEOMETRY
+#endif
+
+/*
+ * The requirements here are:
+ * - keep the final alignment of sp (sp & 0xf)
+ * - make sure the 32-bit value at the first 16 byte aligned position of
+ *   AUXV is greater than 16 for glibc compatibility.
+ *   AT_IGNOREPPC is used for that.
+ * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
+ *   even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
+ * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
+ */
+#define ARCH_DLINFO							\
+do {									\
+	/* Handle glibc compatibility. */				\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
+	/* Cache size items */						\
+	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);			\
+	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);			\
+	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);			\
+	VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base);	\
+	ARCH_DLINFO_CACHE_GEOMETRY;					\
+} while (0)
+
 #endif /* _ASM_POWERPC_ELF_H */
arch/powerpc/include/asm/exception-64s.h (+63 -20)
···
 	ld	reg,PACAKBASE(r13);	\
 	ori	reg,reg,(ABS_ADDR(label))@l;
 
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label)					\
+	ld	reg,PACAKBASE(r13);					\
+	ori	reg,reg,(ABS_ADDR(label))@l;				\
+	addis	reg,reg,(ABS_ADDR(label))@h;
+
 /* Exception register prefixes */
 #define EXC_HV	H
 #define EXC_STD
···
 	mtctr	reg;							\
 	bctr
 
+#define BRANCH_LINK_TO_FAR(reg, label)					\
+	__LOAD_FAR_HANDLER(reg, label);					\
+	mtctr	reg;							\
+	bctrl
+
+/*
+ * KVM requires __LOAD_FAR_HANDLER.
+ *
+ * __BRANCH_TO_KVM_EXIT branches are also a special case because they
+ * explicitly use r9 then reload it from PACA before branching. Hence
+ * the double-underscore.
+ */
+#define __BRANCH_TO_KVM_EXIT(area, label)				\
+	mfctr	r9;							\
+	std	r9,HSTATE_SCRATCH1(r13);				\
+	__LOAD_FAR_HANDLER(r9, label);					\
+	mtctr	r9;							\
+	ld	r9,area+EX_R9(r13);					\
+	bctr
+
+#define BRANCH_TO_KVM(reg, label)					\
+	__LOAD_FAR_HANDLER(reg, label);					\
+	mtctr	reg;							\
+	bctr
+
 #else
 #define BRANCH_TO_COMMON(reg, label)					\
 	b	label
 
+#define BRANCH_LINK_TO_FAR(reg, label)					\
+	bl	label
+
+#define BRANCH_TO_KVM(reg, label)					\
+	b	label
+
+#define __BRANCH_TO_KVM_EXIT(area, label)				\
+	ld	r9,area+EX_R9(r13);					\
+	b	label
+
 #endif
 
-#define __KVM_HANDLER_PROLOG(area, n)					\
+
+#define __KVM_HANDLER(area, h, n)					\
 	BEGIN_FTR_SECTION_NESTED(947)					\
 	ld	r10,area+EX_CFAR(r13);					\
 	std	r10,HSTATE_CFAR(r13);					\
···
 	std	r10,HSTATE_PPR(r13);					\
 	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
 	ld	r10,area+EX_R10(r13);					\
-	stw	r9,HSTATE_SCRATCH1(r13);				\
-	ld	r9,area+EX_R9(r13);					\
 	std	r12,HSTATE_SCRATCH0(r13);				\
-
-#define __KVM_HANDLER(area, h, n)					\
-	__KVM_HANDLER_PROLOG(area, n)					\
-	li	r12,n;							\
-	b	kvmppc_interrupt
+	sldi	r12,r9,32;						\
+	ori	r12,r12,(n);						\
+	/* This reloads r9 before branching to kvmppc_interrupt */	\
+	__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
 
 #define __KVM_HANDLER_SKIP(area, h, n)					\
 	cmpwi	r10,KVM_GUEST_MODE_SKIP;				\
-	ld	r10,area+EX_R10(r13);					\
 	beq	89f;							\
-	stw	r9,HSTATE_SCRATCH1(r13);				\
 	BEGIN_FTR_SECTION_NESTED(948)					\
-	ld	r9,area+EX_PPR(r13);					\
-	std	r9,HSTATE_PPR(r13);					\
+	ld	r10,area+EX_PPR(r13);					\
+	std	r10,HSTATE_PPR(r13);					\
 	END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948);	\
-	ld	r9,area+EX_R9(r13);					\
+	ld	r10,area+EX_R10(r13);					\
 	std	r12,HSTATE_SCRATCH0(r13);				\
-	li	r12,n;							\
-	b	kvmppc_interrupt;					\
+	sldi	r12,r9,32;						\
+	ori	r12,r12,(n);						\
+	/* This reloads r9 before branching to kvmppc_interrupt */	\
+	__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt);			\
 89:	mtocrf	0x80,r9;						\
 	ld	r9,area+EX_R9(r13);					\
+	ld	r10,area+EX_R10(r13);					\
 	b	kvmppc_skip_##h##interrupt
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
···
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
 
 #define STD_RELON_EXCEPTION_HV(loc, vec, label)		\
-	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);	/* save r13 */		\
-	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec);
+	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label,	\
+				       EXC_HV, KVMTEST_HV, vec);
 
 #define STD_RELON_EXCEPTION_HV_OOL(vec, label)			\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec);		\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec);	\
 	EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /* This associate vector numbers with bits in paca->irq_happened */
···
 
 #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label)		\
 	_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label,		\
-					  EXC_HV, SOFTEN_NOTEST_HV)
+					  EXC_HV, SOFTEN_TEST_HV)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label)		\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec);	\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec);	\
 	EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
arch/powerpc/include/asm/firmware.h (+3 -2)
···
 #define FW_FEATURE_SPLPAR	ASM_CONST(0x0000000000100000)
 #define FW_FEATURE_LPAR		ASM_CONST(0x0000000000400000)
 #define FW_FEATURE_PS3_LV1	ASM_CONST(0x0000000000800000)
-/* Free				ASM_CONST(0x0000000001000000) */
+#define FW_FEATURE_HPT_RESIZE	ASM_CONST(0x0000000001000000)
 #define FW_FEATURE_CMO		ASM_CONST(0x0000000002000000)
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
 #define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
···
 		FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
 		FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
 		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
-		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
+		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
+		FW_FEATURE_HPT_RESIZE,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
arch/powerpc/include/asm/head-64.h (+119 -113)
···
 *	li	r10,128
 *	mv	r11,r10
 
-* FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address)
-* FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, end_address)
+* FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address, size)
+* FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, start_address, size)
 * CLOSE_FIXED_SECTION(section_name)
 *
 * ZERO_FIXED_SECTION can be used to emit zeroed data.
···
 #define FIXED_SECTION_ENTRY_BEGIN(sname, name)			\
 	__FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES)
 
-#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start)		\
+#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start, size)	\
 	USE_FIXED_SECTION(sname);					\
 	name##_start = (start);						\
+	.if ((start) % (size) != 0);					\
+	.error "Fixed section exception vector misalignment";		\
+	.endif;								\
+	.if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100);	\
+	.error "Fixed section exception vector bad size";		\
+	.endif;								\
 	.if (start) < sname##_start;					\
 	.error "Fixed section underflow";				\
 	.abort;								\
···
 	.global name;							\
 name:
 
-#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, end)		\
-	.if (end) > sname##_end;					\
+#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, start, size)	\
+	.if (start) + (size) > sname##_end;				\
 	.error "Fixed section overflow";				\
 	.abort;								\
 	.endif;								\
-	.if (. - name > end - name##_start);				\
+	.if (. - name > (start) + (size) - name##_start);		\
 	.error "Fixed entry overflow";					\
 	.abort;								\
 	.endif;								\
-	. = ((end) - sname##_start);					\
+	. = ((start) + (size) - sname##_start);				\
 
 
 /*
···
 * Following are the BOOK3S exception handler helper macros.
 * Handlers come in a number of types, and each type has a number of varieties.
 *
-* EXC_REAL_*     - real, unrelocated exception vectors
-* EXC_VIRT_*     - virt (AIL), unrelocated exception vectors
+* EXC_REAL_*        - real, unrelocated exception vectors
+* EXC_VIRT_*        - virt (AIL), unrelocated exception vectors
 * TRAMP_REAL_*   - real, unrelocated helpers (virt can call these)
-* TRAMP_VIRT_*   - virt, unreloc helpers (in practice, real can use)
-* TRAMP_KVM      - KVM handlers that get put into real, unrelocated
-* EXC_COMMON_*   - virt, relocated common handlers
+* TRAMP_VIRT_*      - virt, unreloc helpers (in practice, real can use)
+* TRAMP_KVM         - KVM handlers that get put into real, unrelocated
+* EXC_COMMON_*      - virt, relocated common handlers
 *
 * The EXC handlers are given a name, and branch to name_common, or the
 * appropriate KVM or masking function. Vector handler verieties are as
···
 * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
 */
 
-#define EXC_REAL_BEGIN(name, start, end)			\
-	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start)
+#define EXC_REAL_BEGIN(name, start, size)			\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
 
-#define EXC_REAL_END(name, start, end)				\
-	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, end)
+#define EXC_REAL_END(name, start, size)				\
+	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
 
-#define EXC_VIRT_BEGIN(name, start, end)			\
-	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start)
+#define EXC_VIRT_BEGIN(name, start, size)			\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
 
-#define EXC_VIRT_END(name, start, end)				\
-	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end)
+#define EXC_VIRT_END(name, start, size)				\
+	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
 
-#define EXC_COMMON_BEGIN(name)					\
-	USE_TEXT_SECTION();					\
-	.balign IFETCH_ALIGN_BYTES;				\
-	.global name;						\
-	DEFINE_FIXED_SYMBOL(name);				\
+#define EXC_COMMON_BEGIN(name)						\
+	USE_TEXT_SECTION();						\
+	.balign IFETCH_ALIGN_BYTES;					\
+	.global name;							\
+	DEFINE_FIXED_SYMBOL(name);					\
 name:
 
 #define TRAMP_REAL_BEGIN(name)					\
···
 	FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define TRAMP_KVM_BEGIN(name)					\
-	TRAMP_REAL_BEGIN(name)
+#define TRAMP_KVM_BEGIN(name)						\
+	TRAMP_VIRT_BEGIN(name)
 #else
 #define TRAMP_KVM_BEGIN(name)
 #endif
 
-#define EXC_REAL_NONE(start, end)				\
-	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start); \
-	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, end)
+#define EXC_REAL_NONE(start, size)				\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+	FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
 
-#define EXC_VIRT_NONE(start, end)				\
-	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start); \
-	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, end);
+#define EXC_VIRT_NONE(start, size)				\
+	FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+	FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size);
 
 
-#define EXC_REAL(name, start, end)				\
-	EXC_REAL_BEGIN(name, start, end);			\
+#define EXC_REAL(name, start, size)				\
+	EXC_REAL_BEGIN(name, start, size);			\
 	STD_EXCEPTION_PSERIES(start, name##_common);		\
-	EXC_REAL_END(name, start, end);
+	EXC_REAL_END(name, start, size);
 
-#define EXC_VIRT(name, start, end, realvec)			\
-	EXC_VIRT_BEGIN(name, start, end);			\
+#define EXC_VIRT(name, start, size, realvec)			\
+	EXC_VIRT_BEGIN(name, start, size);			\
 	STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
-	EXC_VIRT_END(name, start, end);
+	EXC_VIRT_END(name, start, size);
 
-#define EXC_REAL_MASKABLE(name, start, end)			\
-	EXC_REAL_BEGIN(name, start, end);			\
+#define EXC_REAL_MASKABLE(name, start, size)			\
+	EXC_REAL_BEGIN(name, start, size);			\
 	MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
-	EXC_REAL_END(name, start, end);
+	EXC_REAL_END(name, start, size);
 
-#define EXC_VIRT_MASKABLE(name, start, end, realvec)		\
-	EXC_VIRT_BEGIN(name, start, end);			\
+#define EXC_VIRT_MASKABLE(name, start, size, realvec)		\
+	EXC_VIRT_BEGIN(name, start, size);			\
 	MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
-	EXC_VIRT_END(name, start, end);
+	EXC_VIRT_END(name, start, size);
 
-#define EXC_REAL_HV(name, start, end)				\
-	EXC_REAL_BEGIN(name, start, end);			\
+#define EXC_REAL_HV(name, start, size)				\
+	EXC_REAL_BEGIN(name, start, size);			\
 	STD_EXCEPTION_HV(start, start, name##_common);		\
-	EXC_REAL_END(name, start, end);
+	EXC_REAL_END(name, start, size);
 
-#define EXC_VIRT_HV(name, start, end, realvec)			\
-	EXC_VIRT_BEGIN(name, start, end);			\
+#define EXC_VIRT_HV(name, start, size, realvec)			\
+	EXC_VIRT_BEGIN(name, start, size);			\
 	STD_RELON_EXCEPTION_HV(start, realvec, name##_common);	\
-	EXC_VIRT_END(name, start, end);
+	EXC_VIRT_END(name, start, size);
 
-#define __EXC_REAL_OOL(name, start, end)			\
-	EXC_REAL_BEGIN(name, start, end);			\
+#define __EXC_REAL_OOL(name, start, size)			\
+	EXC_REAL_BEGIN(name, start, size);			\
 	__OOL_EXCEPTION(start, label, tramp_real_##name);	\
-	EXC_REAL_END(name, start, end);
+	EXC_REAL_END(name, start, size);
 
-#define __TRAMP_REAL_REAL_OOL(name, vec)			\
+#define __TRAMP_REAL_OOL(name, vec)				\
 	TRAMP_REAL_BEGIN(tramp_real_##name);			\
 	STD_EXCEPTION_PSERIES_OOL(vec, name##_common);		\
 
-#define EXC_REAL_OOL(name, start, end)				\
-	__EXC_REAL_OOL(name, start, end);			\
-	__TRAMP_REAL_REAL_OOL(name, start);
+#define EXC_REAL_OOL(name, start, size)				\
+	__EXC_REAL_OOL(name, start, size);			\
+	__TRAMP_REAL_OOL(name, start);
 
-#define __EXC_REAL_OOL_MASKABLE(name, start, end)		\
-	__EXC_REAL_OOL(name, start, end);
+#define __EXC_REAL_OOL_MASKABLE(name, start, size)		\
+	__EXC_REAL_OOL(name, start, size);
 
-#define __TRAMP_REAL_REAL_OOL_MASKABLE(name, vec)		\
+#define __TRAMP_REAL_OOL_MASKABLE(name, vec)			\
 	TRAMP_REAL_BEGIN(tramp_real_##name);			\
 	MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common);	\
 
-#define EXC_REAL_OOL_MASKABLE(name, start, end)			\
-	__EXC_REAL_OOL_MASKABLE(name, start, end);		\
-	__TRAMP_REAL_REAL_OOL_MASKABLE(name, start);
+#define EXC_REAL_OOL_MASKABLE(name, start, size)		\
+	__EXC_REAL_OOL_MASKABLE(name, start, size);		\
+	__TRAMP_REAL_OOL_MASKABLE(name, start);
 
-#define __EXC_REAL_OOL_HV_DIRECT(name, start, end, handler)	\
-	EXC_REAL_BEGIN(name, start, end);			\
+#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler)	\
+	EXC_REAL_BEGIN(name, start, size);			\
 	__OOL_EXCEPTION(start, label, handler);			\
-	EXC_REAL_END(name, start, end);
+	EXC_REAL_END(name, start, size);
 
-#define __EXC_REAL_OOL_HV(name, start, end)			\
-	__EXC_REAL_OOL(name, start, end);
+#define __EXC_REAL_OOL_HV(name, start, size)			\
+	__EXC_REAL_OOL(name, start, size);
 
-#define __TRAMP_REAL_REAL_OOL_HV(name, vec)			\
+#define __TRAMP_REAL_OOL_HV(name, vec)				\
 	TRAMP_REAL_BEGIN(tramp_real_##name);			\
 	STD_EXCEPTION_HV_OOL(vec, name##_common);		\
 
-#define EXC_REAL_OOL_HV(name, start, end)			\
-	__EXC_REAL_OOL_HV(name, start, end);			\
-	__TRAMP_REAL_REAL_OOL_HV(name, start);
+#define EXC_REAL_OOL_HV(name, start, size)			\
+	__EXC_REAL_OOL_HV(name, start, size);			\
+	__TRAMP_REAL_OOL_HV(name, start);
 
-#define __EXC_REAL_OOL_MASKABLE_HV(name, start, end)		\
-	__EXC_REAL_OOL(name, start, end);
+#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size)		\
+	__EXC_REAL_OOL(name, start, size);
 
-#define __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, vec)		\
+#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec)			\
 	TRAMP_REAL_BEGIN(tramp_real_##name);			\
 	MASKABLE_EXCEPTION_HV_OOL(vec, name##_common);		\
 
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, end)		\
-	__EXC_REAL_OOL_MASKABLE_HV(name, start, end);		\
-	__TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, start);
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, size)		\
+	__EXC_REAL_OOL_MASKABLE_HV(name, start, size);		\
+	__TRAMP_REAL_OOL_MASKABLE_HV(name, start);
 
-#define __EXC_VIRT_OOL(name, start, end)			\
-	EXC_VIRT_BEGIN(name, start, end);			\
+#define __EXC_VIRT_OOL(name, start, size)			\
+	EXC_VIRT_BEGIN(name, start, size);			\
 	__OOL_EXCEPTION(start, label, tramp_virt_##name);	\
-	EXC_VIRT_END(name, start, end);
+	EXC_VIRT_END(name, start, size);
 
-#define __TRAMP_REAL_VIRT_OOL(name, realvec)			\
-	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
+#define __TRAMP_VIRT_OOL(name, realvec)				\
+	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
 	STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
 
-#define EXC_VIRT_OOL(name, start, end, realvec)			\
-	__EXC_VIRT_OOL(name, start, end);			\
-	__TRAMP_REAL_VIRT_OOL(name, realvec);
+#define EXC_VIRT_OOL(name, start, size, realvec)		\
+	__EXC_VIRT_OOL(name, start, size);			\
+	__TRAMP_VIRT_OOL(name, realvec);
 
-#define __EXC_VIRT_OOL_MASKABLE(name, start, end)		\
-	__EXC_VIRT_OOL(name, start, end);
+#define __EXC_VIRT_OOL_MASKABLE(name, start, size)		\
+	__EXC_VIRT_OOL(name, start, size);
 
-#define __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec)		\
-	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
+#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec)		\
+	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
 	MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
 
-#define EXC_VIRT_OOL_MASKABLE(name, start, end, realvec)	\
-	__EXC_VIRT_OOL_MASKABLE(name, start, end);		\
-	__TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec);
+#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec)	\
+	__EXC_VIRT_OOL_MASKABLE(name, start, size);		\
+	__TRAMP_VIRT_OOL_MASKABLE(name, realvec);
 
-#define __EXC_VIRT_OOL_HV(name, start, end)			\
-	__EXC_VIRT_OOL(name, start, end);
+#define __EXC_VIRT_OOL_HV(name, start, size)			\
+	__EXC_VIRT_OOL(name, start, size);
 
-#define __TRAMP_REAL_VIRT_OOL_HV(name, realvec)			\
-	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
+#define __TRAMP_VIRT_OOL_HV(name, realvec)			\
+	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
 	STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common);	\
 
-#define EXC_VIRT_OOL_HV(name, start, end, realvec)		\
-	__EXC_VIRT_OOL_HV(name, start, end);			\
-	__TRAMP_REAL_VIRT_OOL_HV(name, realvec);
+#define EXC_VIRT_OOL_HV(name, start, size, realvec)		\
+	__EXC_VIRT_OOL_HV(name, start, size);			\
+	__TRAMP_VIRT_OOL_HV(name, realvec);
 
-#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, end)		\
-	__EXC_VIRT_OOL(name, start, end);
+#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size)		\
+	__EXC_VIRT_OOL(name, start, size);
 
-#define __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec)	\
-	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
+#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec)		\
+	TRAMP_VIRT_BEGIN(tramp_virt_##name);			\
 	MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
 
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, end, realvec)	\
-	__EXC_VIRT_OOL_MASKABLE_HV(name, start, end);		\
-	__TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec);
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec)	\
+	__EXC_VIRT_OOL_MASKABLE_HV(name, start, size);		\
+	__TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
 
 #define TRAMP_KVM(area, n)					\
 	TRAMP_KVM_BEGIN(do_kvm_##n);				\
···
 	TRAMP_KVM_BEGIN(do_kvm_H##n);				\
 	KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2);		\
 
-#define EXC_COMMON(name, realvec, hdlr)				\
-	EXC_COMMON_BEGIN(name);					\
+#define EXC_COMMON(name, realvec, hdlr)					\
+	EXC_COMMON_BEGIN(name);						\
 	STD_EXCEPTION_COMMON(realvec, name, hdlr);		\
 
-#define EXC_COMMON_ASYNC(name, realvec, hdlr)			\
-	EXC_COMMON_BEGIN(name);					\
+#define EXC_COMMON_ASYNC(name, realvec, hdlr)				\
+	EXC_COMMON_BEGIN(name);						\
 	STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr);	\
 
 #define EXC_COMMON_HV(name, realvec, hdlr)			\
-	EXC_COMMON_BEGIN(name);					\
+	EXC_COMMON_BEGIN(name);						\
 	STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr);	\
 
 #endif	/* _ASM_POWERPC_HEAD_64_H */
arch/powerpc/include/asm/hvcall.h (+13)
···
 #define H_GET_MPP_X		0x314
 #define H_SET_MODE		0x31C
 #define H_CLEAR_HPT		0x358
+#define H_RESIZE_HPT_PREPARE	0x36C
+#define H_RESIZE_HPT_COMMIT	0x370
+#define H_REGISTER_PROC_TBL	0x37C
 #define H_SIGNAL_SYS_RESET	0x380
 #define MAX_HCALL_OPCODE	H_SIGNAL_SYS_RESET
···
 #define H_SIGNAL_SYS_RESET_ALL		-1
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS	-2
 /* >= 0 values are CPU number */
+
+/* Flag values used in H_REGISTER_PROC_TBL hcall */
+#define PROC_TABLE_OP_MASK	0x18
+#define PROC_TABLE_DEREG	0x10
+#define PROC_TABLE_NEW		0x18
+#define PROC_TABLE_TYPE_MASK	0x06
+#define PROC_TABLE_HPT_SLB	0x00
+#define PROC_TABLE_HPT_PT	0x02
+#define PROC_TABLE_RADIX	0x04
+#define PROC_TABLE_GTSE		0x01
 
 #ifndef __ASSEMBLY__
 
arch/powerpc/include/asm/isa-bridge.h (+29)
+#ifndef __ISA_BRIDGE_H
+#define __ISA_BRIDGE_H
+
+#ifdef CONFIG_PPC64
+
+extern void isa_bridge_find_early(struct pci_controller *hose);
+extern void isa_bridge_init_non_pci(struct device_node *np);
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+	/* Check if address hits the reserved legacy IO range */
+	unsigned long ea = (unsigned long)address;
+	return ea >= ISA_IO_BASE && ea < ISA_IO_END;
+}
+
+#else
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+	/* No specific ISA handling on ppc32 at this stage, it
+	 * all goes through PCI
+	 */
+	return 0;
+}
+
+#endif
+
+#endif /* __ISA_BRIDGE_H */
arch/powerpc/include/asm/kprobes.h (+25 -2)
···
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
+#include <linux/module.h>
 #include <asm/probes.h>
 #include <asm/code-patching.h>
···
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
-#define MAX_INSN_SIZE 1
+
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+/* Fixed instruction size for powerpc */
+#define MAX_INSN_SIZE		1
+#define MAX_OPTIMIZED_LENGTH	sizeof(kprobe_opcode_t)	/* 4 bytes */
+#define MAX_OPTINSN_SIZE	(optprobe_template_end - optprobe_template_entry)
+#define RELATIVEJUMP_SIZE	sizeof(kprobe_opcode_t)	/* 4 bytes */
 
 #ifdef PPC64_ELF_ABI_v2
 /* PPC64 ABIv2 needs local entry point */
···
 #define kprobe_lookup_name(name, addr)					\
 {									\
 	char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];		\
-	char *modsym;							\
+	const char *modsym;						\
 	bool dot_appended = false;					\
 	if ((modsym = strchr(name, ':')) != NULL) {			\
 		modsym++;						\
···
 	unsigned long kprobe_saved_msr;
 	struct pt_regs jprobe_saved_regs;
 	struct prev_kprobe prev_kprobe;
+};
+
+struct arch_optimized_insn {
+	kprobe_opcode_t copied_insn[1];
+	/* detour buffer */
+	kprobe_opcode_t *insn;
 };
 
 extern int kprobe_exceptions_notify(struct notifier_block *self,
arch/powerpc/include/asm/kvm_book3s.h (+25 -1)
···
 			unsigned long status);
 extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
 			unsigned long slb_v, unsigned long valid);
+extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			unsigned long gpa, gva_t ea, int is_store);
 
 extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
 extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
···
 extern void kvmppc_mmu_hpte_sysexit(void);
 extern int kvmppc_mmu_hv_init(void);
 extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
+
+extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
+			struct kvm_vcpu *vcpu,
+			unsigned long ea, unsigned long dsisr);
+extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+			struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern int kvmppc_init_vm_radix(struct kvm *kvm);
+extern void kvmppc_free_radix(struct kvm *kvm);
+extern int kvmppc_radix_init(void);
+extern void kvmppc_radix_exit(void);
+extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map);
+extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
 
 /* XXX remove this export when load_last_inst() is generic */
 extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
···
 extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 			unsigned long pte_index, unsigned long avpn,
 			unsigned long *hpret);
-extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+			struct kvm_memory_slot *memslot,
+			unsigned long *map);
 extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
+6
arch/powerpc/include/asm/kvm_book3s_64.h
···
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+static inline bool kvm_is_radix(struct kvm *kvm)
+{
+	return kvm->arch.radix;
+}
+
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
+6
arch/powerpc/include/asm/kvm_host.h
···
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
 	cpumask_t need_tlb_flush;
+	cpumask_t cpu_in_guest;
 	int hpt_cma_alloc;
+	u8 radix;
+	pgd_t *pgtable;
+	u64 process_table;
 	struct dentry *debugfs_dir;
 	struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
···
 	ulong fault_dar;
 	u32 fault_dsisr;
 	unsigned long intr_msr;
+	ulong fault_gpa;	/* guest real address of page fault (POWER9) */
 #endif
 
 #ifdef CONFIG_BOOKE
···
 	int state;
 	int ptid;
 	int thread_cpu;
+	int prev_cpu;
 	bool timer_running;
 	wait_queue_head_t cpu_run;
+2
arch/powerpc/include/asm/kvm_ppc.h
···
 				      struct irq_bypass_producer *);
 	void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
 					struct irq_bypass_producer *);
+	int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
+	int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
+1
arch/powerpc/include/asm/mmu.h
···
 		MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
 		MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
 		MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
+		MMU_FTR_KERNEL_RO |
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
 #endif
+2 -1
arch/powerpc/include/asm/opal-api.h
···
 #define OPAL_INT_EOI				124
 #define OPAL_INT_SET_MFRR			125
 #define OPAL_PCI_TCE_KILL			126
-#define OPAL_LAST				126
+#define OPAL_NMMU_SET_PTCR			127
+#define OPAL_LAST				127
 
 /* Device tree flags */
+1 -7
arch/powerpc/include/asm/opal.h
···
 int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func,
 				   uint64_t offset, uint32_t data);
 int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
-int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority);
 int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority);
 int64_t opal_register_exception_handler(uint64_t opal_exception,
 					uint64_t handler_address,
···
 int64_t opal_pci_poll2(uint64_t id, uint64_t data);
 
 int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
-int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
 int64_t opal_int_set_cppr(uint8_t cppr);
 int64_t opal_int_eoi(uint32_t xirr);
-int64_t opal_rm_int_eoi(uint32_t xirr);
 int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
-int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
 int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
 			  uint32_t pe_num, uint32_t tce_size,
 			  uint64_t dma_addr, uint32_t npages);
-int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
-			     uint32_t pe_num, uint32_t tce_size,
-			     uint64_t dma_addr, uint32_t npages);
+int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
+2 -2
arch/powerpc/include/asm/page_64.h
···
 	unsigned long iterations;
 	unsigned long onex, twox, fourx, eightx;
 
-	iterations = ppc64_caches.dlines_per_page / 8;
+	iterations = ppc64_caches.l1d.blocks_per_page / 8;
 
 	/*
 	 * Some verisions of gcc use multiply instructions to
 	 * calculate the offsets so lets give it a hand to
 	 * do better.
 	 */
-	onex = ppc64_caches.dline_size;
+	onex = ppc64_caches.l1d.block_size;
 	twox = onex << 1;
 	fourx = onex << 2;
 	eightx = onex << 3;
-18
arch/powerpc/include/asm/pci-bridge.h
···
 			   u8 *bus, u8 *devfn);
 extern void pci_create_OF_bus_map(void);
 
-static inline int isa_vaddr_is_ioport(void __iomem *address)
-{
-	/* No specific ISA handling on ppc32 at this stage, it
-	 * all goes through PCI
-	 */
-	return 0;
-}
-
 #else	/* CONFIG_PPC64 */
 
 /*
···
 
 /** Discover new pci devices under this bus, and add them */
 extern void pci_hp_add_devices(struct pci_bus *bus);
-
-
-extern void isa_bridge_find_early(struct pci_controller *hose);
-
-static inline int isa_vaddr_is_ioport(void __iomem *address)
-{
-	/* Check if address hits the reserved legacy IO range */
-	unsigned long ea = (unsigned long)address;
-	return ea >= ISA_IO_BASE && ea < ISA_IO_END;
-}
 
 extern int pcibios_unmap_io_space(struct pci_bus *bus);
 extern int pcibios_map_io_space(struct pci_bus *bus);
+12
arch/powerpc/include/asm/plpar_wrappers.h
···
 	return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
 }
 
+static inline long plpar_resize_hpt_prepare(unsigned long flags,
+					    unsigned long shift)
+{
+	return plpar_hcall_norets(H_RESIZE_HPT_PREPARE, flags, shift);
+}
+
+static inline long plpar_resize_hpt_commit(unsigned long flags,
+					   unsigned long shift)
+{
+	return plpar_hcall_norets(H_RESIZE_HPT_COMMIT, flags, shift);
+}
+
 static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
 		unsigned long *tce_ret)
 {
+19
arch/powerpc/include/asm/powernv.h
+/*
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERNV_H
+#define _ASM_POWERNV_H
+
+#ifdef CONFIG_PPC_POWERNV
+extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
+#else
+static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
+#endif
+
+#endif /* _ASM_POWERNV_H */
+1
arch/powerpc/include/asm/ppc-opcode.h
···
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
 #define __PPC_SH(s)	__PPC_WS(s)
+#define __PPC_SH64(s)	(__PPC_SH(s) | (((s) & 0x20) >> 4))
 #define __PPC_MB(s)	(((s) & 0x1f) << 6)
 #define __PPC_ME(s)	(((s) & 0x1f) << 1)
 #define __PPC_MB64(s)	(__PPC_MB(s) | ((s) & 0x20))
+2 -1
arch/powerpc/include/asm/processor.h
···
 extern unsigned long power7_nap(int check_irq);
 extern unsigned long power7_sleep(void);
 extern unsigned long power7_winkle(void);
-extern unsigned long power9_idle_stop(unsigned long stop_level);
+extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
+				      unsigned long stop_psscr_mask);
 
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
+14 -4
arch/powerpc/include/asm/prom.h
···
 #define OV1_PPC_2_06		0x02	/* set if we support PowerPC 2.06 */
 #define OV1_PPC_2_07		0x01	/* set if we support PowerPC 2.07 */
 
+#define OV1_PPC_3_00		0x80	/* set if we support PowerPC 3.00 */
+
 /* Option vector 2: Open Firmware options supported */
 #define OV2_REAL_MODE		0x20	/* set if we want OF in real mode */
 
···
 #define OV5_XCMO		0x0440	/* Page Coalescing */
 #define OV5_TYPE1_AFFINITY	0x0580	/* Type 1 NUMA affinity */
 #define OV5_PRRN		0x0540	/* Platform Resource Reassignment */
-#define OV5_PFO_HW_RNG		0x0E80	/* PFO Random Number Generator */
-#define OV5_PFO_HW_842		0x0E40	/* PFO Compression Accelerator */
-#define OV5_PFO_HW_ENCR	0x0E20	/* PFO Encryption Accelerator */
-#define OV5_SUB_PROCESSORS	0x0F01	/* 1,2,or 4 Sub-Processors supported */
+#define OV5_RESIZE_HPT		0x0601	/* Hash Page Table resizing */
+#define OV5_PFO_HW_RNG		0x1180	/* PFO Random Number Generator */
+#define OV5_PFO_HW_842		0x1140	/* PFO Compression Accelerator */
+#define OV5_PFO_HW_ENCR	0x1120	/* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS	0x1501	/* 1,2,or 4 Sub-Processors supported */
+#define OV5_XIVE_EXPLOIT	0x1701	/* XIVE exploitation supported */
+#define OV5_MMU_RADIX_300	0x1880	/* ISA v3.00 radix MMU supported */
+#define OV5_MMU_HASH_300	0x1840	/* ISA v3.00 hash MMU supported */
+#define OV5_MMU_SEGM_RADIX	0x1820	/* radix mode (no segmentation) */
+#define OV5_MMU_PROC_TBL	0x1810	/* hcall selects SLB or proc table */
+#define OV5_MMU_SLB		0x1800	/* always use SLB */
+#define OV5_MMU_GTSE		0x1808	/* Guest translation shootdown */
 
 /* Option Vector 6: IBM PAPR hints */
 #define OV6_LINUX		0x02	/* Linux is our OS */
+5 -1
arch/powerpc/include/asm/reg.h
···
 #define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
 #define   DSISR_NOHPTE		0x40000000	/* no translation found */
 #define   DSISR_PROTFAULT	0x08000000	/* protection fault */
+#define   DSISR_BADACCESS	0x04000000	/* bad access to CI or G */
 #define   DSISR_ISSTORE		0x02000000	/* access was a store */
 #define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
 #define   DSISR_NOSEGMENT	0x00200000	/* SLB miss */
 #define   DSISR_KEYFAULT	0x00200000	/* Key fault */
+#define   DSISR_UNSUPP_MMU	0x00080000	/* Unsupported MMU config */
+#define   DSISR_SET_RC		0x00040000	/* Failed setting of R/C bits */
+#define   DSISR_PGDIRFAULT	0x00020000	/* Fault on page directory */
 #define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
 #define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
 #define SPRN_CIR	0x11B	/* Chip Information Register (hyper, R/0) */
···
 #define   LPCR_DPFD_SH		52
 #define   LPCR_DPFD		(ASM_CONST(7) << LPCR_DPFD_SH)
 #define   LPCR_VRMASD_SH	47
-#define   LPCR_VRMASD		(ASM_CONST(1) << LPCR_VRMASD_SH)
+#define   LPCR_VRMASD		(ASM_CONST(0x1f) << LPCR_VRMASD_SH)
 #define   LPCR_VRMA_L		ASM_CONST(0x0008000000000000)
 #define   LPCR_VRMA_LP0		ASM_CONST(0x0001000000000000)
 #define   LPCR_VRMA_LP1		ASM_CONST(0x0000800000000000)
+1
arch/powerpc/include/asm/rtas.h
···
 
 #define PSERIES_HP_ELOG_ACTION_ADD	1
 #define PSERIES_HP_ELOG_ACTION_REMOVE	2
+#define PSERIES_HP_ELOG_ACTION_READD	3
 
 #define PSERIES_HP_ELOG_ID_DRC_NAME	1
 #define PSERIES_HP_ELOG_ID_DRC_INDEX	2
+7
arch/powerpc/include/asm/sparsemem.h
···
 #ifdef CONFIG_MEMORY_HOTPLUG
 extern int create_section_mapping(unsigned long start, unsigned long end);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+extern void resize_hpt_for_hotplug(unsigned long new_mem_size);
+#else
+static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { }
+#endif
+
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
 #else
+3 -3
arch/powerpc/include/asm/uaccess.h
···
 ({								\
 	long __gu_err;						\
 	unsigned long __gu_val;					\
-	__typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
···
 ({								\
 	long __gu_err = -EFAULT;				\
 	unsigned long __gu_val = 0;				\
-	__typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	might_fault();						\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))		\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
···
 ({								\
 	long __gu_err;						\
 	unsigned long __gu_val;					\
-	__typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;		\
+32 -1
arch/powerpc/include/uapi/asm/auxvec.h
···
  */
 #define AT_SYSINFO_EHDR		33
 
-#define AT_VECTOR_SIZE_ARCH	6 /* entries in ARCH_DLINFO */
+/*
+ * AT_*CACHEBSIZE above represent the cache *block* size which is
+ * the size that is affected by the cache management instructions.
+ *
+ * It doesn't nececssarily matches the cache *line* size which is
+ * more of a performance tuning hint. Additionally the latter can
+ * be different for the different cache levels.
+ *
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entry per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value if 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+
+#define AT_L1I_CACHESIZE	40
+#define AT_L1I_CACHEGEOMETRY	41
+#define AT_L1D_CACHESIZE	42
+#define AT_L1D_CACHEGEOMETRY	43
+#define AT_L2_CACHESIZE		44
+#define AT_L2_CACHEGEOMETRY	45
+#define AT_L3_CACHESIZE		46
+#define AT_L3_CACHEGEOMETRY	47
+
+#define AT_VECTOR_SIZE_ARCH	14 /* entries in ARCH_DLINFO */
 
 #endif
-23
arch/powerpc/include/uapi/asm/elf.h
···
 typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
 #endif
 
-
-/*
- * The requirements here are:
- *	- keep the final alignment of sp (sp & 0xf)
- *	- make sure the 32-bit value at the first 16 byte aligned position of
- *	  AUXV is greater than 16 for glibc compatibility.
- *	  AT_IGNOREPPC is used for that.
- *	- for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
- *	  even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
- *	  update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
- */
-#define ARCH_DLINFO							\
-do {									\
-	/* Handle glibc compatibility. */				\
-	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
-	NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);			\
-	/* Cache size items */						\
-	NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize);			\
-	NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize);			\
-	NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize);			\
-	VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base);	\
-} while (0)
-
 /* PowerPC64 relocations defined by the ABIs */
 #define R_PPC64_NONE    R_PPC_NONE
 #define R_PPC64_ADDR32  R_PPC_ADDR32	/* 32bit absolute address.  */
+20
arch/powerpc/include/uapi/asm/kvm.h
···
 	__u16	n_invalid;
 };
 
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;	/* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX	1	/* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE	2	/* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
 /* Per-vcpu XICS interrupt controller state */
 #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+2 -1
arch/powerpc/kernel/Makefile
···
 endif
 
 CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
-CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
···
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_OPTPROBES)		+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+1 -1
arch/powerpc/kernel/align.c
···
 	int i, size;
 
 #ifdef __powerpc64__
-	size = ppc64_caches.dline_size;
+	size = ppc64_caches.l1d.block_size;
 #else
 	size = L1_CACHE_BYTES;
 #endif
+8 -6
arch/powerpc/kernel/asm-offsets.c
···
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 
 #ifdef CONFIG_PPC64
-	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
-	DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
-	DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
-	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
-	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
-	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
+	DEFINE(DCACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1d.block_size));
+	DEFINE(DCACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1d.log_block_size));
+	DEFINE(DCACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1d.blocks_per_page));
+	DEFINE(ICACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1i.block_size));
+	DEFINE(ICACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1i.log_block_size));
+	DEFINE(ICACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1i.blocks_per_page));
 	/* paca */
 	DEFINE(PACA_SIZE, sizeof(struct paca_struct));
 	DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
···
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
+	DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix));
 	DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
···
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
 	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+	DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa));
 	DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
 	DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
+138 -124
arch/powerpc/kernel/exceptions-64s.S
···
 __start_interrupts:
 
 /* No virt vectors corresponding with 0x0..0x100 */
-EXC_VIRT_NONE(0x4000, 0x4100)
+EXC_VIRT_NONE(0x4000, 0x100)
 
 
 #ifdef CONFIG_PPC_P7_NAP
···
 #define IDLETEST NOTEST
 #endif
 
-EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
+EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 	SET_SCRATCH0(r13)
 	GET_PACA(r13)
 	clrrdi	r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
 	EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
 				 IDLETEST, 0x100)
 
-EXC_REAL_END(system_reset, 0x100, 0x200)
-EXC_VIRT_NONE(0x4100, 0x4200)
+EXC_REAL_END(system_reset, 0x100, 0x100)
+EXC_VIRT_NONE(0x4100, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
···
 	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r0,0
 	beq	1f
-	b	kvm_start_guest
+	BRANCH_TO_KVM(r10, kvm_start_guest)
1:
 #endif
···
 #endif /* CONFIG_PPC_PSERIES */
 
 
-EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 	/* This is moved out of line as it can be patched by FW, but
 	 * some code path might still want to branch into the original
 	 * vector
···
 	FTR_SECTION_ELSE
 	b	machine_check_pSeries_0
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-EXC_REAL_END(machine_check, 0x200, 0x300)
-EXC_VIRT_NONE(0x4200, 0x4300)
+EXC_REAL_END(machine_check, 0x200, 0x100)
+EXC_VIRT_NONE(0x4200, 0x100)
 TRAMP_REAL_BEGIN(machine_check_powernv_early)
 BEGIN_FTR_SECTION
 	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
···
 	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
 	cmpwi	r3,PNV_THREAD_NAP
 	bgt	10f
-	IDLE_STATE_ENTER_SEQ(PPC_NAP)
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
 	/* No return */
10:
 	cmpwi	r3,PNV_THREAD_SLEEP
 	bgt	2f
-	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
 	/* No return */
 
2:
···
 	 */
 	ori	r13,r13,1
 	SET_PACA(r13)
-	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
 	/* No return */
4:
 #endif
···
 	b	1b
 
 
-EXC_REAL(data_access, 0x300, 0x380)
-EXC_VIRT(data_access, 0x4300, 0x4380, 0x300)
+EXC_REAL(data_access, 0x300, 0x80)
+EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
 
 EXC_COMMON_BEGIN(data_access_common)
···
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
-EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400)
+EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
···
 	mtctr	r10
 	bctr
 #endif
-EXC_REAL_END(data_access_slb, 0x380, 0x400)
+EXC_REAL_END(data_access_slb, 0x380, 0x80)
 
-EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400)
+EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
···
 	mtctr	r10
 	bctr
 #endif
-EXC_VIRT_END(data_access_slb, 0x4380, 0x4400)
+EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
 TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
 
 
-EXC_REAL(instruction_access, 0x400, 0x480)
-EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400)
+EXC_REAL(instruction_access, 0x400, 0x80)
+EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
 TRAMP_KVM(PACA_EXGEN, 0x400)
 
 EXC_COMMON_BEGIN(instruction_access_common)
···
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
-EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500)
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
···
 	mtctr	r10
 	bctr
 #endif
-EXC_REAL_END(instruction_access_slb, 0x480, 0x500)
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
 
-EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
···
 	mtctr	r10
 	bctr
 #endif
-EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500)
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
 TRAMP_KVM(PACA_EXSLB, 0x480)
 
 
···
 	bl	slb_miss_bad_addr
 	b	ret_from_except
 
-EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600)
+EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
 	.globl hardware_interrupt_hv;
 hardware_interrupt_hv:
 	BEGIN_FTR_SECTION
 		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
 					    EXC_HV, SOFTEN_TEST_HV)
-do_kvm_H0x500:
-		KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
 	FTR_SECTION_ELSE
 		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
 					    EXC_STD, SOFTEN_TEST_PR)
-do_kvm_0x500:
-		KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-EXC_REAL_END(hardware_interrupt, 0x500, 0x600)
+EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
 
-EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600)
+EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 	.globl hardware_interrupt_relon_hv;
 hardware_interrupt_relon_hv:
 	BEGIN_FTR_SECTION
···
 	FTR_SECTION_ELSE
 		_MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600)
+EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 
+TRAMP_KVM(PACA_EXGEN, 0x500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x500)
 EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
 
 
-EXC_REAL(alignment, 0x600, 0x700)
-EXC_VIRT(alignment, 0x4600, 0x4700, 0x600)
+EXC_REAL(alignment, 0x600, 0x100)
+EXC_VIRT(alignment, 0x4600, 0x100, 0x600)
 TRAMP_KVM(PACA_EXGEN, 0x600)
 EXC_COMMON_BEGIN(alignment_common)
 	mfspr	r10,SPRN_DAR
···
 	b	ret_from_except
 
 
-EXC_REAL(program_check, 0x700, 0x800)
-EXC_VIRT(program_check, 0x4700, 0x4800, 0x700)
+EXC_REAL(program_check, 0x700, 0x100)
+EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
 TRAMP_KVM(PACA_EXGEN, 0x700)
 EXC_COMMON_BEGIN(program_check_common)
 	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
···
 	b	ret_from_except
 
 
-EXC_REAL(fp_unavailable, 0x800, 0x900)
-EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800)
+EXC_REAL(fp_unavailable, 0x800, 0x100)
+EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
 TRAMP_KVM(PACA_EXGEN, 0x800)
 EXC_COMMON_BEGIN(fp_unavailable_common)
 	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
···
 #endif
 
 
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x980)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
 TRAMP_KVM(PACA_EXGEN, 0x900)
 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
 
 
-EXC_REAL_HV(hdecrementer, 0x980, 0xa00)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980)
+EXC_REAL_HV(hdecrementer, 0x980, 0x80)
+EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
 TRAMP_KVM_HV(PACA_EXGEN, 0x980)
 EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
 
 
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
 TRAMP_KVM(PACA_EXGEN, 0xa00)
 #ifdef CONFIG_PPC_DOORBELL
 EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
···
 #endif
 
 
-EXC_REAL(trap_0b, 0xb00, 0xc00)
-EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
+EXC_REAL(trap_0b, 0xb00, 0x100)
+EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
 TRAMP_KVM(PACA_EXGEN, 0xb00)
 EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	/*
+	 * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
+	 * that support it) before changing to HMT_MEDIUM. That allows the KVM
+	 * code to save that value into the guest state (it is the guest's PPR
+	 * value). Otherwise just change to HMT_MEDIUM as userspace has
+	 * already saved the PPR.
+	 */
+#define SYSCALL_KVMTEST							\
+	SET_SCRATCH0(r13);						\
+	GET_PACA(r13);							\
+	std	r9,PACA_EXGEN+EX_R9(r13);				\
+	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);			\
+	HMT_MEDIUM;							\
+	std	r10,PACA_EXGEN+EX_R10(r13);				\
+	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);	\
+	mfcr	r9;							\
+	KVMTEST_PR(0xc00);						\
+	GET_SCRATCH0(r13)
+
+#else
+#define SYSCALL_KVMTEST							\
+	HMT_MEDIUM
+#endif
+
 #define LOAD_SYSCALL_HANDLER(reg)					\
 	__LOAD_HANDLER(reg, system_call_common)
 
···
 	b	system_call_common ;
 #endif
 
-EXC_REAL_BEGIN(system_call, 0xc00, 0xd00)
-	/*
-	 * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
-	 * that support it) before changing to HMT_MEDIUM. That allows the KVM
-	 * code to save that value into the guest state (it is the guest's PPR
-	 * value). Otherwise just change to HMT_MEDIUM as userspace has
-	 * already saved the PPR.
-	 */
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-	SET_SCRATCH0(r13)
-	GET_PACA(r13)
-	std	r9,PACA_EXGEN+EX_R9(r13)
-	OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);
-	HMT_MEDIUM;
-	std	r10,PACA_EXGEN+EX_R10(r13)
-	OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR);
-	mfcr	r9
-	KVMTEST_PR(0xc00)
-	GET_SCRATCH0(r13)
-#else
-	HMT_MEDIUM;
-#endif
+EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
+	SYSCALL_KVMTEST
 	SYSCALL_PSERIES_1
 	SYSCALL_PSERIES_2_RFID
 	SYSCALL_PSERIES_3
-EXC_REAL_END(system_call, 0xc00, 0xd00)
+EXC_REAL_END(system_call, 0xc00, 0x100)
 
-EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00)
-	HMT_MEDIUM
+EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
+	SYSCALL_KVMTEST
 	SYSCALL_PSERIES_1
 	SYSCALL_PSERIES_2_DIRECT
 	SYSCALL_PSERIES_3
-EXC_VIRT_END(system_call, 0x4c00, 0x4d00)
+EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 TRAMP_KVM(PACA_EXGEN, 0xc00)
 
 
-EXC_REAL(single_step, 0xd00, 0xe00)
-EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00)
+EXC_REAL(single_step, 0xd00, 0x100)
+EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
 TRAMP_KVM(PACA_EXGEN, 0xd00)
 EXC_COMMON(single_step_common, 0xd00, single_step_exception)
 
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20)
-EXC_VIRT_NONE(0x4e00, 0x4e20)
+EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
 TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
 EXC_COMMON_BEGIN(h_data_storage_common)
 	mfspr	r10,SPRN_HDAR
···
 	b	ret_from_except
 
 
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40)
-EXC_VIRT_NONE(0x4e20, 0x4e40)
+EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
 EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
 
 
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40)
+EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
 EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
 
···
 * first, and then eventaully from there to the trampoline to get into virtual
 * mode.
 */
-__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early)
-__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
-EXC_VIRT_NONE(0x4e60, 0x4e80)
+__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+EXC_VIRT_NONE(0x4e60, 0x20)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
 TRAMP_REAL_BEGIN(hmi_exception_early)
 	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60)
···
 	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
 	EXCEPTION_PROLOG_COMMON_3(0xe60)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	hmi_exception_realmode
+	BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode)
 	/* Windup the stack. */
 	/* Move original HSRR0 and HSRR1 into the respective regs */
 	ld	r9,_MSR(r1)
···
 EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception)
 
 
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
 TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
 #ifdef CONFIG_PPC_DOORBELL
 EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
···
 #endif
 
 
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
 TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
 EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
 
 
-EXC_REAL_NONE(0xec0, 0xf00)
-EXC_VIRT_NONE(0x4ec0, 0x4f00)
+EXC_REAL_NONE(0xec0, 0x20)
+EXC_VIRT_NONE(0x4ec0, 0x20)
+EXC_REAL_NONE(0xee0, 0x20)
+EXC_VIRT_NONE(0x4ee0, 0x20)
 
 
-EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00)
+EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
 TRAMP_KVM(PACA_EXGEN, 0xf00)
 EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
 
 
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20)
+EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
 TRAMP_KVM(PACA_EXGEN, 0xf20)
 EXC_COMMON_BEGIN(altivec_unavailable_common)
 	EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
···
 	b	ret_from_except
 
 
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40)
+EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
 TRAMP_KVM(PACA_EXGEN, 0xf40)
 EXC_COMMON_BEGIN(vsx_unavailable_common)
 	EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
···
 	b	ret_from_except
 
 
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60)
+EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
 TRAMP_KVM(PACA_EXGEN, 0xf60)
 EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
 
 
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80)
+EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
 TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
 EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
 
 
-EXC_REAL_NONE(0xfa0, 0x1200)
-EXC_VIRT_NONE(0x4fa0, 0x5200)
+EXC_REAL_NONE(0xfa0, 0x20)
+EXC_VIRT_NONE(0x4fa0, 0x20)
+EXC_REAL_NONE(0xfc0, 0x20)
+EXC_VIRT_NONE(0x4fc0, 0x20)
+EXC_REAL_NONE(0xfe0, 0x20)
+EXC_VIRT_NONE(0x4fe0, 0x20)
+
+EXC_REAL_NONE(0x1000, 0x100)
+EXC_VIRT_NONE(0x5000, 0x100)
+EXC_REAL_NONE(0x1100, 0x100)
+EXC_VIRT_NONE(0x5100, 0x100)
 
 #ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300)
-EXC_VIRT_NONE(0x5200, 0x5300)
+EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_VIRT_NONE(0x5200, 0x100)
 TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) 1142 1146 #else /* CONFIG_CBE_RAS */ 1143 - EXC_REAL_NONE(0x1200, 0x1300) 1144 - EXC_VIRT_NONE(0x5200, 0x5300) 1147 + EXC_REAL_NONE(0x1200, 0x100) 1148 + EXC_VIRT_NONE(0x5200, 0x100) 1145 1149 #endif 1146 1150 1147 1151 1148 - EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) 1149 - EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) 1152 + EXC_REAL(instruction_breakpoint, 0x1300, 0x100) 1153 + EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300) 1150 1154 TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) 1151 1155 EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) 1152 1156 1153 - EXC_REAL_NONE(0x1400, 0x1500) 1154 - EXC_VIRT_NONE(0x5400, 0x5500) 1157 + EXC_REAL_NONE(0x1400, 0x100) 1158 + EXC_VIRT_NONE(0x5400, 0x100) 1155 1159 1156 - EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) 1160 + EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) 1157 1161 mtspr SPRN_SPRG_HSCRATCH0,r13 1158 1162 EXCEPTION_PROLOG_0(PACA_EXGEN) 1159 1163 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) ··· 1177 1163 1178 1164 KVMTEST_PR(0x1500) 1179 1165 EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) 1180 - EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) 1166 + EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) 1181 1167 1182 1168 #ifdef CONFIG_PPC_DENORMALISATION 1183 - EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) 1169 + EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) 1184 1170 b exc_real_0x1500_denorm_exception_hv 1185 - EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) 1171 + EXC_VIRT_END(denorm_exception, 0x5500, 0x100) 1186 1172 #else 1187 - EXC_VIRT_NONE(0x5500, 0x5600) 1173 + EXC_VIRT_NONE(0x5500, 0x100) 1188 1174 #endif 1189 1175 1190 1176 TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) ··· 1257 1243 1258 1244 1259 1245 #ifdef CONFIG_CBE_RAS 1260 - EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) 1261 - EXC_VIRT_NONE(0x5600, 0x5700) 1246 + EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100) 1247 + 
EXC_VIRT_NONE(0x5600, 0x100) 1262 1248 TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) 1263 1249 EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) 1264 1250 #else /* CONFIG_CBE_RAS */ 1265 - EXC_REAL_NONE(0x1600, 0x1700) 1266 - EXC_VIRT_NONE(0x5600, 0x5700) 1251 + EXC_REAL_NONE(0x1600, 0x100) 1252 + EXC_VIRT_NONE(0x5600, 0x100) 1267 1253 #endif 1268 1254 1269 1255 1270 - EXC_REAL(altivec_assist, 0x1700, 0x1800) 1271 - EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) 1256 + EXC_REAL(altivec_assist, 0x1700, 0x100) 1257 + EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700) 1272 1258 TRAMP_KVM(PACA_EXGEN, 0x1700) 1273 1259 #ifdef CONFIG_ALTIVEC 1274 1260 EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) ··· 1278 1264 1279 1265 1280 1266 #ifdef CONFIG_CBE_RAS 1281 - EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) 1282 - EXC_VIRT_NONE(0x5800, 0x5900) 1267 + EXC_REAL_HV(cbe_thermal, 0x1800, 0x100) 1268 + EXC_VIRT_NONE(0x5800, 0x100) 1283 1269 TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) 1284 1270 EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) 1285 1271 #else /* CONFIG_CBE_RAS */ 1286 - EXC_REAL_NONE(0x1800, 0x1900) 1287 - EXC_VIRT_NONE(0x5800, 0x5900) 1272 + EXC_REAL_NONE(0x1800, 0x100) 1273 + EXC_VIRT_NONE(0x5800, 0x100) 1288 1274 #endif 1289 1275 1290 1276
+24 -1
arch/powerpc/kernel/fadump.c
··· 406 406 void crash_fadump(struct pt_regs *regs, const char *str) 407 407 { 408 408 struct fadump_crash_info_header *fdh = NULL; 409 + int old_cpu, this_cpu; 409 410 410 411 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) 411 412 return; 412 413 414 + /* 415 + * old_cpu == -1 means this is the first CPU which has come here, 416 + * go ahead and trigger fadump. 417 + * 418 + * old_cpu != -1 means some other CPU has already on it's way 419 + * to trigger fadump, just keep looping here. 420 + */ 421 + this_cpu = smp_processor_id(); 422 + old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); 423 + 424 + if (old_cpu != -1) { 425 + /* 426 + * We can't loop here indefinitely. Wait as long as fadump 427 + * is in force. If we race with fadump un-registration this 428 + * loop will break and then we go down to normal panic path 429 + * and reboot. If fadump is in force the first crashing 430 + * cpu will definitely trigger fadump. 431 + */ 432 + while (fw_dump.dump_registered) 433 + cpu_relax(); 434 + return; 435 + } 436 + 413 437 fdh = __va(fw_dump.fadumphdr_addr); 414 - crashing_cpu = smp_processor_id(); 415 438 fdh->crashing_cpu = crashing_cpu; 416 439 crash_save_vmcoreinfo(); 417 440
+3 -1
arch/powerpc/kernel/hw_breakpoint.c
··· 228 228 rcu_read_lock(); 229 229 230 230 bp = __this_cpu_read(bp_per_reg); 231 - if (!bp) 231 + if (!bp) { 232 + rc = NOTIFY_DONE; 232 233 goto out; 234 + } 233 235 info = counter_arch_bp(bp); 234 236 235 237 /*
+27 -19
arch/powerpc/kernel/idle_book3s.S
··· 40 40 #define _WORC GPR11 41 41 #define _PTCR GPR12 42 42 43 - #define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ 44 - PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ 45 - PSSCR_MTL_MASK 43 + #define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 46 44 47 45 .text 48 46 ··· 203 205 stb r3,PACA_THREAD_IDLE_STATE(r13) 204 206 cmpwi cr3,r3,PNV_THREAD_SLEEP 205 207 bge cr3,2f 206 - IDLE_STATE_ENTER_SEQ(PPC_NAP) 208 + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) 207 209 /* No return */ 208 210 2: 209 211 /* Sleep or winkle */ ··· 237 239 238 240 common_enter: /* common code for all the threads entering sleep or winkle */ 239 241 bgt cr3,enter_winkle 240 - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) 242 + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) 241 243 242 244 fastsleep_workaround_at_entry: 243 245 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT ··· 248 250 /* Fast sleep workaround */ 249 251 li r3,1 250 252 li r4,1 251 - bl opal_rm_config_cpu_idle_state 253 + bl opal_config_cpu_idle_state 252 254 253 255 /* Clear Lock bit */ 254 256 li r0,0 ··· 259 261 enter_winkle: 260 262 bl save_sprs_to_stack 261 263 262 - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) 264 + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) 263 265 264 266 /* 265 - * r3 - requested stop state 267 + * r3 - PSSCR value corresponding to the requested stop state. 266 268 */ 267 269 power_enter_stop: 268 270 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE ··· 272 274 stb r4,HSTATE_HWTHREAD_STATE(r13) 273 275 #endif 274 276 /* 277 + * Check if we are executing the lite variant with ESL=EC=0 278 + */ 279 + andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED 280 + clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ 281 + bne 1f 282 + IDLE_STATE_ENTER_SEQ(PPC_STOP) 283 + li r3,0 /* Since we didn't lose state, return 0 */ 284 + b pnv_wakeup_noloss 285 + /* 275 286 * Check if the requested state is a deep idle state. 
276 287 */ 277 - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) 288 + 1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) 278 289 ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) 279 290 cmpd r3,r4 280 291 bge 2f 281 - IDLE_STATE_ENTER_SEQ(PPC_STOP) 292 + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 282 293 2: 283 294 /* 284 295 * Entering deep idle state. ··· 309 302 310 303 bl save_sprs_to_stack 311 304 312 - IDLE_STATE_ENTER_SEQ(PPC_STOP) 305 + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 313 306 314 307 _GLOBAL(power7_idle) 315 308 /* Now check if user or arch enabled NAP mode */ ··· 360 353 ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 361 354 20: nop; 362 355 363 - 364 356 /* 365 - * r3 - requested stop state 357 + * r3 - The PSSCR value corresponding to the stop state. 358 + * r4 - The PSSCR mask corrresonding to the stop state. 366 359 */ 367 360 _GLOBAL(power9_idle_stop) 368 - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) 369 - or r4,r4,r3 370 - mtspr SPRN_PSSCR, r4 371 - li r4, 1 361 + mfspr r5,SPRN_PSSCR 362 + andc r5,r5,r4 363 + or r3,r3,r5 364 + mtspr SPRN_PSSCR,r3 372 365 LOAD_REG_ADDR(r5,power_enter_stop) 366 + li r4,1 373 367 b pnv_powersave_common 374 368 /* No return */ 375 369 /* ··· 552 544 */ 553 545 ble cr3,clear_lock 554 546 /* Time base re-sync */ 555 - bl opal_rm_resync_timebase; 547 + bl opal_resync_timebase; 556 548 /* 557 549 * If waking up from sleep, per core state is not lost, skip to 558 550 * clear_lock. ··· 641 633 fastsleep_workaround_at_exit: 642 634 li r3,1 643 635 li r4,0 644 - bl opal_rm_config_cpu_idle_state 636 + bl opal_config_cpu_idle_state 645 637 b timebase_resync 646 638 647 639 /*
+1
arch/powerpc/kernel/iomap.c
··· 8 8 #include <linux/export.h> 9 9 #include <asm/io.h> 10 10 #include <asm/pci-bridge.h> 11 + #include <asm/isa-bridge.h> 11 12 12 13 /* 13 14 * Here comes the ppc64 implementation of the IOMAP
+92
arch/powerpc/kernel/isa-bridge.c
··· 29 29 #include <asm/pci-bridge.h> 30 30 #include <asm/machdep.h> 31 31 #include <asm/ppc-pci.h> 32 + #include <asm/isa-bridge.h> 32 33 33 34 unsigned long isa_io_base; /* NULL if no ISA bus */ 34 35 EXPORT_SYMBOL(isa_io_base); ··· 165 164 isa_io_base = ISA_IO_BASE; 166 165 167 166 pr_debug("ISA bridge (early) is %s\n", np->full_name); 167 + } 168 + 169 + /** 170 + * isa_bridge_find_early - Find and map the ISA IO space early before 171 + * main PCI discovery. This is optionally called by 172 + * the arch code when adding PCI PHBs to get early 173 + * access to ISA IO ports 174 + */ 175 + void __init isa_bridge_init_non_pci(struct device_node *np) 176 + { 177 + const __be32 *ranges, *pbasep = NULL; 178 + int rlen, i, rs; 179 + u32 na, ns, pna; 180 + u64 cbase, pbase, size = 0; 181 + 182 + /* If we already have an ISA bridge, bail off */ 183 + if (isa_bridge_devnode != NULL) 184 + return; 185 + 186 + pna = of_n_addr_cells(np); 187 + if (of_property_read_u32(np, "#address-cells", &na) || 188 + of_property_read_u32(np, "#size-cells", &ns)) { 189 + pr_warn("ISA: Non-PCI bridge %s is missing address format\n", 190 + np->full_name); 191 + return; 192 + } 193 + 194 + /* Check it's a supported address format */ 195 + if (na != 2 || ns != 1) { 196 + pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n", 197 + np->full_name); 198 + return; 199 + } 200 + rs = na + ns + pna; 201 + 202 + /* Grab the ranges property */ 203 + ranges = of_get_property(np, "ranges", &rlen); 204 + if (ranges == NULL || rlen < rs) { 205 + pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n", 206 + np->full_name); 207 + return; 208 + } 209 + 210 + /* Parse it. 
We are only looking for IO space */ 211 + for (i = 0; (i + rs - 1) < rlen; i += rs) { 212 + if (be32_to_cpup(ranges + i) != 1) 213 + continue; 214 + cbase = be32_to_cpup(ranges + i + 1); 215 + size = of_read_number(ranges + i + na + pna, ns); 216 + pbasep = ranges + i + na; 217 + break; 218 + } 219 + 220 + /* Got something ? */ 221 + if (!size || !pbasep) { 222 + pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n", 223 + np->full_name); 224 + return; 225 + } 226 + 227 + /* Align size and make sure it's cropped to 64K */ 228 + size = PAGE_ALIGN(size); 229 + if (size > 0x10000) 230 + size = 0x10000; 231 + 232 + /* Map pbase */ 233 + pbase = of_translate_address(np, pbasep); 234 + if (pbase == OF_BAD_ADDR) { 235 + pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n", 236 + np->full_name); 237 + return; 238 + } 239 + 240 + /* We need page alignment */ 241 + if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { 242 + pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n", 243 + np->full_name); 244 + return; 245 + } 246 + 247 + /* Got it */ 248 + isa_bridge_devnode = np; 249 + 250 + /* Set the global ISA io base to indicate we have an ISA bridge 251 + * and map it 252 + */ 253 + isa_io_base = ISA_IO_BASE; 254 + __ioremap_at(pbase, (void *)ISA_IO_BASE, 255 + size, pgprot_val(pgprot_noncached(__pgprot(0)))); 256 + 257 + pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name); 168 258 } 169 259 170 260 /**
+8 -9
arch/powerpc/kernel/kprobes.c
··· 285 285 ".type kretprobe_trampoline, @function\n" 286 286 "kretprobe_trampoline:\n" 287 287 "nop\n" 288 + "blr\n" 288 289 ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); 289 290 290 291 /* ··· 338 337 339 338 kretprobe_assert(ri, orig_ret_address, trampoline_address); 340 339 regs->nip = orig_ret_address; 340 + /* 341 + * Make LR point to the orig_ret_address. 342 + * When the 'nop' inside the kretprobe_trampoline 343 + * is optimized, we can do a 'blr' after executing the 344 + * detour buffer code. 345 + */ 346 + regs->link = orig_ret_address; 341 347 342 348 reset_current_kprobe(); 343 349 kretprobe_hash_unlock(current, &flags); ··· 473 465 break; 474 466 } 475 467 return 0; 476 - } 477 - 478 - /* 479 - * Wrapper routine to for handling exceptions. 480 - */ 481 - int __kprobes kprobe_exceptions_notify(struct notifier_block *self, 482 - unsigned long val, void *data) 483 - { 484 - return NOTIFY_DONE; 485 468 } 486 469 487 470 unsigned long arch_deref_entry_point(void *entry)
+2 -1
arch/powerpc/kernel/legacy_serial.c
··· 233 233 * 234 234 * Note: Don't even try on P8 lpc, we know it's not directly mapped 235 235 */ 236 - if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { 236 + if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || 237 + of_get_property(isa_brg, "ranges", NULL)) { 237 238 taddr = of_translate_address(np, reg); 238 239 if (taddr == OF_BAD_ADDR) 239 240 taddr = 0;
+14 -14
arch/powerpc/kernel/misc_64.S
··· 80 80 * each other. 81 81 */ 82 82 ld r10,PPC64_CACHES@toc(r2) 83 - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ 83 + lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ 84 84 addi r5,r7,-1 85 85 andc r6,r3,r5 /* round low to line bdy */ 86 86 subf r8,r6,r4 /* compute length */ 87 87 add r8,r8,r5 /* ensure we get enough */ 88 - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ 88 + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ 89 89 srw. r8,r8,r9 /* compute line count */ 90 90 beqlr /* nothing to do? */ 91 91 mtctr r8 ··· 96 96 97 97 /* Now invalidate the instruction cache */ 98 98 99 - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ 99 + lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ 100 100 addi r5,r7,-1 101 101 andc r6,r3,r5 /* round low to line bdy */ 102 102 subf r8,r6,r4 /* compute length */ 103 103 add r8,r8,r5 104 - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ 104 + lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ 105 105 srw. r8,r8,r9 /* compute line count */ 106 106 beqlr /* nothing to do? */ 107 107 mtctr r8 ··· 128 128 * Different systems have different cache line sizes 129 129 */ 130 130 ld r10,PPC64_CACHES@toc(r2) 131 - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 131 + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ 132 132 addi r5,r7,-1 133 133 andc r6,r3,r5 /* round low to line bdy */ 134 134 subf r8,r6,r4 /* compute length */ 135 135 add r8,r8,r5 /* ensure we get enough */ 136 - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ 136 + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ 137 137 srw. r8,r8,r9 /* compute line count */ 138 138 beqlr /* nothing to do? 
*/ 139 139 mtctr r8 ··· 156 156 */ 157 157 _GLOBAL(flush_dcache_phys_range) 158 158 ld r10,PPC64_CACHES@toc(r2) 159 - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 159 + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ 160 160 addi r5,r7,-1 161 161 andc r6,r3,r5 /* round low to line bdy */ 162 162 subf r8,r6,r4 /* compute length */ 163 163 add r8,r8,r5 /* ensure we get enough */ 164 - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ 164 + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ 165 165 srw. r8,r8,r9 /* compute line count */ 166 166 beqlr /* nothing to do? */ 167 167 mfmsr r5 /* Disable MMU Data Relocation */ ··· 184 184 185 185 _GLOBAL(flush_inval_dcache_range) 186 186 ld r10,PPC64_CACHES@toc(r2) 187 - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ 187 + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ 188 188 addi r5,r7,-1 189 189 andc r6,r3,r5 /* round low to line bdy */ 190 190 subf r8,r6,r4 /* compute length */ 191 191 add r8,r8,r5 /* ensure we get enough */ 192 - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ 192 + lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ 193 193 srw. r8,r8,r9 /* compute line count */ 194 194 beqlr /* nothing to do? 
*/ 195 195 sync ··· 225 225 /* Flush the dcache */ 226 226 ld r7,PPC64_CACHES@toc(r2) 227 227 clrrdi r3,r3,PAGE_SHIFT /* Page align */ 228 - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ 229 - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ 228 + lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ 229 + lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ 230 230 mr r6,r3 231 231 mtctr r4 232 232 0: dcbst 0,r6 ··· 236 236 237 237 /* Now invalidate the icache */ 238 238 239 - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ 240 - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ 239 + lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ 240 + lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ 241 241 mtctr r4 242 242 1: icbi 0,r3 243 243 add r3,r3,r5
+347
arch/powerpc/kernel/optprobes.c
··· 1 + /* 2 + * Code for Kernel probes Jump optimization. 3 + * 4 + * Copyright 2017, Anju T, IBM Corp. 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + */ 11 + 12 + #include <linux/kprobes.h> 13 + #include <linux/jump_label.h> 14 + #include <linux/types.h> 15 + #include <linux/slab.h> 16 + #include <linux/list.h> 17 + #include <asm/kprobes.h> 18 + #include <asm/ptrace.h> 19 + #include <asm/cacheflush.h> 20 + #include <asm/code-patching.h> 21 + #include <asm/sstep.h> 22 + #include <asm/ppc-opcode.h> 23 + 24 + #define TMPL_CALL_HDLR_IDX \ 25 + (optprobe_template_call_handler - optprobe_template_entry) 26 + #define TMPL_EMULATE_IDX \ 27 + (optprobe_template_call_emulate - optprobe_template_entry) 28 + #define TMPL_RET_IDX \ 29 + (optprobe_template_ret - optprobe_template_entry) 30 + #define TMPL_OP_IDX \ 31 + (optprobe_template_op_address - optprobe_template_entry) 32 + #define TMPL_INSN_IDX \ 33 + (optprobe_template_insn - optprobe_template_entry) 34 + #define TMPL_END_IDX \ 35 + (optprobe_template_end - optprobe_template_entry) 36 + 37 + DEFINE_INSN_CACHE_OPS(ppc_optinsn); 38 + 39 + static bool insn_page_in_use; 40 + 41 + static void *__ppc_alloc_insn_page(void) 42 + { 43 + if (insn_page_in_use) 44 + return NULL; 45 + insn_page_in_use = true; 46 + return &optinsn_slot; 47 + } 48 + 49 + static void __ppc_free_insn_page(void *page __maybe_unused) 50 + { 51 + insn_page_in_use = false; 52 + } 53 + 54 + struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { 55 + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), 56 + .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), 57 + /* insn_size initialized later */ 58 + .alloc = __ppc_alloc_insn_page, 59 + .free = __ppc_free_insn_page, 60 + .nr_garbage = 0, 61 + }; 62 + 63 + /* 64 + * Check 
if we can optimize this probe. Returns NIP post-emulation if this can 65 + * be optimized and 0 otherwise. 66 + */ 67 + static unsigned long can_optimize(struct kprobe *p) 68 + { 69 + struct pt_regs regs; 70 + struct instruction_op op; 71 + unsigned long nip = 0; 72 + 73 + /* 74 + * kprobe placed for kretprobe during boot time 75 + * has a 'nop' instruction, which can be emulated. 76 + * So further checks can be skipped. 77 + */ 78 + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) 79 + return (unsigned long)p->addr + sizeof(kprobe_opcode_t); 80 + 81 + /* 82 + * We only support optimizing kernel addresses, but not 83 + * module addresses. 84 + * 85 + * FIXME: Optimize kprobes placed in module addresses. 86 + */ 87 + if (!is_kernel_addr((unsigned long)p->addr)) 88 + return 0; 89 + 90 + memset(&regs, 0, sizeof(struct pt_regs)); 91 + regs.nip = (unsigned long)p->addr; 92 + regs.trap = 0x0; 93 + regs.msr = MSR_KERNEL; 94 + 95 + /* 96 + * Kprobe placed in conditional branch instructions are 97 + * not optimized, as we can't predict the nip prior with 98 + * dummy pt_regs and can not ensure that the return branch 99 + * from detour buffer falls in the range of address (i.e 32MB). 100 + * A branch back from trampoline is set up in the detour buffer 101 + * to the nip returned by the analyse_instr() here. 102 + * 103 + * Ensure that the instruction is not a conditional branch, 104 + * and that can be emulated. 
105 + */ 106 + if (!is_conditional_branch(*p->ainsn.insn) && 107 + analyse_instr(&op, &regs, *p->ainsn.insn)) 108 + nip = regs.nip; 109 + 110 + return nip; 111 + } 112 + 113 + static void optimized_callback(struct optimized_kprobe *op, 114 + struct pt_regs *regs) 115 + { 116 + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 117 + unsigned long flags; 118 + 119 + /* This is possible if op is under delayed unoptimizing */ 120 + if (kprobe_disabled(&op->kp)) 121 + return; 122 + 123 + local_irq_save(flags); 124 + hard_irq_disable(); 125 + 126 + if (kprobe_running()) { 127 + kprobes_inc_nmissed_count(&op->kp); 128 + } else { 129 + __this_cpu_write(current_kprobe, &op->kp); 130 + regs->nip = (unsigned long)op->kp.addr; 131 + kcb->kprobe_status = KPROBE_HIT_ACTIVE; 132 + opt_pre_handler(&op->kp, regs); 133 + __this_cpu_write(current_kprobe, NULL); 134 + } 135 + 136 + /* 137 + * No need for an explicit __hard_irq_enable() here. 138 + * local_irq_restore() will re-enable interrupts, 139 + * if they were hard disabled. 140 + */ 141 + local_irq_restore(flags); 142 + } 143 + NOKPROBE_SYMBOL(optimized_callback); 144 + 145 + void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 146 + { 147 + if (op->optinsn.insn) { 148 + free_ppc_optinsn_slot(op->optinsn.insn, 1); 149 + op->optinsn.insn = NULL; 150 + } 151 + } 152 + 153 + /* 154 + * emulate_step() requires insn to be emulated as 155 + * second parameter. Load register 'r4' with the 156 + * instruction. 157 + */ 158 + void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) 159 + { 160 + /* addis r4,0,(insn)@h */ 161 + *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) | 162 + ((val >> 16) & 0xffff); 163 + 164 + /* ori r4,r4,(insn)@l */ 165 + *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) | 166 + (val & 0xffff); 167 + } 168 + 169 + /* 170 + * Generate instructions to load provided immediate 64-bit value 171 + * to register 'r3' and patch these instructions at 'addr'. 
172 + */ 173 + void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) 174 + { 175 + /* lis r3,(op)@highest */ 176 + *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) | 177 + ((val >> 48) & 0xffff); 178 + 179 + /* ori r3,r3,(op)@higher */ 180 + *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | 181 + ((val >> 32) & 0xffff); 182 + 183 + /* rldicr r3,r3,32,31 */ 184 + *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) | 185 + __PPC_SH64(32) | __PPC_ME64(31); 186 + 187 + /* oris r3,r3,(op)@h */ 188 + *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) | 189 + ((val >> 16) & 0xffff); 190 + 191 + /* ori r3,r3,(op)@l */ 192 + *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | 193 + (val & 0xffff); 194 + } 195 + 196 + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) 197 + { 198 + kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; 199 + kprobe_opcode_t *op_callback_addr, *emulate_step_addr; 200 + long b_offset; 201 + unsigned long nip; 202 + 203 + kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; 204 + 205 + nip = can_optimize(p); 206 + if (!nip) 207 + return -EILSEQ; 208 + 209 + /* Allocate instruction slot for detour buffer */ 210 + buff = get_ppc_optinsn_slot(); 211 + if (!buff) 212 + return -ENOMEM; 213 + 214 + /* 215 + * OPTPROBE uses 'b' instruction to branch to optinsn.insn. 216 + * 217 + * The target address has to be relatively nearby, to permit use 218 + * of branch instruction in powerpc, because the address is specified 219 + * in an immediate field in the instruction opcode itself, ie 24 bits 220 + * in the opcode specify the address. Therefore the address should 221 + * be within 32MB on either side of the current instruction. 
222 + */ 223 + b_offset = (unsigned long)buff - (unsigned long)p->addr; 224 + if (!is_offset_in_branch_range(b_offset)) 225 + goto error; 226 + 227 + /* Check if the return address is also within 32MB range */ 228 + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - 229 + (unsigned long)nip; 230 + if (!is_offset_in_branch_range(b_offset)) 231 + goto error; 232 + 233 + /* Setup template */ 234 + memcpy(buff, optprobe_template_entry, 235 + TMPL_END_IDX * sizeof(kprobe_opcode_t)); 236 + 237 + /* 238 + * Fixup the template with instructions to: 239 + * 1. load the address of the actual probepoint 240 + */ 241 + patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); 242 + 243 + /* 244 + * 2. branch to optimized_callback() and emulate_step() 245 + */ 246 + kprobe_lookup_name("optimized_callback", op_callback_addr); 247 + kprobe_lookup_name("emulate_step", emulate_step_addr); 248 + if (!op_callback_addr || !emulate_step_addr) { 249 + WARN(1, "kprobe_lookup_name() failed\n"); 250 + goto error; 251 + } 252 + 253 + branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, 254 + (unsigned long)op_callback_addr, 255 + BRANCH_SET_LINK); 256 + 257 + branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, 258 + (unsigned long)emulate_step_addr, 259 + BRANCH_SET_LINK); 260 + 261 + if (!branch_op_callback || !branch_emulate_step) 262 + goto error; 263 + 264 + buff[TMPL_CALL_HDLR_IDX] = branch_op_callback; 265 + buff[TMPL_EMULATE_IDX] = branch_emulate_step; 266 + 267 + /* 268 + * 3. load instruction to be emulated into relevant register, and 269 + */ 270 + patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); 271 + 272 + /* 273 + * 4. 
branch back from trampoline 274 + */ 275 + buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX, 276 + (unsigned long)nip, 0); 277 + 278 + flush_icache_range((unsigned long)buff, 279 + (unsigned long)(&buff[TMPL_END_IDX])); 280 + 281 + op->optinsn.insn = buff; 282 + 283 + return 0; 284 + 285 + error: 286 + free_ppc_optinsn_slot(buff, 0); 287 + return -ERANGE; 288 + 289 + } 290 + 291 + int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) 292 + { 293 + return optinsn->insn != NULL; 294 + } 295 + 296 + /* 297 + * On powerpc, Optprobes always replaces one instruction (4 bytes 298 + * aligned and 4 bytes long). It is impossible to encounter another 299 + * kprobe in this address range. So always return 0. 300 + */ 301 + int arch_check_optimized_kprobe(struct optimized_kprobe *op) 302 + { 303 + return 0; 304 + } 305 + 306 + void arch_optimize_kprobes(struct list_head *oplist) 307 + { 308 + struct optimized_kprobe *op; 309 + struct optimized_kprobe *tmp; 310 + 311 + list_for_each_entry_safe(op, tmp, oplist, list) { 312 + /* 313 + * Backup instructions which will be replaced 314 + * by jump address 315 + */ 316 + memcpy(op->optinsn.copied_insn, op->kp.addr, 317 + RELATIVEJUMP_SIZE); 318 + patch_instruction(op->kp.addr, 319 + create_branch((unsigned int *)op->kp.addr, 320 + (unsigned long)op->optinsn.insn, 0)); 321 + list_del_init(&op->list); 322 + } 323 + } 324 + 325 + void arch_unoptimize_kprobe(struct optimized_kprobe *op) 326 + { 327 + arch_arm_kprobe(&op->kp); 328 + } 329 + 330 + void arch_unoptimize_kprobes(struct list_head *oplist, 331 + struct list_head *done_list) 332 + { 333 + struct optimized_kprobe *op; 334 + struct optimized_kprobe *tmp; 335 + 336 + list_for_each_entry_safe(op, tmp, oplist, list) { 337 + arch_unoptimize_kprobe(op); 338 + list_move(&op->list, done_list); 339 + } 340 + } 341 + 342 + int arch_within_optimized_kprobe(struct optimized_kprobe *op, 343 + unsigned long addr) 344 + { 345 + return ((unsigned long)op->kp.addr 
<= addr && 346 + (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); 347 + }
+135
arch/powerpc/kernel/optprobes_head.S
··· 1 + /* 2 + * Code to prepare detour buffer for optprobes in Kernel. 3 + * 4 + * Copyright 2017, Anju T, IBM Corp. 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + */ 11 + 12 + #include <asm/ppc_asm.h> 13 + #include <asm/ptrace.h> 14 + #include <asm/asm-offsets.h> 15 + 16 + #define OPT_SLOT_SIZE 65536 17 + 18 + .balign 4 19 + 20 + /* 21 + * Reserve an area to allocate slots for detour buffer. 22 + * This is part of .text section (rather than vmalloc area) 23 + * as this needs to be within 32MB of the probed address. 24 + */ 25 + .global optinsn_slot 26 + optinsn_slot: 27 + .space OPT_SLOT_SIZE 28 + 29 + /* 30 + * Optprobe template: 31 + * This template gets copied into one of the slots in optinsn_slot 32 + * and gets fixed up with real optprobe structures et al. 33 + */ 34 + .global optprobe_template_entry 35 + optprobe_template_entry: 36 + /* Create an in-memory pt_regs */ 37 + stdu r1,-INT_FRAME_SIZE(r1) 38 + SAVE_GPR(0,r1) 39 + /* Save the previous SP into stack */ 40 + addi r0,r1,INT_FRAME_SIZE 41 + std r0,GPR1(r1) 42 + SAVE_10GPRS(2,r1) 43 + SAVE_10GPRS(12,r1) 44 + SAVE_10GPRS(22,r1) 45 + /* Save SPRS */ 46 + mfmsr r5 47 + std r5,_MSR(r1) 48 + li r5,0x700 49 + std r5,_TRAP(r1) 50 + li r5,0 51 + std r5,ORIG_GPR3(r1) 52 + std r5,RESULT(r1) 53 + mfctr r5 54 + std r5,_CTR(r1) 55 + mflr r5 56 + std r5,_LINK(r1) 57 + mfspr r5,SPRN_XER 58 + std r5,_XER(r1) 59 + mfcr r5 60 + std r5,_CCR(r1) 61 + lbz r5,PACASOFTIRQEN(r13) 62 + std r5,SOFTE(r1) 63 + mfdar r5 64 + std r5,_DAR(r1) 65 + mfdsisr r5 66 + std r5,_DSISR(r1) 67 + 68 + .global optprobe_template_op_address 69 + optprobe_template_op_address: 70 + /* 71 + * Parameters to optimized_callback(): 72 + * 1. 
optimized_kprobe structure in r3 73 + */ 74 + nop 75 + nop 76 + nop 77 + nop 78 + nop 79 + /* 2. pt_regs pointer in r4 */ 80 + addi r4,r1,STACK_FRAME_OVERHEAD 81 + 82 + .global optprobe_template_call_handler 83 + optprobe_template_call_handler: 84 + /* Branch to optimized_callback() */ 85 + nop 86 + 87 + /* 88 + * Parameters for instruction emulation: 89 + * 1. Pass SP in register r3. 90 + */ 91 + addi r3,r1,STACK_FRAME_OVERHEAD 92 + 93 + .global optprobe_template_insn 94 + optprobe_template_insn: 95 + /* 2, Pass instruction to be emulated in r4 */ 96 + nop 97 + nop 98 + 99 + .global optprobe_template_call_emulate 100 + optprobe_template_call_emulate: 101 + /* Branch to emulate_step() */ 102 + nop 103 + 104 + /* 105 + * All done. 106 + * Now, restore the registers... 107 + */ 108 + ld r5,_MSR(r1) 109 + mtmsr r5 110 + ld r5,_CTR(r1) 111 + mtctr r5 112 + ld r5,_LINK(r1) 113 + mtlr r5 114 + ld r5,_XER(r1) 115 + mtxer r5 116 + ld r5,_CCR(r1) 117 + mtcr r5 118 + ld r5,_DAR(r1) 119 + mtdar r5 120 + ld r5,_DSISR(r1) 121 + mtdsisr r5 122 + REST_GPR(0,r1) 123 + REST_10GPRS(2,r1) 124 + REST_10GPRS(12,r1) 125 + REST_10GPRS(22,r1) 126 + /* Restore the previous SP */ 127 + addi r1,r1,INT_FRAME_SIZE 128 + 129 + .global optprobe_template_ret 130 + optprobe_template_ret: 131 + /* ... and jump back from trampoline */ 132 + nop 133 + 134 + .global optprobe_template_end 135 + optprobe_template_end:
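The comment above explains why `optinsn_slot` lives in `.text` rather than vmalloc space: a powerpc relative branch encodes a 24-bit LI field shifted left by 2, i.e. a 26-bit signed byte offset, so the detour buffer must sit within 32MB of the probed address. A hedged sketch of that reachability test (a simplification; the kernel's `create_branch()` also handles absolute branches):

```c
#include <assert.h>

/* +/- 32MB reach of an unconditional relative branch on powerpc. */
#define BRANCH_RANGE (32L * 1024 * 1024)

/* Can a relative branch at 'from' reach 'to'? */
static int branch_reachable(unsigned long from, unsigned long to)
{
	long offset = (long)to - (long)from;

	return offset >= -BRANCH_RANGE && offset < BRANCH_RANGE;
}
```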
+18 -2
arch/powerpc/kernel/prom_init.c
··· 649 649 struct option_vector1 { 650 650 u8 byte1; 651 651 u8 arch_versions; 652 + u8 arch_versions3; 652 653 } __packed; 653 654 654 655 struct option_vector2 { ··· 692 691 u8 reserved2; 693 692 __be16 reserved3; 694 693 u8 subprocessors; 694 + u8 byte22; 695 + u8 intarch; 696 + u8 mmu; 695 697 } __packed; 696 698 697 699 struct option_vector6 { ··· 704 700 } __packed; 705 701 706 702 struct ibm_arch_vec { 707 - struct { u32 mask, val; } pvrs[10]; 703 + struct { u32 mask, val; } pvrs[12]; 708 704 709 705 u8 num_vectors; 710 706 ··· 754 750 .val = cpu_to_be32(0x004d0000), 755 751 }, 756 752 { 753 + .mask = cpu_to_be32(0xffff0000), /* POWER9 */ 754 + .val = cpu_to_be32(0x004e0000), 755 + }, 756 + { 757 + .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ 758 + .val = cpu_to_be32(0x0f000005), 759 + }, 760 + { 757 761 .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ 758 762 .val = cpu_to_be32(0x0f000004), 759 763 }, ··· 786 774 .byte1 = 0, 787 775 .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | 788 776 OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, 777 + .arch_versions3 = OV1_PPC_3_00, 789 778 }, 790 779 791 780 .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), ··· 839 826 0, 840 827 #endif 841 828 .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), 842 - .bin_opts = 0, 829 + .bin_opts = OV5_FEAT(OV5_RESIZE_HPT), 843 830 .micro_checkpoint = 0, 844 831 .reserved0 = 0, 845 832 .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ ··· 849 836 .reserved2 = 0, 850 837 .reserved3 = 0, 851 838 .subprocessors = 1, 839 + .intarch = 0, 840 + .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | 841 + OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), 852 842 }, 853 843 854 844 /* option vector 6: IBM PAPR hints */
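The `pvrs` table extended above is matched by masking the processor version register against each entry in order. A simplified sketch of that matching (values taken from the diff; the sketch ignores the `cpu_to_be32()` byte-swapping the firmware interface requires):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative subset of the pvrs table: an entry matches when
 * (pvr & mask) == val. 0x004e is the POWER9 PVR added in this diff. */
struct pvr_entry { uint32_t mask, val; };

static const struct pvr_entry pvrs[] = {
	{ 0xffff0000, 0x004d0000 },	/* POWER8 */
	{ 0xffff0000, 0x004e0000 },	/* POWER9 */
	{ 0xffffffff, 0x0f000005 },	/* all ISA 3.00-compliant */
};

static int pvr_match(uint32_t pvr)
{
	for (size_t i = 0; i < sizeof(pvrs) / sizeof(pvrs[0]); i++)
		if ((pvr & pvrs[i].mask) == pvrs[i].val)
			return 1;
	return 0;
}
```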
+15 -17
arch/powerpc/kernel/rtas.c
··· 1145 1145 void __init rtas_initialize(void) 1146 1146 { 1147 1147 unsigned long rtas_region = RTAS_INSTANTIATE_MAX; 1148 + u32 base, size, entry; 1149 + int no_base, no_size, no_entry; 1148 1150 1149 1151 /* Get RTAS dev node and fill up our "rtas" structure with infos 1150 1152 * about it. 1151 1153 */ 1152 1154 rtas.dev = of_find_node_by_name(NULL, "rtas"); 1153 - if (rtas.dev) { 1154 - const __be32 *basep, *entryp, *sizep; 1155 - 1156 - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); 1157 - sizep = of_get_property(rtas.dev, "rtas-size", NULL); 1158 - if (basep != NULL && sizep != NULL) { 1159 - rtas.base = __be32_to_cpu(*basep); 1160 - rtas.size = __be32_to_cpu(*sizep); 1161 - entryp = of_get_property(rtas.dev, 1162 - "linux,rtas-entry", NULL); 1163 - if (entryp == NULL) /* Ugh */ 1164 - rtas.entry = rtas.base; 1165 - else 1166 - rtas.entry = __be32_to_cpu(*entryp); 1167 - } else 1168 - rtas.dev = NULL; 1169 - } 1170 1155 if (!rtas.dev) 1171 1156 return; 1157 + 1158 + no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); 1159 + no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); 1160 + if (no_base || no_size) { 1161 + of_node_put(rtas.dev); 1162 + rtas.dev = NULL; 1163 + return; 1164 + } 1165 + 1166 + rtas.base = base; 1167 + rtas.size = size; 1168 + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); 1169 + rtas.entry = no_entry ? rtas.base : entry; 1172 1170 1173 1171 /* If RTAS was found, allocate the RMO buffer for it and look for 1174 1172 * the stop-self token if any
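The rewritten `rtas_initialize()` treats "linux,rtas-base" and "rtas-size" as mandatory and "linux,rtas-entry" as optional, defaulting the entry point to the base. A userspace sketch of just that decision logic, with `no_*` flags standing in for the error returns of `of_property_read_u32()`:

```c
#include <assert.h>
#include <stdint.h>

struct rtas_cfg { uint32_t base, size, entry; };

/* Mirror the fallback rules from the diff: bail out (RTAS absent)
 * unless base and size are both present; entry defaults to base. */
static int rtas_fill(struct rtas_cfg *r,
		     int no_base, uint32_t base,
		     int no_size, uint32_t size,
		     int no_entry, uint32_t entry)
{
	if (no_base || no_size)
		return -1;
	r->base = base;
	r->size = size;
	r->entry = no_entry ? base : entry;
	return 0;
}
```

The refactor also fixes a leak the old code had on the error path: the node reference is now dropped with `of_node_put()` before `rtas.dev` is cleared.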
+6 -1
arch/powerpc/kernel/rtasd.c
··· 21 21 #include <linux/cpu.h> 22 22 #include <linux/workqueue.h> 23 23 #include <linux/slab.h> 24 + #include <linux/topology.h> 24 25 25 26 #include <linux/uaccess.h> 26 27 #include <asm/io.h> ··· 283 282 * the RTAS event. 284 283 */ 285 284 pseries_devicetree_update(-prrn_update_scope); 285 + arch_update_cpu_topology(); 286 286 } 287 287 288 288 static DECLARE_WORK(prrn_work, prrn_work_fn); ··· 436 434 } 437 435 438 436 if (error == 0) { 439 - pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); 437 + if (rtas_error_type((struct rtas_error_log *)logdata) != 438 + RTAS_TYPE_PRRN) 439 + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 440 + 0); 440 441 handle_rtas_event((struct rtas_error_log *)logdata); 441 442 } 442 443
+9
arch/powerpc/kernel/setup-common.c
··· 87 87 int boot_cpuid = -1; 88 88 EXPORT_SYMBOL_GPL(boot_cpuid); 89 89 90 + /* 91 + * These are used in binfmt_elf.c to put aux entries on the stack 92 + * for each elf executable being started. 93 + */ 94 + int dcache_bsize; 95 + int icache_bsize; 96 + int ucache_bsize; 97 + 98 + 90 99 unsigned long klimit = (unsigned long) _end; 91 100 92 101 /*
-8
arch/powerpc/kernel/setup_32.c
··· 59 59 EXPORT_SYMBOL(DMA_MODE_WRITE); 60 60 61 61 /* 62 - * These are used in binfmt_elf.c to put aux entries on the stack 63 - * for each elf executable being started. 64 - */ 65 - int dcache_bsize; 66 - int icache_bsize; 67 - int ucache_bsize; 68 - 69 - /* 70 62 * We're called here very early in the boot. 71 63 * 72 64 * Note that the kernel may be running at an address which is different
+125 -71
arch/powerpc/kernel/setup_64.c
··· 77 77 int spinning_secondaries; 78 78 u64 ppc64_pft_size; 79 79 80 - /* Pick defaults since we might want to patch instructions 81 - * before we've read this from the device tree. 82 - */ 83 80 struct ppc64_caches ppc64_caches = { 84 - .dline_size = 0x40, 85 - .log_dline_size = 6, 86 - .iline_size = 0x40, 87 - .log_iline_size = 6 81 + .l1d = { 82 + .block_size = 0x40, 83 + .log_block_size = 6, 84 + }, 85 + .l1i = { 86 + .block_size = 0x40, 87 + .log_block_size = 6 88 + }, 88 89 }; 89 90 EXPORT_SYMBOL_GPL(ppc64_caches); 90 - 91 - /* 92 - * These are used in binfmt_elf.c to put aux entries on the stack 93 - * for each elf executable being started. 94 - */ 95 - int dcache_bsize; 96 - int icache_bsize; 97 - int ucache_bsize; 98 91 99 92 #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) 100 93 void __init setup_tlb_core_data(void) ··· 401 408 * cache informations about the CPU that will be used by cache flush 402 409 * routines and/or provided to userland 403 410 */ 411 + 412 + static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, 413 + u32 bsize, u32 sets) 414 + { 415 + info->size = size; 416 + info->sets = sets; 417 + info->line_size = lsize; 418 + info->block_size = bsize; 419 + info->log_block_size = __ilog2(bsize); 420 + info->blocks_per_page = PAGE_SIZE / bsize; 421 + 422 + if (sets == 0) 423 + info->assoc = 0xffff; 424 + else 425 + info->assoc = size / (sets * lsize); 426 + } 427 + 428 + static bool __init parse_cache_info(struct device_node *np, 429 + bool icache, 430 + struct ppc_cache_info *info) 431 + { 432 + static const char *ipropnames[] __initdata = { 433 + "i-cache-size", 434 + "i-cache-sets", 435 + "i-cache-block-size", 436 + "i-cache-line-size", 437 + }; 438 + static const char *dpropnames[] __initdata = { 439 + "d-cache-size", 440 + "d-cache-sets", 441 + "d-cache-block-size", 442 + "d-cache-line-size", 443 + }; 444 + const char **propnames = icache ? 
ipropnames : dpropnames; 445 + const __be32 *sizep, *lsizep, *bsizep, *setsp; 446 + u32 size, lsize, bsize, sets; 447 + bool success = true; 448 + 449 + size = 0; 450 + sets = -1u; 451 + lsize = bsize = cur_cpu_spec->dcache_bsize; 452 + sizep = of_get_property(np, propnames[0], NULL); 453 + if (sizep != NULL) 454 + size = be32_to_cpu(*sizep); 455 + setsp = of_get_property(np, propnames[1], NULL); 456 + if (setsp != NULL) 457 + sets = be32_to_cpu(*setsp); 458 + bsizep = of_get_property(np, propnames[2], NULL); 459 + lsizep = of_get_property(np, propnames[3], NULL); 460 + if (bsizep == NULL) 461 + bsizep = lsizep; 462 + if (lsizep != NULL) 463 + lsize = be32_to_cpu(*lsizep); 464 + if (bsizep != NULL) 465 + bsize = be32_to_cpu(*bsizep); 466 + if (sizep == NULL || bsizep == NULL || lsizep == NULL) 467 + success = false; 468 + 469 + /* 470 + * OF is weird .. it represents fully associative caches 471 + * as "1 way" which doesn't make much sense and doesn't 472 + * leave room for direct mapped. We'll assume that 0 473 + * in OF means direct mapped for that reason. 474 + */ 475 + if (sets == 1) 476 + sets = 0; 477 + else if (sets == 0) 478 + sets = 1; 479 + 480 + init_cache_info(info, size, lsize, bsize, sets); 481 + 482 + return success; 483 + } 484 + 404 485 void __init initialize_cache_info(void) 405 486 { 406 - struct device_node *np; 407 - unsigned long num_cpus = 0; 487 + struct device_node *cpu = NULL, *l2, *l3 = NULL; 488 + u32 pvr; 408 489 409 490 DBG(" -> initialize_cache_info()\n"); 410 491 411 - for_each_node_by_type(np, "cpu") { 412 - num_cpus += 1; 492 + /* 493 + * All shipping POWER8 machines have a firmware bug that 494 + * puts incorrect information in the device-tree. 
This will 495 + * be (hopefully) fixed for future chips but for now hard 496 + * code the values if we are running on one of these 497 + */ 498 + pvr = PVR_VER(mfspr(SPRN_PVR)); 499 + if (pvr == PVR_POWER8 || pvr == PVR_POWER8E || 500 + pvr == PVR_POWER8NVL) { 501 + /* size lsize blk sets */ 502 + init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32); 503 + init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64); 504 + init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512); 505 + init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192); 506 + } else 507 + cpu = of_find_node_by_type(NULL, "cpu"); 508 + 509 + /* 510 + * We're assuming *all* of the CPUs have the same 511 + * d-cache and i-cache sizes... -Peter 512 + */ 513 + if (cpu) { 514 + if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) 515 + DBG("Argh, can't find dcache properties !\n"); 516 + 517 + if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) 518 + DBG("Argh, can't find icache properties !\n"); 413 519 414 520 /* 415 - * We're assuming *all* of the CPUs have the same 416 - * d-cache and i-cache sizes... -Peter 521 + * Try to find the L2 and L3 if any. Assume they are 522 + * unified and use the D-side properties. 417 523 */ 418 - if (num_cpus == 1) { 419 - const __be32 *sizep, *lsizep; 420 - u32 size, lsize; 421 - 422 - size = 0; 423 - lsize = cur_cpu_spec->dcache_bsize; 424 - sizep = of_get_property(np, "d-cache-size", NULL); 425 - if (sizep != NULL) 426 - size = be32_to_cpu(*sizep); 427 - lsizep = of_get_property(np, "d-cache-block-size", 428 - NULL); 429 - /* fallback if block size missing */ 430 - if (lsizep == NULL) 431 - lsizep = of_get_property(np, 432 - "d-cache-line-size", 433 - NULL); 434 - if (lsizep != NULL) 435 - lsize = be32_to_cpu(*lsizep); 436 - if (sizep == NULL || lsizep == NULL) 437 - DBG("Argh, can't find dcache properties ! 
" 438 - "sizep: %p, lsizep: %p\n", sizep, lsizep); 439 - 440 - ppc64_caches.dsize = size; 441 - ppc64_caches.dline_size = lsize; 442 - ppc64_caches.log_dline_size = __ilog2(lsize); 443 - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; 444 - 445 - size = 0; 446 - lsize = cur_cpu_spec->icache_bsize; 447 - sizep = of_get_property(np, "i-cache-size", NULL); 448 - if (sizep != NULL) 449 - size = be32_to_cpu(*sizep); 450 - lsizep = of_get_property(np, "i-cache-block-size", 451 - NULL); 452 - if (lsizep == NULL) 453 - lsizep = of_get_property(np, 454 - "i-cache-line-size", 455 - NULL); 456 - if (lsizep != NULL) 457 - lsize = be32_to_cpu(*lsizep); 458 - if (sizep == NULL || lsizep == NULL) 459 - DBG("Argh, can't find icache properties ! " 460 - "sizep: %p, lsizep: %p\n", sizep, lsizep); 461 - 462 - ppc64_caches.isize = size; 463 - ppc64_caches.iline_size = lsize; 464 - ppc64_caches.log_iline_size = __ilog2(lsize); 465 - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; 524 + l2 = of_find_next_cache_node(cpu); 525 + of_node_put(cpu); 526 + if (l2) { 527 + parse_cache_info(l2, false, &ppc64_caches.l2); 528 + l3 = of_find_next_cache_node(l2); 529 + of_node_put(l2); 530 + } 531 + if (l3) { 532 + parse_cache_info(l3, false, &ppc64_caches.l3); 533 + of_node_put(l3); 466 534 } 467 535 } 468 536 469 537 /* For use by binfmt_elf */ 470 - dcache_bsize = ppc64_caches.dline_size; 471 - icache_bsize = ppc64_caches.iline_size; 538 + dcache_bsize = ppc64_caches.l1d.block_size; 539 + icache_bsize = ppc64_caches.l1i.block_size; 472 540 473 541 DBG(" <- initialize_cache_info()\n"); 474 542 }
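The new `init_cache_info()` derives the fields glibc will consume from the raw size/line/block/set numbers, with `sets == 0` encoding a fully associative cache (reported via the sentinel associativity 0xffff). A standalone sketch of that computation, exercised below with the hard-coded POWER8 L1D geometry from the diff:

```c
#include <assert.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

struct cache_info {
	uint32_t size, sets, line_size, block_size;
	uint32_t log_block_size, blocks_per_page, assoc;
};

static uint32_t ilog2_u32(uint32_t v)
{
	uint32_t r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Mirrors init_cache_info(): associativity is size / (sets * lsize),
 * with sets == 0 meaning fully associative. */
static void cache_fill(struct cache_info *info, uint32_t size,
		       uint32_t lsize, uint32_t bsize, uint32_t sets)
{
	info->size = size;
	info->sets = sets;
	info->line_size = lsize;
	info->block_size = bsize;
	info->log_block_size = ilog2_u32(bsize);
	info->blocks_per_page = PAGE_SIZE / bsize;
	info->assoc = sets ? size / (sets * lsize) : 0xffff;
}
```

This is also why `parse_cache_info()` swaps OF's conventions on the way in: OF uses "1 way" for fully associative, which the kernel normalizes to 0 sets before calling the helper.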
+8 -10
arch/powerpc/kernel/vdso.c
··· 736 736 if (firmware_has_feature(FW_FEATURE_LPAR)) 737 737 vdso_data->platform |= 1; 738 738 vdso_data->physicalMemorySize = memblock_phys_mem_size(); 739 - vdso_data->dcache_size = ppc64_caches.dsize; 740 - vdso_data->dcache_line_size = ppc64_caches.dline_size; 741 - vdso_data->icache_size = ppc64_caches.isize; 742 - vdso_data->icache_line_size = ppc64_caches.iline_size; 743 - 744 - /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ 745 - vdso_data->dcache_block_size = ppc64_caches.dline_size; 746 - vdso_data->icache_block_size = ppc64_caches.iline_size; 747 - vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size; 748 - vdso_data->icache_log_block_size = ppc64_caches.log_iline_size; 739 + vdso_data->dcache_size = ppc64_caches.l1d.size; 740 + vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; 741 + vdso_data->icache_size = ppc64_caches.l1i.size; 742 + vdso_data->icache_line_size = ppc64_caches.l1i.line_size; 743 + vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; 744 + vdso_data->icache_block_size = ppc64_caches.l1i.block_size; 745 + vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; 746 + vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; 749 747 750 748 /* 751 749 * Calculate the size of the 64 bits vDSO
+2 -1
arch/powerpc/kvm/Makefile
··· 70 70 kvm-hv-y += \ 71 71 book3s_hv.o \ 72 72 book3s_hv_interrupts.o \ 73 - book3s_64_mmu_hv.o 73 + book3s_64_mmu_hv.o \ 74 + book3s_64_mmu_radix.o 74 75 75 76 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ 76 77 book3s_hv_rm_xics.o
+1
arch/powerpc/kvm/book3s.c
··· 239 239 kvmppc_set_dsisr(vcpu, flags); 240 240 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); 241 241 } 242 + EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage); /* used by kvm_hv */ 242 243 243 244 void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) 244 245 {
+67 -43
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 119 119 long err = -EBUSY; 120 120 long order; 121 121 122 + if (kvm_is_radix(kvm)) 123 + return -EINVAL; 124 + 122 125 mutex_lock(&kvm->lock); 123 126 if (kvm->arch.hpte_setup_done) { 124 127 kvm->arch.hpte_setup_done = 0; ··· 155 152 156 153 void kvmppc_free_hpt(struct kvm *kvm) 157 154 { 158 - kvmppc_free_lpid(kvm->arch.lpid); 159 155 vfree(kvm->arch.revmap); 160 156 if (kvm->arch.hpt_cma_alloc) 161 157 kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt), 162 158 1 << (kvm->arch.hpt_order - PAGE_SHIFT)); 163 - else 159 + else if (kvm->arch.hpt_virt) 164 160 free_pages(kvm->arch.hpt_virt, 165 161 kvm->arch.hpt_order - PAGE_SHIFT); 166 162 } ··· 394 392 return (instr & mask) != 0; 395 393 } 396 394 397 - static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 398 - unsigned long gpa, gva_t ea, int is_store) 395 + int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 396 + unsigned long gpa, gva_t ea, int is_store) 399 397 { 400 398 u32 last_inst; 401 399 ··· 459 457 struct vm_area_struct *vma; 460 458 unsigned long rcbits; 461 459 long mmio_update; 460 + 461 + if (kvm_is_radix(kvm)) 462 + return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr); 462 463 463 464 /* 464 465 * Real-mode code has already searched the HPT and found the ··· 700 695 srcu_read_unlock(&kvm->srcu, srcu_idx); 701 696 } 702 697 698 + typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot, 699 + unsigned long gfn); 700 + 703 701 static int kvm_handle_hva_range(struct kvm *kvm, 704 702 unsigned long start, 705 703 unsigned long end, 706 - int (*handler)(struct kvm *kvm, 707 - unsigned long *rmapp, 708 - unsigned long gfn)) 704 + hva_handler_fn handler) 709 705 { 710 706 int ret; 711 707 int retval = 0; ··· 731 725 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 732 726 733 727 for (; gfn < gfn_end; ++gfn) { 734 - gfn_t gfn_offset = gfn - memslot->base_gfn; 735 - 736 - ret = handler(kvm, 
&memslot->arch.rmap[gfn_offset], gfn); 728 + ret = handler(kvm, memslot, gfn); 737 729 retval |= ret; 738 730 } 739 731 } ··· 740 736 } 741 737 742 738 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 743 - int (*handler)(struct kvm *kvm, unsigned long *rmapp, 744 - unsigned long gfn)) 739 + hva_handler_fn handler) 745 740 { 746 741 return kvm_handle_hva_range(kvm, hva, hva + 1, handler); 747 742 } 748 743 749 - static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 744 + static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, 750 745 unsigned long gfn) 751 746 { 752 747 struct revmap_entry *rev = kvm->arch.revmap; 753 748 unsigned long h, i, j; 754 749 __be64 *hptep; 755 750 unsigned long ptel, psize, rcbits; 751 + unsigned long *rmapp; 756 752 753 + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; 757 754 for (;;) { 758 755 lock_rmap(rmapp); 759 756 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { ··· 815 810 816 811 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva) 817 812 { 818 - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 813 + hva_handler_fn handler; 814 + 815 + handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp; 816 + kvm_handle_hva(kvm, hva, handler); 819 817 return 0; 820 818 } 821 819 822 820 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end) 823 821 { 824 - kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); 822 + hva_handler_fn handler; 823 + 824 + handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; 825 + kvm_handle_hva_range(kvm, start, end, handler); 825 826 return 0; 826 827 } 827 828 828 829 void kvmppc_core_flush_memslot_hv(struct kvm *kvm, 829 830 struct kvm_memory_slot *memslot) 830 831 { 831 - unsigned long *rmapp; 832 832 unsigned long gfn; 833 833 unsigned long n; 834 + unsigned long *rmapp; 834 835 835 - rmapp = memslot->arch.rmap; 836 836 gfn = memslot->base_gfn; 837 - for (n = memslot->npages; n; --n) { 837 + rmapp = memslot->arch.rmap; 838 + for (n = memslot->npages; n; --n, ++gfn) { 839 + if (kvm_is_radix(kvm)) { 840 + kvm_unmap_radix(kvm, memslot, gfn); 841 + continue; 842 + } 838 843 /* 839 844 * Testing the present bit without locking is OK because 840 845 * the memslot has been marked invalid already, and hence ··· 852 837 * thus the present bit can't go from 0 to 1. 853 838 */ 854 839 if (*rmapp & KVMPPC_RMAP_PRESENT) 855 - kvm_unmap_rmapp(kvm, rmapp, gfn); 840 + kvm_unmap_rmapp(kvm, memslot, gfn); 856 841 ++rmapp; 857 - ++gfn; 858 842 } 859 843 } 860 844 861 - static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 845 + static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, 862 846 unsigned long gfn) 863 847 { 864 848 struct revmap_entry *rev = kvm->arch.revmap; 865 849 unsigned long head, i, j; 866 850 __be64 *hptep; 867 851 int ret = 0; 852 + unsigned long *rmapp; 868 853 854 + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; 869 855 retry: 870 856 lock_rmap(rmapp); 871 857 if (*rmapp & KVMPPC_RMAP_REFERENCED) { ··· 914 898 915 899 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) 916 900 { 917 - return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); 901 + hva_handler_fn handler; 902 + 903 + handler = kvm_is_radix(kvm) ? 
kvm_age_radix : kvm_age_rmapp; 904 + return kvm_handle_hva_range(kvm, start, end, handler); 918 905 } 919 906 920 - static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 907 + static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, 921 908 unsigned long gfn) 922 909 { 923 910 struct revmap_entry *rev = kvm->arch.revmap; 924 911 unsigned long head, i, j; 925 912 unsigned long *hp; 926 913 int ret = 1; 914 + unsigned long *rmapp; 927 915 916 + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; 928 917 if (*rmapp & KVMPPC_RMAP_REFERENCED) 929 918 return 1; 930 919 ··· 955 934 956 935 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva) 957 936 { 958 - return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 937 + hva_handler_fn handler; 938 + 939 + handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp; 940 + return kvm_handle_hva(kvm, hva, handler); 959 941 } 960 942 961 943 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte) 962 944 { 963 - kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 945 + hva_handler_fn handler; 946 + 947 + handler = kvm_is_radix(kvm) ? 
kvm_unmap_radix : kvm_unmap_rmapp; 948 + kvm_handle_hva(kvm, hva, handler); 964 949 } 965 950 966 951 static int vcpus_running(struct kvm *kvm) ··· 1067 1040 return npages_dirty; 1068 1041 } 1069 1042 1070 - static void harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1043 + void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa, 1071 1044 struct kvm_memory_slot *memslot, 1072 1045 unsigned long *map) 1073 1046 { ··· 1085 1058 __set_bit_le(gfn - memslot->base_gfn, map); 1086 1059 } 1087 1060 1088 - long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, 1089 - unsigned long *map) 1061 + long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm, 1062 + struct kvm_memory_slot *memslot, unsigned long *map) 1090 1063 { 1091 1064 unsigned long i, j; 1092 1065 unsigned long *rmapp; 1093 - struct kvm_vcpu *vcpu; 1094 1066 1095 1067 preempt_disable(); 1096 1068 rmapp = memslot->arch.rmap; ··· 1104 1078 for (j = i; npages; ++j, --npages) 1105 1079 __set_bit_le(j, map); 1106 1080 ++rmapp; 1107 - } 1108 - 1109 - /* Harvest dirty bits from VPA and DTL updates */ 1110 - /* Note: we never modify the SLB shadow buffer areas */ 1111 - kvm_for_each_vcpu(i, vcpu, kvm) { 1112 - spin_lock(&vcpu->arch.vpa_update_lock); 1113 - harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map); 1114 - harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map); 1115 - spin_unlock(&vcpu->arch.vpa_update_lock); 1116 1081 } 1117 1082 preempt_enable(); 1118 1083 return 0; ··· 1159 1142 srcu_idx = srcu_read_lock(&kvm->srcu); 1160 1143 memslot = gfn_to_memslot(kvm, gfn); 1161 1144 if (memslot) { 1162 - rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1163 - lock_rmap(rmap); 1164 - *rmap |= KVMPPC_RMAP_CHANGED; 1165 - unlock_rmap(rmap); 1145 + if (!kvm_is_radix(kvm)) { 1146 + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; 1147 + lock_rmap(rmap); 1148 + *rmap |= KVMPPC_RMAP_CHANGED; 1149 + unlock_rmap(rmap); 1150 + } else if (memslot->dirty_bitmap) { 1151 + mark_page_dirty(kvm, gfn); 1152 + } 1166 1153 } 1167 
1154 srcu_read_unlock(&kvm->srcu, srcu_idx); 1168 1155 } ··· 1696 1675 1697 1676 vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */ 1698 1677 1699 - mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1678 + if (kvm_is_radix(vcpu->kvm)) 1679 + mmu->xlate = kvmppc_mmu_radix_xlate; 1680 + else 1681 + mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1700 1682 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1701 1683 1702 1684 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
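The recurring pattern in this file is the new `hva_handler_fn` typedef: each entry point picks the radix or HPT handler once, based on `kvm_is_radix()`, then applies it across the gfn range. A stripped-down sketch of that dispatch shape (the handler bodies and counters here are purely illustrative, not the kernel's):

```c
#include <assert.h>

/* Handler selected once per operation, then applied per-gfn,
 * as kvm_handle_hva_range() does in the diff above. */
typedef int (*gfn_handler_fn)(unsigned long gfn);

static int calls_radix, calls_hpt;

static int unmap_radix(unsigned long gfn)
{
	(void)gfn;
	calls_radix++;
	return 0;
}

static int unmap_hpt(unsigned long gfn)
{
	(void)gfn;
	calls_hpt++;
	return 0;
}

static int handle_gfn_range(int is_radix, unsigned long start,
			    unsigned long end)
{
	gfn_handler_fn handler = is_radix ? unmap_radix : unmap_hpt;
	int ret = 0;
	unsigned long gfn;

	for (gfn = start; gfn < end; gfn++)
		ret |= handler(gfn);
	return ret;
}
```

Passing the memslot (rather than a raw `rmapp` pointer) to the handlers is what makes this work: the radix handlers have no rmap array, so the slot/gfn pair is the common denominator both implementations can use.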
+716
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 1 + /* 2 + * This program is free software; you can redistribute it and/or modify 3 + * it under the terms of the GNU General Public License, version 2, as 4 + * published by the Free Software Foundation. 5 + * 6 + * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 7 + */ 8 + 9 + #include <linux/types.h> 10 + #include <linux/string.h> 11 + #include <linux/kvm.h> 12 + #include <linux/kvm_host.h> 13 + 14 + #include <asm/kvm_ppc.h> 15 + #include <asm/kvm_book3s.h> 16 + #include <asm/page.h> 17 + #include <asm/mmu.h> 18 + #include <asm/pgtable.h> 19 + #include <asm/pgalloc.h> 20 + 21 + /* 22 + * Supported radix tree geometry. 23 + * Like p9, we support either 5 or 9 bits at the first (lowest) level, 24 + * for a page size of 64k or 4k. 25 + */ 26 + static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; 27 + 28 + int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 29 + struct kvmppc_pte *gpte, bool data, bool iswrite) 30 + { 31 + struct kvm *kvm = vcpu->kvm; 32 + u32 pid; 33 + int ret, level, ps; 34 + __be64 prte, rpte; 35 + unsigned long root, pte, index; 36 + unsigned long rts, bits, offset; 37 + unsigned long gpa; 38 + unsigned long proc_tbl_size; 39 + 40 + /* Work out effective PID */ 41 + switch (eaddr >> 62) { 42 + case 0: 43 + pid = vcpu->arch.pid; 44 + break; 45 + case 3: 46 + pid = 0; 47 + break; 48 + default: 49 + return -EINVAL; 50 + } 51 + proc_tbl_size = 1 << ((kvm->arch.process_table & PRTS_MASK) + 12); 52 + if (pid * 16 >= proc_tbl_size) 53 + return -EINVAL; 54 + 55 + /* Read partition table to find root of tree for effective PID */ 56 + ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, 57 + &prte, sizeof(prte)); 58 + if (ret) 59 + return ret; 60 + 61 + root = be64_to_cpu(prte); 62 + rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) | 63 + ((root & RTS2_MASK) >> RTS2_SHIFT); 64 + bits = root & RPDS_MASK; 65 + root = root & RPDB_MASK; 66 + 67 + /* P9 DD1 interprets RTS (radix tree size) differently */ 68 + offset = 
rts + 31; 69 + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) 70 + offset -= 3; 71 + 72 + /* current implementations only support 52-bit space */ 73 + if (offset != 52) 74 + return -EINVAL; 75 + 76 + for (level = 3; level >= 0; --level) { 77 + if (level && bits != p9_supported_radix_bits[level]) 78 + return -EINVAL; 79 + if (level == 0 && !(bits == 5 || bits == 9)) 80 + return -EINVAL; 81 + offset -= bits; 82 + index = (eaddr >> offset) & ((1UL << bits) - 1); 83 + /* check that low bits of page table base are zero */ 84 + if (root & ((1UL << (bits + 3)) - 1)) 85 + return -EINVAL; 86 + ret = kvm_read_guest(kvm, root + index * 8, 87 + &rpte, sizeof(rpte)); 88 + if (ret) 89 + return ret; 90 + pte = __be64_to_cpu(rpte); 91 + if (!(pte & _PAGE_PRESENT)) 92 + return -ENOENT; 93 + if (pte & _PAGE_PTE) 94 + break; 95 + bits = pte & 0x1f; 96 + root = pte & 0x0fffffffffffff00ul; 97 + } 98 + /* need a leaf at lowest level; 512GB pages not supported */ 99 + if (level < 0 || level == 3) 100 + return -EINVAL; 101 + 102 + /* offset is now log base 2 of the page size */ 103 + gpa = pte & 0x01fffffffffff000ul; 104 + if (gpa & ((1ul << offset) - 1)) 105 + return -EINVAL; 106 + gpa += eaddr & ((1ul << offset) - 1); 107 + for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps) 108 + if (offset == mmu_psize_defs[ps].shift) 109 + break; 110 + gpte->page_size = ps; 111 + 112 + gpte->eaddr = eaddr; 113 + gpte->raddr = gpa; 114 + 115 + /* Work out permissions */ 116 + gpte->may_read = !!(pte & _PAGE_READ); 117 + gpte->may_write = !!(pte & _PAGE_WRITE); 118 + gpte->may_execute = !!(pte & _PAGE_EXEC); 119 + if (kvmppc_get_msr(vcpu) & MSR_PR) { 120 + if (pte & _PAGE_PRIVILEGED) { 121 + gpte->may_read = 0; 122 + gpte->may_write = 0; 123 + gpte->may_execute = 0; 124 + } 125 + } else { 126 + if (!(pte & _PAGE_PRIVILEGED)) { 127 + /* Check AMR/IAMR to see if strict mode is in force */ 128 + if (vcpu->arch.amr & (1ul << 62)) 129 + gpte->may_read = 0; 130 + if (vcpu->arch.amr & (1ul << 63)) 131 + 
gpte->may_write = 0; 132 + if (vcpu->arch.iamr & (1ul << 62)) 133 + gpte->may_execute = 0; 134 + } 135 + } 136 + 137 + return 0; 138 + } 139 + 140 + #ifdef CONFIG_PPC_64K_PAGES 141 + #define MMU_BASE_PSIZE MMU_PAGE_64K 142 + #else 143 + #define MMU_BASE_PSIZE MMU_PAGE_4K 144 + #endif 145 + 146 + static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, 147 + unsigned int pshift) 148 + { 149 + int psize = MMU_BASE_PSIZE; 150 + 151 + if (pshift >= PMD_SHIFT) 152 + psize = MMU_PAGE_2M; 153 + addr &= ~0xfffUL; 154 + addr |= mmu_psize_defs[psize].ap << 5; 155 + asm volatile("ptesync": : :"memory"); 156 + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) 157 + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); 158 + asm volatile("ptesync": : :"memory"); 159 + } 160 + 161 + unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, 162 + unsigned long clr, unsigned long set, 163 + unsigned long addr, unsigned int shift) 164 + { 165 + unsigned long old = 0; 166 + 167 + if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) && 168 + pte_present(*ptep)) { 169 + /* have to invalidate it first */ 170 + old = __radix_pte_update(ptep, _PAGE_PRESENT, 0); 171 + kvmppc_radix_tlbie_page(kvm, addr, shift); 172 + set |= _PAGE_PRESENT; 173 + old &= _PAGE_PRESENT; 174 + } 175 + return __radix_pte_update(ptep, clr, set) | old; 176 + } 177 + 178 + void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, 179 + pte_t *ptep, pte_t pte) 180 + { 181 + radix__set_pte_at(kvm->mm, addr, ptep, pte, 0); 182 + } 183 + 184 + static struct kmem_cache *kvm_pte_cache; 185 + 186 + static pte_t *kvmppc_pte_alloc(void) 187 + { 188 + return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); 189 + } 190 + 191 + static void kvmppc_pte_free(pte_t *ptep) 192 + { 193 + kmem_cache_free(kvm_pte_cache, ptep); 194 + } 195 + 196 + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, 197 + unsigned int level, unsigned long mmu_seq) 198 + { 199 + pgd_t *pgd; 200 + 
pud_t *pud, *new_pud = NULL; 201 + pmd_t *pmd, *new_pmd = NULL; 202 + pte_t *ptep, *new_ptep = NULL; 203 + unsigned long old; 204 + int ret; 205 + 206 + /* Traverse the guest's 2nd-level tree, allocate new levels needed */ 207 + pgd = kvm->arch.pgtable + pgd_index(gpa); 208 + pud = NULL; 209 + if (pgd_present(*pgd)) 210 + pud = pud_offset(pgd, gpa); 211 + else 212 + new_pud = pud_alloc_one(kvm->mm, gpa); 213 + 214 + pmd = NULL; 215 + if (pud && pud_present(*pud)) 216 + pmd = pmd_offset(pud, gpa); 217 + else 218 + new_pmd = pmd_alloc_one(kvm->mm, gpa); 219 + 220 + if (level == 0 && !(pmd && pmd_present(*pmd))) 221 + new_ptep = kvmppc_pte_alloc(); 222 + 223 + /* Check if we might have been invalidated; let the guest retry if so */ 224 + spin_lock(&kvm->mmu_lock); 225 + ret = -EAGAIN; 226 + if (mmu_notifier_retry(kvm, mmu_seq)) 227 + goto out_unlock; 228 + 229 + /* Now traverse again under the lock and change the tree */ 230 + ret = -ENOMEM; 231 + if (pgd_none(*pgd)) { 232 + if (!new_pud) 233 + goto out_unlock; 234 + pgd_populate(kvm->mm, pgd, new_pud); 235 + new_pud = NULL; 236 + } 237 + pud = pud_offset(pgd, gpa); 238 + if (pud_none(*pud)) { 239 + if (!new_pmd) 240 + goto out_unlock; 241 + pud_populate(kvm->mm, pud, new_pmd); 242 + new_pmd = NULL; 243 + } 244 + pmd = pmd_offset(pud, gpa); 245 + if (pmd_large(*pmd)) { 246 + /* Someone else has instantiated a large page here; retry */ 247 + ret = -EAGAIN; 248 + goto out_unlock; 249 + } 250 + if (level == 1 && !pmd_none(*pmd)) { 251 + /* 252 + * There's a page table page here, but we wanted 253 + * to install a large page. Tell the caller and let 254 + * it try installing a normal page if it wants. 
255 + */ 256 + ret = -EBUSY; 257 + goto out_unlock; 258 + } 259 + if (level == 0) { 260 + if (pmd_none(*pmd)) { 261 + if (!new_ptep) 262 + goto out_unlock; 263 + pmd_populate(kvm->mm, pmd, new_ptep); 264 + new_ptep = NULL; 265 + } 266 + ptep = pte_offset_kernel(pmd, gpa); 267 + if (pte_present(*ptep)) { 268 + /* PTE was previously valid, so invalidate it */ 269 + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 270 + 0, gpa, 0); 271 + kvmppc_radix_tlbie_page(kvm, gpa, 0); 272 + if (old & _PAGE_DIRTY) 273 + mark_page_dirty(kvm, gpa >> PAGE_SHIFT); 274 + } 275 + kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); 276 + } else { 277 + kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); 278 + } 279 + ret = 0; 280 + 281 + out_unlock: 282 + spin_unlock(&kvm->mmu_lock); 283 + if (new_pud) 284 + pud_free(kvm->mm, new_pud); 285 + if (new_pmd) 286 + pmd_free(kvm->mm, new_pmd); 287 + if (new_ptep) 288 + kvmppc_pte_free(new_ptep); 289 + return ret; 290 + } 291 + 292 + int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, 293 + unsigned long ea, unsigned long dsisr) 294 + { 295 + struct kvm *kvm = vcpu->kvm; 296 + unsigned long mmu_seq, pte_size; 297 + unsigned long gpa, gfn, hva, pfn; 298 + struct kvm_memory_slot *memslot; 299 + struct page *page = NULL, *pages[1]; 300 + long ret, npages, ok; 301 + unsigned int writing; 302 + struct vm_area_struct *vma; 303 + unsigned long flags; 304 + pte_t pte, *ptep; 305 + unsigned long pgflags; 306 + unsigned int shift, level; 307 + 308 + /* Check for unusual errors */ 309 + if (dsisr & DSISR_UNSUPP_MMU) { 310 + pr_err("KVM: Got unsupported MMU fault\n"); 311 + return -EFAULT; 312 + } 313 + if (dsisr & DSISR_BADACCESS) { 314 + /* Reflect to the guest as DSI */ 315 + pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr); 316 + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); 317 + return RESUME_GUEST; 318 + } 319 + 320 + /* Translate the logical address and get the page */ 321 + gpa = 
vcpu->arch.fault_gpa & ~0xfffUL; 322 + gpa &= ~0xF000000000000000ul; 323 + gfn = gpa >> PAGE_SHIFT; 324 + if (!(dsisr & DSISR_PGDIRFAULT)) 325 + gpa |= ea & 0xfff; 326 + memslot = gfn_to_memslot(kvm, gfn); 327 + 328 + /* No memslot means it's an emulated MMIO region */ 329 + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { 330 + if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | 331 + DSISR_SET_RC)) { 332 + /* 333 + * Bad address in guest page table tree, or other 334 + * unusual error - reflect it to the guest as DSI. 335 + */ 336 + kvmppc_core_queue_data_storage(vcpu, ea, dsisr); 337 + return RESUME_GUEST; 338 + } 339 + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 340 + dsisr & DSISR_ISSTORE); 341 + } 342 + 343 + /* used to check for invalidations in progress */ 344 + mmu_seq = kvm->mmu_notifier_seq; 345 + smp_rmb(); 346 + 347 + writing = (dsisr & DSISR_ISSTORE) != 0; 348 + hva = gfn_to_hva_memslot(memslot, gfn); 349 + if (dsisr & DSISR_SET_RC) { 350 + /* 351 + * Need to set an R or C bit in the 2nd-level tables; 352 + * if the relevant bits aren't already set in the linux 353 + * page tables, fall through to do the gup_fast to 354 + * set them in the linux page tables too. 
355 + */ 356 + ok = 0; 357 + pgflags = _PAGE_ACCESSED; 358 + if (writing) 359 + pgflags |= _PAGE_DIRTY; 360 + local_irq_save(flags); 361 + ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, 362 + NULL, NULL); 363 + if (ptep) { 364 + pte = READ_ONCE(*ptep); 365 + if (pte_present(pte) && 366 + (pte_val(pte) & pgflags) == pgflags) 367 + ok = 1; 368 + } 369 + local_irq_restore(flags); 370 + if (ok) { 371 + spin_lock(&kvm->mmu_lock); 372 + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { 373 + spin_unlock(&kvm->mmu_lock); 374 + return RESUME_GUEST; 375 + } 376 + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, 377 + gpa, NULL, &shift); 378 + if (ptep && pte_present(*ptep)) { 379 + kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, 380 + gpa, shift); 381 + spin_unlock(&kvm->mmu_lock); 382 + return RESUME_GUEST; 383 + } 384 + spin_unlock(&kvm->mmu_lock); 385 + } 386 + } 387 + 388 + ret = -EFAULT; 389 + pfn = 0; 390 + pte_size = PAGE_SIZE; 391 + pgflags = _PAGE_READ | _PAGE_EXEC; 392 + level = 0; 393 + npages = get_user_pages_fast(hva, 1, writing, pages); 394 + if (npages < 1) { 395 + /* Check if it's an I/O mapping */ 396 + down_read(&current->mm->mmap_sem); 397 + vma = find_vma(current->mm, hva); 398 + if (vma && vma->vm_start <= hva && hva < vma->vm_end && 399 + (vma->vm_flags & VM_PFNMAP)) { 400 + pfn = vma->vm_pgoff + 401 + ((hva - vma->vm_start) >> PAGE_SHIFT); 402 + pgflags = pgprot_val(vma->vm_page_prot); 403 + } 404 + up_read(&current->mm->mmap_sem); 405 + if (!pfn) 406 + return -EFAULT; 407 + } else { 408 + page = pages[0]; 409 + pfn = page_to_pfn(page); 410 + if (PageHuge(page)) { 411 + page = compound_head(page); 412 + pte_size <<= compound_order(page); 413 + /* See if we can insert a 2MB large-page PTE here */ 414 + if (pte_size >= PMD_SIZE && 415 + (gpa & PMD_MASK & PAGE_MASK) == 416 + (hva & PMD_MASK & PAGE_MASK)) { 417 + level = 1; 418 + pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); 419 + } 420 + } 421 + /* See if we can provide write access */ 422 + if 
(writing) { 423 + /* 424 + * We assume gup_fast has set dirty on the host PTE. 425 + */ 426 + pgflags |= _PAGE_WRITE; 427 + } else { 428 + local_irq_save(flags); 429 + ptep = __find_linux_pte_or_hugepte(current->mm->pgd, 430 + hva, NULL, NULL); 431 + if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) 432 + pgflags |= _PAGE_WRITE; 433 + local_irq_restore(flags); 434 + } 435 + } 436 + 437 + /* 438 + * Compute the PTE value that we need to insert. 439 + */ 440 + pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED; 441 + if (pgflags & _PAGE_WRITE) 442 + pgflags |= _PAGE_DIRTY; 443 + pte = pfn_pte(pfn, __pgprot(pgflags)); 444 + 445 + /* Allocate space in the tree and write the PTE */ 446 + ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); 447 + if (ret == -EBUSY) { 448 + /* 449 + * There's already a PMD where wanted to install a large page; 450 + * for now, fall back to installing a small page. 451 + */ 452 + level = 0; 453 + pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1); 454 + pte = pfn_pte(pfn, __pgprot(pgflags)); 455 + ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); 456 + } 457 + if (ret == 0 || ret == -EAGAIN) 458 + ret = RESUME_GUEST; 459 + 460 + if (page) { 461 + /* 462 + * We drop pages[0] here, not page because page might 463 + * have been set to the head page of a compound, but 464 + * we have to drop the reference on the correct tail 465 + * page to match the get inside gup() 466 + */ 467 + put_page(pages[0]); 468 + } 469 + return ret; 470 + } 471 + 472 + static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot, 473 + unsigned long gfn, unsigned int order) 474 + { 475 + unsigned long i, limit; 476 + unsigned long *dp; 477 + 478 + if (!memslot->dirty_bitmap) 479 + return; 480 + limit = 1ul << order; 481 + if (limit < BITS_PER_LONG) { 482 + for (i = 0; i < limit; ++i) 483 + mark_page_dirty(kvm, gfn + i); 484 + return; 485 + } 486 + dp = memslot->dirty_bitmap + (gfn - memslot->base_gfn); 487 + limit /= BITS_PER_LONG; 488 + for (i 
= 0; i < limit; ++i) 489 + *dp++ = ~0ul; 490 + } 491 + 492 + /* Called with kvm->lock held */ 493 + int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 494 + unsigned long gfn) 495 + { 496 + pte_t *ptep; 497 + unsigned long gpa = gfn << PAGE_SHIFT; 498 + unsigned int shift; 499 + unsigned long old; 500 + 501 + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, 502 + NULL, &shift); 503 + if (ptep && pte_present(*ptep)) { 504 + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, 505 + gpa, shift); 506 + kvmppc_radix_tlbie_page(kvm, gpa, shift); 507 + if (old & _PAGE_DIRTY) { 508 + if (!shift) 509 + mark_page_dirty(kvm, gfn); 510 + else 511 + mark_pages_dirty(kvm, memslot, 512 + gfn, shift - PAGE_SHIFT); 513 + } 514 + } 515 + return 0; 516 + } 517 + 518 + /* Called with kvm->lock held */ 519 + int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 520 + unsigned long gfn) 521 + { 522 + pte_t *ptep; 523 + unsigned long gpa = gfn << PAGE_SHIFT; 524 + unsigned int shift; 525 + int ref = 0; 526 + 527 + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, 528 + NULL, &shift); 529 + if (ptep && pte_present(*ptep) && pte_young(*ptep)) { 530 + kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, 531 + gpa, shift); 532 + /* XXX need to flush tlb here? 
*/ 533 + ref = 1; 534 + } 535 + return ref; 536 + } 537 + 538 + /* Called with kvm->lock held */ 539 + int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 540 + unsigned long gfn) 541 + { 542 + pte_t *ptep; 543 + unsigned long gpa = gfn << PAGE_SHIFT; 544 + unsigned int shift; 545 + int ref = 0; 546 + 547 + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, 548 + NULL, &shift); 549 + if (ptep && pte_present(*ptep) && pte_young(*ptep)) 550 + ref = 1; 551 + return ref; 552 + } 553 + 554 + /* Returns the number of PAGE_SIZE pages that are dirty */ 555 + static int kvm_radix_test_clear_dirty(struct kvm *kvm, 556 + struct kvm_memory_slot *memslot, int pagenum) 557 + { 558 + unsigned long gfn = memslot->base_gfn + pagenum; 559 + unsigned long gpa = gfn << PAGE_SHIFT; 560 + pte_t *ptep; 561 + unsigned int shift; 562 + int ret = 0; 563 + 564 + ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, 565 + NULL, &shift); 566 + if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { 567 + ret = 1; 568 + if (shift) 569 + ret = 1 << (shift - PAGE_SHIFT); 570 + kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, 571 + gpa, shift); 572 + kvmppc_radix_tlbie_page(kvm, gpa, shift); 573 + } 574 + return ret; 575 + } 576 + 577 + long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, 578 + struct kvm_memory_slot *memslot, unsigned long *map) 579 + { 580 + unsigned long i, j; 581 + unsigned long n, *p; 582 + int npages; 583 + 584 + /* 585 + * Radix accumulates dirty bits in the first half of the 586 + * memslot's dirty_bitmap area, for when pages are paged 587 + * out or modified by the host directly. Pick up these 588 + * bits and add them to the map. 
589 + */ 590 + n = kvm_dirty_bitmap_bytes(memslot) / sizeof(long); 591 + p = memslot->dirty_bitmap; 592 + for (i = 0; i < n; ++i) 593 + map[i] |= xchg(&p[i], 0); 594 + 595 + for (i = 0; i < memslot->npages; i = j) { 596 + npages = kvm_radix_test_clear_dirty(kvm, memslot, i); 597 + 598 + /* 599 + * Note that if npages > 0 then i must be a multiple of npages, 600 + * since huge pages are only used to back the guest at guest 601 + * real addresses that are a multiple of their size. 602 + * Since we have at most one PTE covering any given guest 603 + * real address, if npages > 1 we can skip to i + npages. 604 + */ 605 + j = i + 1; 606 + if (npages) 607 + for (j = i; npages; ++j, --npages) 608 + __set_bit_le(j, map); 609 + } 610 + return 0; 611 + } 612 + 613 + static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, 614 + int psize, int *indexp) 615 + { 616 + if (!mmu_psize_defs[psize].shift) 617 + return; 618 + info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift | 619 + (mmu_psize_defs[psize].ap << 29); 620 + ++(*indexp); 621 + } 622 + 623 + int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info) 624 + { 625 + int i; 626 + 627 + if (!radix_enabled()) 628 + return -EINVAL; 629 + memset(info, 0, sizeof(*info)); 630 + 631 + /* 4k page size */ 632 + info->geometries[0].page_shift = 12; 633 + info->geometries[0].level_bits[0] = 9; 634 + for (i = 1; i < 4; ++i) 635 + info->geometries[0].level_bits[i] = p9_supported_radix_bits[i]; 636 + /* 64k page size */ 637 + info->geometries[1].page_shift = 16; 638 + for (i = 0; i < 4; ++i) 639 + info->geometries[1].level_bits[i] = p9_supported_radix_bits[i]; 640 + 641 + i = 0; 642 + add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i); 643 + add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i); 644 + add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i); 645 + add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i); 646 + 647 + return 0; 648 + } 649 + 650 + int kvmppc_init_vm_radix(struct kvm *kvm) 651 + { 652 + kvm->arch.pgtable = 
pgd_alloc(kvm->mm); 653 + if (!kvm->arch.pgtable) 654 + return -ENOMEM; 655 + return 0; 656 + } 657 + 658 + void kvmppc_free_radix(struct kvm *kvm) 659 + { 660 + unsigned long ig, iu, im; 661 + pte_t *pte; 662 + pmd_t *pmd; 663 + pud_t *pud; 664 + pgd_t *pgd; 665 + 666 + if (!kvm->arch.pgtable) 667 + return; 668 + pgd = kvm->arch.pgtable; 669 + for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { 670 + if (!pgd_present(*pgd)) 671 + continue; 672 + pud = pud_offset(pgd, 0); 673 + for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) { 674 + if (!pud_present(*pud)) 675 + continue; 676 + pmd = pmd_offset(pud, 0); 677 + for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { 678 + if (pmd_huge(*pmd)) { 679 + pmd_clear(pmd); 680 + continue; 681 + } 682 + if (!pmd_present(*pmd)) 683 + continue; 684 + pte = pte_offset_map(pmd, 0); 685 + memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE); 686 + kvmppc_pte_free(pte); 687 + pmd_clear(pmd); 688 + } 689 + pmd_free(kvm->mm, pmd_offset(pud, 0)); 690 + pud_clear(pud); 691 + } 692 + pud_free(kvm->mm, pud_offset(pgd, 0)); 693 + pgd_clear(pgd); 694 + } 695 + pgd_free(kvm->mm, kvm->arch.pgtable); 696 + } 697 + 698 + static void pte_ctor(void *addr) 699 + { 700 + memset(addr, 0, PTE_TABLE_SIZE); 701 + } 702 + 703 + int kvmppc_radix_init(void) 704 + { 705 + unsigned long size = sizeof(void *) << PTE_INDEX_SIZE; 706 + 707 + kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor); 708 + if (!kvm_pte_cache) 709 + return -ENOMEM; 710 + return 0; 711 + } 712 + 713 + void kvmppc_radix_exit(void) 714 + { 715 + kmem_cache_destroy(kvm_pte_cache); 716 + }
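The translation walk in `kvmppc_mmu_radix_xlate` above consumes the effective address top-down, subtracting each level's `bits` from a 52-bit starting offset until it reaches a leaf PTE. A minimal userspace sketch of that index computation, assuming the standard POWER9 4K geometry of 13/9/9/9 bits per level (in the kernel the per-level bits come from `p9_supported_radix_bits` and from each page-directory entry, not a fixed table):

```c
#include <assert.h>
#include <stdint.h>

/* Hypothetical sketch: fixed 13/9/9/9 split, root first. The real walk
 * reads the bits for each lower level out of the PDE it just loaded. */
static const int radix_bits[4] = { 13, 9, 9, 9 };

/* Compute the page-table index selected at each of the four levels for
 * eaddr, and the number of bits left over for the page offset. */
static void radix_walk_indices(uint64_t eaddr, unsigned int idx[4],
                               int *page_shift)
{
    int offset = 52;                  /* current implementations: 52-bit space */
    for (int level = 0; level < 4; ++level) {
        int bits = radix_bits[level];
        offset -= bits;
        idx[level] = (eaddr >> offset) & ((1UL << bits) - 1);
    }
    *page_shift = offset;             /* 52 - 13 - 9 - 9 - 9 = 12, i.e. 4K */
}
```

With a 64K leaf (5 bits at the lowest level, as the `bits == 5 || bits == 9` check in the patch allows), the leftover offset would be 16 instead of 12.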
+181 -24
arch/powerpc/kvm/book3s_hv.c
··· 1135 1135 /* 1136 1136 * Userspace can only modify DPFD (default prefetch depth), 1137 1137 * ILE (interrupt little-endian) and TC (translation control). 1138 - * On POWER8 userspace can also modify AIL (alt. interrupt loc.) 1138 + * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.). 1139 1139 */ 1140 1140 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; 1141 1141 if (cpu_has_feature(CPU_FTR_ARCH_207S)) ··· 1821 1821 vcpu->arch.vcore = vcore; 1822 1822 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; 1823 1823 vcpu->arch.thread_cpu = -1; 1824 + vcpu->arch.prev_cpu = -1; 1824 1825 1825 1826 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1826 1827 kvmppc_sanity_check(vcpu); ··· 1951 1950 tpaca->kvm_hstate.kvm_split_mode = NULL; 1952 1951 } 1953 1952 1953 + static void do_nothing(void *x) 1954 + { 1955 + } 1956 + 1957 + static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) 1958 + { 1959 + int i; 1960 + 1961 + cpu = cpu_first_thread_sibling(cpu); 1962 + cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); 1963 + /* 1964 + * Make sure setting of bit in need_tlb_flush precedes 1965 + * testing of cpu_in_guest bits. The matching barrier on 1966 + * the other side is the first smp_mb() in kvmppc_run_core(). 
1967 + */ 1968 + smp_mb(); 1969 + for (i = 0; i < threads_per_core; ++i) 1970 + if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest)) 1971 + smp_call_function_single(cpu + i, do_nothing, NULL, 1); 1972 + } 1973 + 1954 1974 static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) 1955 1975 { 1956 1976 int cpu; 1957 1977 struct paca_struct *tpaca; 1958 1978 struct kvmppc_vcore *mvc = vc->master_vcore; 1979 + struct kvm *kvm = vc->kvm; 1959 1980 1960 1981 cpu = vc->pcpu; 1961 1982 if (vcpu) { ··· 1988 1965 cpu += vcpu->arch.ptid; 1989 1966 vcpu->cpu = mvc->pcpu; 1990 1967 vcpu->arch.thread_cpu = cpu; 1968 + 1969 + /* 1970 + * With radix, the guest can do TLB invalidations itself, 1971 + * and it could choose to use the local form (tlbiel) if 1972 + * it is invalidating a translation that has only ever been 1973 + * used on one vcpu. However, that doesn't mean it has 1974 + * only ever been used on one physical cpu, since vcpus 1975 + * can move around between pcpus. To cope with this, when 1976 + * a vcpu moves from one pcpu to another, we need to tell 1977 + * any vcpus running on the same core as this vcpu previously 1978 + * ran to flush the TLB. The TLB is shared between threads, 1979 + * so we use a single bit in .need_tlb_flush for all 4 threads. 
1980 + */ 1981 + if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) { 1982 + if (vcpu->arch.prev_cpu >= 0 && 1983 + cpu_first_thread_sibling(vcpu->arch.prev_cpu) != 1984 + cpu_first_thread_sibling(cpu)) 1985 + radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu); 1986 + vcpu->arch.prev_cpu = cpu; 1987 + } 1988 + cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); 1991 1989 } 1992 1990 tpaca = &paca[cpu]; 1993 1991 tpaca->kvm_hstate.kvm_vcpu = vcpu; ··· 2596 2552 kvmppc_release_hwthread(pcpu + i); 2597 2553 if (sip && sip->napped[i]) 2598 2554 kvmppc_ipi_thread(pcpu + i); 2555 + cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); 2599 2556 } 2600 2557 2601 2558 kvmppc_set_host_core(pcpu); ··· 2922 2877 smp_mb(); 2923 2878 2924 2879 /* On the first time here, set up HTAB and VRMA */ 2925 - if (!vcpu->kvm->arch.hpte_setup_done) { 2880 + if (!kvm_is_radix(vcpu->kvm) && !vcpu->kvm->arch.hpte_setup_done) { 2926 2881 r = kvmppc_hv_setup_htab_rma(vcpu); 2927 2882 if (r) 2928 2883 goto out; ··· 2984 2939 { 2985 2940 struct kvm_ppc_one_seg_page_size *sps; 2986 2941 2942 + /* 2943 + * Since we don't yet support HPT guests on a radix host, 2944 + * return an error if the host uses radix. 2945 + */ 2946 + if (radix_enabled()) 2947 + return -EINVAL; 2948 + 2987 2949 info->flags = KVM_PPC_PAGE_SIZES_REAL; 2988 2950 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 2989 2951 info->flags |= KVM_PPC_1T_SEGMENTS; ··· 3013 2961 { 3014 2962 struct kvm_memslots *slots; 3015 2963 struct kvm_memory_slot *memslot; 3016 - int r; 2964 + int i, r; 3017 2965 unsigned long n; 2966 + unsigned long *buf; 2967 + struct kvm_vcpu *vcpu; 3018 2968 3019 2969 mutex_lock(&kvm->slots_lock); 3020 2970 ··· 3030 2976 if (!memslot->dirty_bitmap) 3031 2977 goto out; 3032 2978 2979 + /* 2980 + * Use second half of bitmap area because radix accumulates 2981 + * bits in the first half. 
2982 + */ 3033 2983 n = kvm_dirty_bitmap_bytes(memslot); 3034 - memset(memslot->dirty_bitmap, 0, n); 2984 + buf = memslot->dirty_bitmap + n / sizeof(long); 2985 + memset(buf, 0, n); 3035 2986 3036 - r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); 2987 + if (kvm_is_radix(kvm)) 2988 + r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf); 2989 + else 2990 + r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf); 3037 2991 if (r) 3038 2992 goto out; 3039 2993 2994 + /* Harvest dirty bits from VPA and DTL updates */ 2995 + /* Note: we never modify the SLB shadow buffer areas */ 2996 + kvm_for_each_vcpu(i, vcpu, kvm) { 2997 + spin_lock(&vcpu->arch.vpa_update_lock); 2998 + kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf); 2999 + kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf); 3000 + spin_unlock(&vcpu->arch.vpa_update_lock); 3001 + } 3002 + 3040 3003 r = -EFAULT; 3041 - if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) 3004 + if (copy_to_user(log->dirty_bitmap, buf, n)) 3042 3005 goto out; 3043 3006 3044 3007 r = 0; ··· 3076 3005 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot, 3077 3006 unsigned long npages) 3078 3007 { 3008 + /* 3009 + * For now, if radix_enabled() then we only support radix guests, 3010 + * and in that case we don't need the rmap array. 3011 + */ 3012 + if (radix_enabled()) { 3013 + slot->arch.rmap = NULL; 3014 + return 0; 3015 + } 3016 + 3079 3017 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); 3080 3018 if (!slot->arch.rmap) 3081 3019 return -ENOMEM; ··· 3117 3037 if (npages) 3118 3038 atomic64_inc(&kvm->arch.mmio_update); 3119 3039 3120 - if (npages && old->npages) { 3040 + if (npages && old->npages && !kvm_is_radix(kvm)) { 3121 3041 /* 3122 3042 * If modifying a memslot, reset all the rmap dirty bits. 
3123 3043 * If this is a new memslot, we don't need to do anything ··· 3126 3046 */ 3127 3047 slots = kvm_memslots(kvm); 3128 3048 memslot = id_to_memslot(slots, mem->slot); 3129 - kvmppc_hv_get_dirty_log(kvm, memslot, NULL); 3049 + kvmppc_hv_get_dirty_log_hpt(kvm, memslot, NULL); 3130 3050 } 3131 3051 } 3132 3052 ··· 3165 3085 { 3166 3086 unsigned long dw0, dw1; 3167 3087 3168 - /* PS field - page size for VRMA */ 3169 - dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | 3170 - ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); 3171 - /* HTABSIZE and HTABORG fields */ 3172 - dw0 |= kvm->arch.sdr1; 3088 + if (!kvm_is_radix(kvm)) { 3089 + /* PS field - page size for VRMA */ 3090 + dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | 3091 + ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); 3092 + /* HTABSIZE and HTABORG fields */ 3093 + dw0 |= kvm->arch.sdr1; 3173 3094 3174 - /* Second dword has GR=0; other fields are unused since UPRT=0 */ 3175 - dw1 = 0; 3095 + /* Second dword as set by userspace */ 3096 + dw1 = kvm->arch.process_table; 3097 + } else { 3098 + dw0 = PATB_HR | radix__get_tree_size() | 3099 + __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE; 3100 + dw1 = PATB_GR | kvm->arch.process_table; 3101 + } 3176 3102 3177 3103 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); 3178 3104 } ··· 3348 3262 { 3349 3263 unsigned long lpcr, lpid; 3350 3264 char buf[32]; 3265 + int ret; 3351 3266 3352 3267 /* Allocate the guest's logical partition ID */ 3353 3268 ··· 3396 3309 lpcr |= LPCR_HVICE; 3397 3310 } 3398 3311 3312 + /* 3313 + * For now, if the host uses radix, the guest must be radix. 
3314 + */ 3315 + if (radix_enabled()) { 3316 + kvm->arch.radix = 1; 3317 + lpcr &= ~LPCR_VPM1; 3318 + lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; 3319 + ret = kvmppc_init_vm_radix(kvm); 3320 + if (ret) { 3321 + kvmppc_free_lpid(kvm->arch.lpid); 3322 + return ret; 3323 + } 3324 + kvmppc_setup_partition_table(kvm); 3325 + } 3326 + 3399 3327 kvm->arch.lpcr = lpcr; 3400 3328 3401 3329 /* 3402 3330 * Work out how many sets the TLB has, for the use of 3403 3331 * the TLB invalidation loop in book3s_hv_rmhandlers.S. 3404 3332 */ 3405 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 3333 + if (kvm_is_radix(kvm)) 3334 + kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */ 3335 + else if (cpu_has_feature(CPU_FTR_ARCH_300)) 3406 3336 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ 3407 3337 else if (cpu_has_feature(CPU_FTR_ARCH_207S)) 3408 3338 kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ ··· 3429 3325 /* 3430 3326 * Track that we now have a HV mode VM active. This blocks secondary 3431 3327 * CPU threads from coming online. 3328 + * On POWER9, we only need to do this for HPT guests on a radix 3329 + * host, which is not yet supported. 
3432 3330 */ 3433 - kvm_hv_vm_activated(); 3331 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) 3332 + kvm_hv_vm_activated(); 3434 3333 3435 3334 /* 3436 3335 * Create a debugfs directory for the VM ··· 3459 3352 { 3460 3353 debugfs_remove_recursive(kvm->arch.debugfs_dir); 3461 3354 3462 - kvm_hv_vm_deactivated(); 3355 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) 3356 + kvm_hv_vm_deactivated(); 3463 3357 3464 3358 kvmppc_free_vcores(kvm); 3465 3359 3466 - kvmppc_free_hpt(kvm); 3360 + kvmppc_free_lpid(kvm->arch.lpid); 3361 + 3362 + if (kvm_is_radix(kvm)) 3363 + kvmppc_free_radix(kvm); 3364 + else 3365 + kvmppc_free_hpt(kvm); 3467 3366 3468 3367 kvmppc_free_pimap(kvm); 3469 3368 } ··· 3497 3384 { 3498 3385 if (!cpu_has_feature(CPU_FTR_HVMODE) || 3499 3386 !cpu_has_feature(CPU_FTR_ARCH_206)) 3500 - return -EIO; 3501 - /* 3502 - * Disable KVM for Power9 in radix mode. 3503 - */ 3504 - if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) 3505 3387 return -EIO; 3506 3388 3507 3389 return 0; ··· 3765 3657 } 3766 3658 } 3767 3659 3660 + static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg) 3661 + { 3662 + unsigned long lpcr; 3663 + int radix; 3664 + 3665 + /* If not on a POWER9, reject it */ 3666 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) 3667 + return -ENODEV; 3668 + 3669 + /* If any unknown flags set, reject it */ 3670 + if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE)) 3671 + return -EINVAL; 3672 + 3673 + /* We can't change a guest to/from radix yet */ 3674 + radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX); 3675 + if (radix != kvm_is_radix(kvm)) 3676 + return -EINVAL; 3677 + 3678 + /* GR (guest radix) bit in process_table field must match */ 3679 + if (!!(cfg->process_table & PATB_GR) != radix) 3680 + return -EINVAL; 3681 + 3682 + /* Process table size field must be reasonable, i.e. 
<= 24 */ 3683 + if ((cfg->process_table & PRTS_MASK) > 24) 3684 + return -EINVAL; 3685 + 3686 + kvm->arch.process_table = cfg->process_table; 3687 + kvmppc_setup_partition_table(kvm); 3688 + 3689 + lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0; 3690 + kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE); 3691 + 3692 + return 0; 3693 + } 3694 + 3768 3695 static struct kvmppc_ops kvm_ops_hv = { 3769 3696 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, 3770 3697 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, ··· 3837 3694 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, 3838 3695 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, 3839 3696 #endif 3697 + .configure_mmu = kvmhv_configure_mmu, 3698 + .get_rmmu_info = kvmhv_get_rmmu_info, 3840 3699 }; 3841 3700 3842 3701 static int kvm_init_subcore_bitmap(void) ··· 3871 3726 } 3872 3727 } 3873 3728 return 0; 3729 + } 3730 + 3731 + static int kvmppc_radix_possible(void) 3732 + { 3733 + return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled(); 3874 3734 } 3875 3735 3876 3736 static int kvmppc_book3s_init_hv(void) ··· 3917 3767 init_vcore_lists(); 3918 3768 3919 3769 r = kvmppc_mmu_hv_init(); 3770 + if (r) 3771 + return r; 3772 + 3773 + if (kvmppc_radix_possible()) 3774 + r = kvmppc_radix_init(); 3920 3775 return r; 3921 3776 } 3922 3777 3923 3778 static void kvmppc_book3s_exit_hv(void) 3924 3779 { 3925 3780 kvmppc_free_host_rm_ops(); 3781 + if (kvmppc_radix_possible()) 3782 + kvmppc_radix_exit(); 3926 3783 kvmppc_hv_ops = NULL; 3927 3784 } 3928 3785
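The dirty-log change in `book3s_hv.c` above relies on the memslot's `dirty_bitmap` allocation being twice `kvm_dirty_bitmap_bytes()`: radix accumulates bits in the first half, and the second half is the scratch buffer copied out to userspace. A small sketch of that pointer split (names are illustrative, not the KVM API):

```c
#include <assert.h>
#include <stddef.h>

/* Mirror of "buf = memslot->dirty_bitmap + n / sizeof(long)" from the
 * patch: given the byte size of one half, return the second half. */
static unsigned long *harvest_buf(unsigned long *dirty_bitmap,
                                  size_t bitmap_bytes)
{
    return dirty_bitmap + bitmap_bytes / sizeof(unsigned long);
}
```

The first half is then drained with `xchg(&p[i], 0)` in `kvmppc_hv_get_dirty_log_radix`, so accumulation can continue concurrently while the harvested bits are merged into the scratch half.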
+14 -16
arch/powerpc/kvm/book3s_hv_builtin.c
··· 200 200 201 201 /* 202 202 * Send an interrupt or message to another CPU. 203 - * This can only be called in real mode. 204 203 * The caller needs to include any barrier needed to order writes 205 204 * to memory vs. the IPI/message. 206 205 */ ··· 228 229 if (xics_phys) 229 230 rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); 230 231 else 231 - opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), 232 - IPI_PRIORITY); 232 + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); 233 233 } 234 234 235 235 /* ··· 410 412 411 413 /* Now read the interrupt from the ICP */ 412 414 xics_phys = local_paca->kvm_hstate.xics_phys; 413 - if (!xics_phys) { 414 - /* Use OPAL to read the XIRR */ 415 - rc = opal_rm_int_get_xirr(&xirr, false); 416 - if (rc < 0) 417 - return 1; 418 - } else { 415 + rc = 0; 416 + if (!xics_phys) 417 + rc = opal_int_get_xirr(&xirr, false); 418 + else 419 419 xirr = _lwzcix(xics_phys + XICS_XIRR); 420 - } 420 + if (rc < 0) 421 + return 1; 421 422 422 423 /* 423 424 * Save XIRR for later. Since we get control in reverse endian ··· 442 445 * If it is an IPI, clear the MFRR and EOI it. 
443 446 */ 444 447 if (xisr == XICS_IPI) { 448 + rc = 0; 445 449 if (xics_phys) { 446 450 _stbcix(xics_phys + XICS_MFRR, 0xff); 447 451 _stwcix(xics_phys + XICS_XIRR, xirr); 448 452 } else { 449 - opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); 450 - rc = opal_rm_int_eoi(h_xirr); 451 - /* If rc > 0, there is another interrupt pending */ 452 - *again = rc > 0; 453 + opal_int_set_mfrr(hard_smp_processor_id(), 0xff); 454 + rc = opal_int_eoi(h_xirr); 453 455 } 456 + /* If rc > 0, there is another interrupt pending */ 457 + *again = rc > 0; 454 458 455 459 /* 456 460 * Need to ensure side effects of above stores ··· 472 474 if (xics_phys) 473 475 _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); 474 476 else 475 - opal_rm_int_set_mfrr(hard_smp_processor_id(), 476 - IPI_PRIORITY); 477 + opal_int_set_mfrr(hard_smp_processor_id(), 478 + IPI_PRIORITY); 477 479 /* Let side effects complete */ 478 480 smp_mb(); 479 481 return 1;
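The EOI path in `book3s_hv_builtin.c` saves the raw XIRR and notes that control arrives "in reverse endian", so the value is converted with `be32_to_cpu` before being handed to `opal_int_eoi`. A sketch of that conversion as it behaves on a little-endian host (on a big-endian host `be32_to_cpu` is the identity):

```c
#include <assert.h>
#include <stdint.h>

/* Little-endian-host view of be32_to_cpu: reverse the four bytes. */
static uint32_t be32_to_cpu_sketch(uint32_t be)
{
    return ((be & 0x000000ffu) << 24) |
           ((be & 0x0000ff00u) << 8)  |
           ((be & 0x00ff0000u) >> 8)  |
           ((be & 0xff000000u) >> 24);
}
```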
+23 -2
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 43 43 static int global_invalidates(struct kvm *kvm, unsigned long flags) 44 44 { 45 45 int global; 46 + int cpu; 46 47 47 48 /* 48 49 * If there is only one vcore, and it's currently running, ··· 61 60 /* any other core might now have stale TLB entries... */ 62 61 smp_wmb(); 63 62 cpumask_setall(&kvm->arch.need_tlb_flush); 64 - cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, 65 - &kvm->arch.need_tlb_flush); 63 + cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 64 + /* 65 + * On POWER9, threads are independent but the TLB is shared, 66 + * so use the bit for the first thread to represent the core. 67 + */ 68 + if (cpu_has_feature(CPU_FTR_ARCH_300)) 69 + cpu = cpu_first_thread_sibling(cpu); 70 + cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 66 71 } 67 72 68 73 return global; ··· 189 182 unsigned long mmu_seq; 190 183 unsigned long rcbits, irq_flags = 0; 191 184 185 + if (kvm_is_radix(kvm)) 186 + return H_FUNCTION; 192 187 psize = hpte_page_size(pteh, ptel); 193 188 if (!psize) 194 189 return H_PARAMETER; ··· 467 458 struct revmap_entry *rev; 468 459 u64 pte, orig_pte, pte_r; 469 460 461 + if (kvm_is_radix(kvm)) 462 + return H_FUNCTION; 470 463 if (pte_index >= kvm->arch.hpt_npte) 471 464 return H_PARAMETER; 472 465 hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); ··· 540 529 struct revmap_entry *rev, *revs[4]; 541 530 u64 hp0, hp1; 542 531 532 + if (kvm_is_radix(kvm)) 533 + return H_FUNCTION; 543 534 global = global_invalidates(kvm, 0); 544 535 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 545 536 n = 0; ··· 655 642 unsigned long v, r, rb, mask, bits; 656 643 u64 pte_v, pte_r; 657 644 645 + if (kvm_is_radix(kvm)) 646 + return H_FUNCTION; 658 647 if (pte_index >= kvm->arch.hpt_npte) 659 648 return H_PARAMETER; 660 649 ··· 726 711 int i, n = 1; 727 712 struct revmap_entry *rev = NULL; 728 713 714 + if (kvm_is_radix(kvm)) 715 + return H_FUNCTION; 729 716 if (pte_index >= kvm->arch.hpt_npte) 730 717 return H_PARAMETER; 731 718 if (flags & H_READ_4) 
{ ··· 767 750 unsigned long *rmap; 768 751 long ret = H_NOT_FOUND; 769 752 753 + if (kvm_is_radix(kvm)) 754 + return H_FUNCTION; 770 755 if (pte_index >= kvm->arch.hpt_npte) 771 756 return H_PARAMETER; 772 757 ··· 815 796 unsigned long *rmap; 816 797 long ret = H_NOT_FOUND; 817 798 799 + if (kvm_is_radix(kvm)) 800 + return H_FUNCTION; 818 801 if (pte_index >= kvm->arch.hpt_npte) 819 802 return H_PARAMETER; 820 803
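The `global_invalidates` hunk above sets every bit in `need_tlb_flush` and then clears only the current core's bit; on POWER9 the threads of a core share one TLB, so the first-sibling bit stands in for the whole core. A sketch of that bookkeeping using a plain 64-bit word in place of the kernel's `cpumask`, assuming 8 threads per core (the POWER9 value):

```c
#include <assert.h>
#include <stdint.h>

#define THREADS_PER_CORE 8   /* assumed power of two */

/* Mark every cpu as needing a TLB flush except our own; when the TLB
 * is shared per-core (POWER9), round down to the first sibling so one
 * bit represents the whole core. */
static uint64_t mark_all_but_self(int cpu, int shared_core_tlb)
{
    uint64_t mask = ~0ull;                 /* cpumask_setall() */
    if (shared_core_tlb)
        cpu &= ~(THREADS_PER_CORE - 1);    /* cpu_first_thread_sibling() */
    mask &= ~(1ull << cpu);                /* cpumask_clear_cpu() */
    return mask;
}
```

The same first-sibling trick appears in `radix_flush_cpu` in `book3s_hv.c`, which sets the bit for a departed vcpu's old core and kicks its threads.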
+8 -10
arch/powerpc/kvm/book3s_hv_rm_xics.c
··· 36 36 37 37 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 38 38 u32 new_irq); 39 - static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); 39 + static int xics_opal_set_server(unsigned int hw_irq, int server_cpu); 40 40 41 41 /* -- ICS routines -- */ 42 42 static void ics_rm_check_resend(struct kvmppc_xics *xics, ··· 70 70 hcpu = hcore << threads_shift; 71 71 kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; 72 72 smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); 73 - if (paca[hcpu].kvm_hstate.xics_phys) 74 - icp_native_cause_ipi_rm(hcpu); 75 - else 76 - opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), 77 - IPI_PRIORITY); 73 + kvmppc_set_host_ipi(hcpu, 1); 74 + smp_mb(); 75 + kvmhv_rm_send_ipi(hcpu); 78 76 } 79 77 #else 80 78 static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } ··· 728 730 ++vcpu->stat.pthru_host; 729 731 if (state->intr_cpu != pcpu) { 730 732 ++vcpu->stat.pthru_bad_aff; 731 - xics_opal_rm_set_server(state->host_irq, pcpu); 733 + xics_opal_set_server(state->host_irq, pcpu); 732 734 } 733 735 state->intr_cpu = -1; 734 736 } ··· 756 758 if (xics_phys) { 757 759 _stwcix(xics_phys + XICS_XIRR, xirr); 758 760 } else { 759 - rc = opal_rm_int_eoi(be32_to_cpu(xirr)); 761 + rc = opal_int_eoi(be32_to_cpu(xirr)); 760 762 *again = rc > 0; 761 763 } 762 764 } 763 765 764 - static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) 766 + static int xics_opal_set_server(unsigned int hw_irq, int server_cpu) 765 767 { 766 768 unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; 767 769 768 - return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); 770 + return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); 769 771 } 770 772 771 773 /*
+127 -27
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 148 148 addi r1, r1, 112 149 149 ld r7, HSTATE_HOST_MSR(r13) 150 150 151 + /* 152 + * If we came back from the guest via a relocation-on interrupt, 153 + * we will be in virtual mode at this point, which makes it a 154 + * little easier to get back to the caller. 155 + */ 156 + mfmsr r0 157 + andi. r0, r0, MSR_IR /* in real mode? */ 158 + bne .Lvirt_return 159 + 151 160 cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK 152 161 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 153 162 beq 11f ··· 189 180 15: mtspr SPRN_HSRR0, r8 190 181 mtspr SPRN_HSRR1, r7 191 182 ba 0xe80 183 + 184 + /* Virtual-mode return - can't get here for HMI or machine check */ 185 + .Lvirt_return: 186 + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 187 + beq 16f 188 + cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL 189 + beq 17f 190 + andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */ 191 + beq 18f 192 + mtmsrd r7, 1 /* if so then re-enable them */ 193 + 18: mtlr r8 194 + blr 195 + 196 + 16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */ 197 + mtspr SPRN_HSRR1, r7 198 + b exc_virt_0x4500_hardware_interrupt 199 + 200 + 17: mtspr SPRN_HSRR0, r8 201 + mtspr SPRN_HSRR1, r7 202 + b exc_virt_0x4e80_h_doorbell 192 203 193 204 kvmppc_primary_no_guest: 194 205 /* We handle this much like a ceded vcpu */ ··· 547 518 /* Stack frame offsets */ 548 519 #define STACK_SLOT_TID (112-16) 549 520 #define STACK_SLOT_PSSCR (112-24) 521 + #define STACK_SLOT_PID (112-32) 550 522 551 523 .global kvmppc_hv_entry 552 524 kvmppc_hv_entry: ··· 560 530 * R1 = host R1 561 531 * R2 = TOC 562 532 * all other volatile GPRS = free 533 + * Does not preserve non-volatile GPRs or CR fields 563 534 */ 564 535 mflr r0 565 536 std r0, PPC_LR_STKOFF(r1) ··· 580 549 bl kvmhv_start_timing 581 550 1: 582 551 #endif 583 - /* Clear out SLB */ 552 + 553 + /* Use cr7 as an indication of radix mode */ 554 + ld r5, HSTATE_KVM_VCORE(r13) 555 + ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ 556 + lbz r0, KVM_RADIX(r9) 557 + cmpwi cr7, r0, 0 558 + 
559 + /* Clear out SLB if hash */ 560 + bne cr7, 2f 584 561 li r6,0 585 562 slbmte r6,r6 586 563 slbia 587 564 ptesync 588 - 565 + 2: 589 566 /* 590 567 * POWER7/POWER8 host -> guest partition switch code. 591 568 * We don't have to lock against concurrent tlbies, 592 569 * but we do have to coordinate across hardware threads. 593 570 */ 594 571 /* Set bit in entry map iff exit map is zero. */ 595 - ld r5, HSTATE_KVM_VCORE(r13) 596 572 li r7, 1 597 573 lbz r6, HSTATE_PTID(r13) 598 574 sld r7, r7, r6 599 - addi r9, r5, VCORE_ENTRY_EXIT 600 - 21: lwarx r3, 0, r9 575 + addi r8, r5, VCORE_ENTRY_EXIT 576 + 21: lwarx r3, 0, r8 601 577 cmpwi r3, 0x100 /* any threads starting to exit? */ 602 578 bge secondary_too_late /* if so we're too late to the party */ 603 579 or r3, r3, r7 604 - stwcx. r3, 0, r9 580 + stwcx. r3, 0, r8 605 581 bne 21b 606 582 607 583 /* Primary thread switches to guest partition. */ 608 - ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ 609 584 cmpwi r6,0 610 585 bne 10f 611 586 lwz r7,KVM_LPID(r9) ··· 627 590 628 591 /* See if we need to flush the TLB */ 629 592 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ 593 + BEGIN_FTR_SECTION 594 + /* 595 + * On POWER9, individual threads can come in here, but the 596 + * TLB is shared between the 4 threads in a core, hence 597 + * invalidating on one thread invalidates for all. 598 + * Thus we make all 4 threads use the same bit here. 599 + */ 600 + clrrdi r6,r6,2 601 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 630 602 clrldi r7,r6,64-6 /* extract bit number (6 bits) */ 631 603 srdi r6,r6,6 /* doubleword number */ 632 604 sldi r6,r6,3 /* address offset */ 633 605 add r6,r6,r9 634 606 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ 635 - li r0,1 636 - sld r0,r0,r7 607 + li r8,1 608 + sld r8,r8,r7 637 609 ld r7,0(r6) 638 - and. r7,r7,r0 610 + and. r7,r7,r8 639 611 beq 22f 640 - 23: ldarx r7,0,r6 /* if set, clear the bit */ 641 - andc r7,r7,r0 642 - stdcx. 
r7,0,r6 643 - bne 23b 644 612 /* Flush the TLB of any entries for this LPID */ 645 - lwz r6,KVM_TLB_SETS(r9) 646 - li r0,0 /* RS for P9 version of tlbiel */ 647 - mtctr r6 613 + lwz r0,KVM_TLB_SETS(r9) 614 + mtctr r0 648 615 li r7,0x800 /* IS field = 0b10 */ 649 616 ptesync 650 - 28: tlbiel r7 617 + li r0,0 /* RS for P9 version of tlbiel */ 618 + bne cr7, 29f 619 + 28: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ 651 620 addi r7,r7,0x1000 652 621 bdnz 28b 653 - ptesync 622 + b 30f 623 + 29: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ 624 + addi r7,r7,0x1000 625 + bdnz 29b 626 + 30: ptesync 627 + 23: ldarx r7,0,r6 /* clear the bit after TLB flushed */ 628 + andc r7,r7,r8 629 + stdcx. r7,0,r6 630 + bne 23b 654 631 655 632 /* Add timebase offset onto timebase */ 656 633 22: ld r8,VCORE_TB_OFFSET(r5) ··· 709 658 beq kvmppc_primary_no_guest 710 659 kvmppc_got_guest: 711 660 712 - /* Load up guest SLB entries */ 661 + /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */ 713 662 lwz r5,VCPU_SLB_MAX(r4) 714 663 cmpwi r5,0 715 664 beq 9f ··· 747 696 BEGIN_FTR_SECTION 748 697 mfspr r5, SPRN_TIDR 749 698 mfspr r6, SPRN_PSSCR 699 + mfspr r7, SPRN_PID 750 700 std r5, STACK_SLOT_TID(r1) 751 701 std r6, STACK_SLOT_PSSCR(r1) 702 + std r7, STACK_SLOT_PID(r1) 752 703 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 753 704 754 705 BEGIN_FTR_SECTION ··· 876 823 mtspr SPRN_BESCR, r6 877 824 mtspr SPRN_PID, r7 878 825 mtspr SPRN_WORT, r8 826 + BEGIN_FTR_SECTION 827 + PPC_INVALIDATE_ERAT 828 + END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 879 829 BEGIN_FTR_SECTION 880 830 /* POWER8-only registers */ 881 831 ld r5, VCPU_TCSCR(r4) ··· 1113 1057 kvmppc_interrupt_hv: 1114 1058 /* 1115 1059 * Register contents: 1116 - * R12 = interrupt vector 1060 + * R12 = (guest CR << 32) | interrupt vector 1117 1061 * R13 = PACA 1118 - * guest CR, R12 saved in shadow VCPU SCRATCH1/0 1062 + * guest R12 saved in shadow VCPU SCRATCH0 1063 + * guest CTR saved in shadow VCPU SCRATCH1 if 
RELOCATABLE 1119 1064 * guest R13 saved in SPRN_SCRATCH0 1120 1065 */ 1121 1066 std r9, HSTATE_SCRATCH2(r13) 1122 - 1123 1067 lbz r9, HSTATE_IN_GUEST(r13) 1124 1068 cmpwi r9, KVM_GUEST_MODE_HOST_HV 1125 1069 beq kvmppc_bad_host_intr ··· 1150 1094 std r10, VCPU_GPR(R10)(r9) 1151 1095 std r11, VCPU_GPR(R11)(r9) 1152 1096 ld r3, HSTATE_SCRATCH0(r13) 1153 - lwz r4, HSTATE_SCRATCH1(r13) 1154 1097 std r3, VCPU_GPR(R12)(r9) 1098 + /* CR is in the high half of r12 */ 1099 + srdi r4, r12, 32 1155 1100 stw r4, VCPU_CR(r9) 1156 1101 BEGIN_FTR_SECTION 1157 1102 ld r3, HSTATE_CFAR(r13) ··· 1171 1114 mfspr r11, SPRN_SRR1 1172 1115 std r10, VCPU_SRR0(r9) 1173 1116 std r11, VCPU_SRR1(r9) 1117 + /* trap is in the low half of r12, clear CR from the high half */ 1118 + clrldi r12, r12, 32 1174 1119 andi. r0, r12, 2 /* need to read HSRR0/1? */ 1175 1120 beq 1f 1176 1121 mfspr r10, SPRN_HSRR0 ··· 1208 1149 11: stw r3,VCPU_HEIR(r9) 1209 1150 1210 1151 /* these are volatile across C function calls */ 1152 + #ifdef CONFIG_RELOCATABLE 1153 + ld r3, HSTATE_SCRATCH1(r13) 1154 + mtctr r3 1155 + #else 1211 1156 mfctr r3 1157 + #endif 1212 1158 mfxer r4 1213 1159 std r3, VCPU_CTR(r9) 1214 1160 std r4, VCPU_XER(r9) ··· 1349 1285 mtspr SPRN_CTRLT,r6 1350 1286 4: 1351 1287 /* Read the guest SLB and save it away */ 1288 + ld r5, VCPU_KVM(r9) 1289 + lbz r0, KVM_RADIX(r5) 1290 + cmpwi r0, 0 1291 + li r5, 0 1292 + bne 3f /* for radix, save 0 entries */ 1352 1293 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ 1353 1294 mtctr r0 1354 1295 li r6,0 1355 1296 addi r7,r9,VCPU_SLB 1356 - li r5,0 1357 1297 1: slbmfee r8,r6 1358 1298 andis. 
r0,r8,SLB_ESID_V@h 1359 1299 beq 2f ··· 1369 1301 addi r5,r5,1 1370 1302 2: addi r6,r6,1 1371 1303 bdnz 1b 1372 - stw r5,VCPU_SLB_MAX(r9) 1304 + 3: stw r5,VCPU_SLB_MAX(r9) 1373 1305 1374 1306 /* 1375 1307 * Save the guest PURR/SPURR ··· 1618 1550 BEGIN_FTR_SECTION 1619 1551 ld r5, STACK_SLOT_TID(r1) 1620 1552 ld r6, STACK_SLOT_PSSCR(r1) 1553 + ld r7, STACK_SLOT_PID(r1) 1621 1554 mtspr SPRN_TIDR, r5 1622 1555 mtspr SPRN_PSSCR, r6 1556 + mtspr SPRN_PID, r7 1623 1557 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1558 + BEGIN_FTR_SECTION 1559 + PPC_INVALIDATE_ERAT 1560 + END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 1624 1561 1625 1562 /* 1626 1563 * POWER7/POWER8 guest -> host partition switch code. ··· 1736 1663 isync 1737 1664 1738 1665 /* load host SLB entries */ 1666 + BEGIN_MMU_FTR_SECTION 1667 + b 0f 1668 + END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) 1739 1669 ld r8,PACA_SLBSHADOWPTR(r13) 1740 1670 1741 1671 .rept SLB_NUM_BOLTED ··· 1751 1675 slbmte r6,r5 1752 1676 1: addi r8,r8,16 1753 1677 .endr 1754 - 1678 + 0: 1755 1679 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1756 1680 /* Finish timing, if we have a vcpu */ 1757 1681 ld r4, HSTATE_KVM_VCPU(r13) ··· 1778 1702 * reflect the HDSI to the guest as a DSI. 1779 1703 */ 1780 1704 kvmppc_hdsi: 1705 + ld r3, VCPU_KVM(r9) 1706 + lbz r0, KVM_RADIX(r3) 1707 + cmpwi r0, 0 1781 1708 mfspr r4, SPRN_HDAR 1782 1709 mfspr r6, SPRN_HDSISR 1710 + bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ 1783 1711 /* HPTE not found fault or protection fault? */ 1784 1712 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h 1785 1713 beq 1f /* if not, send it to the guest */ 1714 + BEGIN_FTR_SECTION 1715 + mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 1716 + b 4f 1717 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1786 1718 andi. r0, r11, MSR_DR /* data relocation enabled? 
*/ 1787 1719 beq 3f 1788 1720 clrrdi r0, r4, 28 ··· 1860 1776 stb r0, HSTATE_IN_GUEST(r13) 1861 1777 b guest_exit_cont 1862 1778 1779 + .Lradix_hdsi: 1780 + std r4, VCPU_FAULT_DAR(r9) 1781 + stw r6, VCPU_FAULT_DSISR(r9) 1782 + .Lradix_hisi: 1783 + mfspr r5, SPRN_ASDR 1784 + std r5, VCPU_FAULT_GPA(r9) 1785 + b guest_exit_cont 1786 + 1863 1787 /* 1864 1788 * Similarly for an HISI, reflect it to the guest as an ISI unless 1865 1789 * it is an HPTE not found fault for a page that we have paged out. 1866 1790 */ 1867 1791 kvmppc_hisi: 1792 + ld r3, VCPU_KVM(r9) 1793 + lbz r0, KVM_RADIX(r3) 1794 + cmpwi r0, 0 1795 + bne .Lradix_hisi /* for radix, just save ASDR */ 1868 1796 andis. r0, r11, SRR1_ISI_NOPT@h 1869 1797 beq 1f 1798 + BEGIN_FTR_SECTION 1799 + mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 1800 + b 4f 1801 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1870 1802 andi. r0, r11, MSR_IR /* instruction relocation enabled? */ 1871 1803 beq 3f 1872 1804 clrrdi r0, r10, 28
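The exit-path hunks above change the register convention so that `R12 = (guest CR << 32) | interrupt vector`, freeing `HSTATE_SCRATCH1` to hold the guest CTR on relocatable kernels. A C sketch of the packing and of the `srdi`/`clrldi` extractions the assembly performs:

```c
#include <stdint.h>

/* Sketch of the new r12 convention: guest CR in the high 32 bits,
 * trap vector in the low 32 bits. */
static uint64_t pack_r12(uint32_t guest_cr, uint32_t trap)
{
    return ((uint64_t)guest_cr << 32) | trap;
}

/* srdi r4, r12, 32 -- recover the guest CR from the high half. */
static uint32_t unpack_cr(uint64_t r12)
{
    return (uint32_t)(r12 >> 32);
}

/* clrldi r12, r12, 32 -- clear CR so only the trap number remains. */
static uint32_t unpack_trap(uint64_t r12)
{
    return (uint32_t)(r12 & 0xffffffffu);
}
```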
+25 -7
arch/powerpc/kvm/book3s_segment.S
··· 167 167 * * 168 168 *****************************************************************************/ 169 169 170 - .global kvmppc_handler_trampoline_exit 171 - kvmppc_handler_trampoline_exit: 172 - 173 170 .global kvmppc_interrupt_pr 174 171 kvmppc_interrupt_pr: 172 + /* 64-bit entry. Register usage at this point: 173 + * 174 + * SPRG_SCRATCH0 = guest R13 175 + * R12 = (guest CR << 32) | exit handler id 176 + * R13 = PACA 177 + * HSTATE.SCRATCH0 = guest R12 178 + * HSTATE.SCRATCH1 = guest CTR if RELOCATABLE 179 + */ 180 + #ifdef CONFIG_PPC64 181 + /* Match 32-bit entry */ 182 + #ifdef CONFIG_RELOCATABLE 183 + std r9, HSTATE_SCRATCH2(r13) 184 + ld r9, HSTATE_SCRATCH1(r13) 185 + mtctr r9 186 + ld r9, HSTATE_SCRATCH2(r13) 187 + #endif 188 + rotldi r12, r12, 32 /* Flip R12 halves for stw */ 189 + stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ 190 + srdi r12, r12, 32 /* shift trap into low half */ 191 + #endif 175 192 193 + .global kvmppc_handler_trampoline_exit 194 + kvmppc_handler_trampoline_exit: 176 195 /* Register usage at this point: 177 196 * 178 - * SPRG_SCRATCH0 = guest R13 179 - * R12 = exit handler id 180 - * R13 = shadow vcpu (32-bit) or PACA (64-bit) 197 + * SPRG_SCRATCH0 = guest R13 198 + * R12 = exit handler id 199 + * R13 = shadow vcpu (32-bit) or PACA (64-bit) 181 200 * HSTATE.SCRATCH0 = guest R12 182 201 * HSTATE.SCRATCH1 = guest CR 183 - * 184 202 */ 185 203 186 204 /* Save registers */
+32
arch/powerpc/kvm/powerpc.c
··· 565 565 case KVM_CAP_PPC_HWRNG: 566 566 r = kvmppc_hwrng_present(); 567 567 break; 568 + case KVM_CAP_PPC_MMU_RADIX: 569 + r = !!(hv_enabled && radix_enabled()); 570 + break; 571 + case KVM_CAP_PPC_MMU_HASH_V3: 572 + r = !!(hv_enabled && !radix_enabled() && 573 + cpu_has_feature(CPU_FTR_ARCH_300)); 574 + break; 568 575 #endif 569 576 case KVM_CAP_SYNC_MMU: 570 577 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE ··· 1473 1466 struct kvm *kvm = filp->private_data; 1474 1467 1475 1468 r = kvm_vm_ioctl_rtas_define_token(kvm, argp); 1469 + break; 1470 + } 1471 + case KVM_PPC_CONFIGURE_V3_MMU: { 1472 + struct kvm *kvm = filp->private_data; 1473 + struct kvm_ppc_mmuv3_cfg cfg; 1474 + 1475 + r = -EINVAL; 1476 + if (!kvm->arch.kvm_ops->configure_mmu) 1477 + goto out; 1478 + r = -EFAULT; 1479 + if (copy_from_user(&cfg, argp, sizeof(cfg))) 1480 + goto out; 1481 + r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg); 1482 + break; 1483 + } 1484 + case KVM_PPC_GET_RMMU_INFO: { 1485 + struct kvm *kvm = filp->private_data; 1486 + struct kvm_ppc_rmmu_info info; 1487 + 1488 + r = -EINVAL; 1489 + if (!kvm->arch.kvm_ops->get_rmmu_info) 1490 + goto out; 1491 + r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info); 1492 + if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) 1493 + r = -EFAULT; 1476 1494 break; 1477 1495 } 1478 1496 default: {
-2
arch/powerpc/lib/Makefile
··· 21 21 obj64-$(CONFIG_SMP) += locks.o 22 22 obj64-$(CONFIG_ALTIVEC) += vmx-helper.o 23 23 24 - ifeq ($(CONFIG_GENERIC_CSUM),) 25 24 obj-y += checksum_$(BITS).o checksum_wrappers.o 26 - endif 27 25 28 26 obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o 29 27
+10 -2
arch/powerpc/lib/checksum_64.S
··· 36 36 * work to calculate the correct checksum, we ignore that case 37 37 * and take the potential slowdown of unaligned loads. 38 38 */ 39 - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ 39 + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ 40 40 beq .Lcsum_aligned 41 41 42 42 li r7,4 ··· 168 168 beq .Lcsum_finish 169 169 170 170 lbz r6,0(r3) 171 + #ifdef __BIG_ENDIAN__ 171 172 sldi r9,r6,8 /* Pad the byte out to 16 bits */ 172 173 adde r0,r0,r9 174 + #else 175 + adde r0,r0,r6 176 + #endif 173 177 174 178 .Lcsum_finish: 175 179 addze r0,r0 /* add in final carry */ ··· 228 224 * If the source and destination are relatively unaligned we only 229 225 * align the source. This keeps things simple. 230 226 */ 231 - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ 227 + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ 232 228 beq .Lcopy_aligned 233 229 234 230 li r9,4 ··· 390 386 beq .Lcopy_finish 391 387 392 388 srcnr; lbz r6,0(r3) 389 + #ifdef __BIG_ENDIAN__ 393 390 sldi r9,r6,8 /* Pad the byte out to 16 bits */ 394 391 adde r0,r0,r9 392 + #else 393 + adde r0,r0,r6 394 + #endif 395 395 dstnr; stb r6,0(r4) 396 396 397 397 .Lcopy_finish:
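The `checksum_64.S` hunks add little-endian handling for the trailing odd byte: in a 16-bit one's-complement sum, a big-endian byte occupies the high half of its 16-bit slot and must be shifted left by 8 (`sldi r9,r6,8`), while on little-endian it is already the low half. A minimal C sketch of that distinction:

```c
#include <stdbool.h>
#include <stdint.h>

/* Sketch of folding the final odd byte into a running checksum:
 * big-endian pads the byte out to the high half of a 16-bit word,
 * little-endian adds it as-is. */
static uint64_t add_trailing_byte(uint64_t sum, uint8_t last,
                                  bool big_endian)
{
    if (big_endian)
        return sum + ((uint64_t)last << 8); /* sldi r9,r6,8 */
    return sum + last;
}
```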
+44 -1
arch/powerpc/lib/code-patching.c
··· 32 32 return patch_instruction(addr, create_branch(addr, target, flags)); 33 33 } 34 34 35 + bool is_offset_in_branch_range(long offset) 36 + { 37 + /* 38 + * Powerpc branch instruction is : 39 + * 40 + * 0 6 30 31 41 + * +---------+----------------+---+---+ 42 + * | opcode | LI |AA |LK | 43 + * +---------+----------------+---+---+ 44 + * Where AA = 0 and LK = 0 45 + * 46 + * LI is a signed 24 bits integer. The real branch offset is computed 47 + * by: imm32 = SignExtend(LI:'0b00', 32); 48 + * 49 + * So the maximum forward branch should be: 50 + * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc 51 + * The maximum backward branch should be: 52 + * (0xff800000 << 2) = 0xfe000000 = -0x2000000 53 + */ 54 + return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); 55 + } 56 + 57 + /* 58 + * Helper to check if a given instruction is a conditional branch 59 + * Derived from the conditional checks in analyse_instr() 60 + */ 61 + bool __kprobes is_conditional_branch(unsigned int instr) 62 + { 63 + unsigned int opcode = instr >> 26; 64 + 65 + if (opcode == 16) /* bc, bca, bcl, bcla */ 66 + return true; 67 + if (opcode == 19) { 68 + switch ((instr >> 1) & 0x3ff) { 69 + case 16: /* bclr, bclrl */ 70 + case 528: /* bcctr, bcctrl */ 71 + case 560: /* bctar, bctarl */ 72 + return true; 73 + } 74 + } 75 + return false; 76 + } 77 + 35 78 unsigned int create_branch(const unsigned int *addr, 36 79 unsigned long target, int flags) 37 80 { ··· 86 43 offset = offset - (unsigned long)addr; 87 44 88 45 /* Check we can represent the target in the instruction format */ 89 - if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3) 46 + if (!is_offset_in_branch_range(offset)) 90 47 return 0; 91 48 92 49 /* Mask out the flags and target, so they don't step on each other. */
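The two new `code-patching.c` helpers above are self-contained enough to port directly to user space; this sketch reproduces them as written in the diff (an unconditional I-form branch carries a signed 24-bit `LI` field scaled by 4, and conditional branches are opcode 16 plus the opcode-19 `bclr`/`bcctr`/`bctar` forms):

```c
#include <stdbool.h>
#include <stdint.h>

/* User-space copy of is_offset_in_branch_range(): the displacement must
 * fit in SignExtend(LI:'0b00') and be word-aligned. */
static bool is_offset_in_branch_range(long offset)
{
    return offset >= -0x2000000 && offset <= 0x1fffffc &&
           !(offset & 0x3);
}

/* User-space copy of is_conditional_branch(): primary opcode 16 covers
 * bc/bca/bcl/bcla; opcode 19 with extended opcode 16, 528 or 560 covers
 * bclr/bcctr/bctar and their link forms. */
static bool is_conditional_branch(uint32_t instr)
{
    uint32_t opcode = instr >> 26;

    if (opcode == 16)
        return true;
    if (opcode == 19) {
        switch ((instr >> 1) & 0x3ff) {
        case 16:    /* bclr, bclrl */
        case 528:   /* bcctr, bcctrl */
        case 560:   /* bctar, bctarl */
            return true;
        }
    }
    return false;
}
```

`create_branch()` now calls `is_offset_in_branch_range()` instead of open-coding the same bounds check, and the optprobes work added in this merge uses both helpers when deciding whether a kprobe site can be replaced with a branch.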
+2 -2
arch/powerpc/lib/copypage_64.S
··· 26 26 ori r5,r5,PAGE_SIZE@l 27 27 BEGIN_FTR_SECTION 28 28 ld r10,PPC64_CACHES@toc(r2) 29 - lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ 30 - lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */ 29 + lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */ 30 + lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */ 31 31 li r9,0 32 32 srd r8,r5,r11 33 33
+2 -4
arch/powerpc/lib/sstep.c
··· 1803 1803 return 0; 1804 1804 if (op.ea & (size - 1)) 1805 1805 break; /* can't handle misaligned */ 1806 - err = -EFAULT; 1807 1806 if (!address_ok(regs, op.ea, size)) 1808 - goto ldst_done; 1807 + return 0; 1809 1808 err = 0; 1810 1809 switch (size) { 1811 1810 case 4: ··· 1827 1828 return 0; 1828 1829 if (op.ea & (size - 1)) 1829 1830 break; /* can't handle misaligned */ 1830 - err = -EFAULT; 1831 1831 if (!address_ok(regs, op.ea, size)) 1832 - goto ldst_done; 1832 + return 0; 1833 1833 err = 0; 1834 1834 switch (size) { 1835 1835 case 4:
+3 -3
arch/powerpc/lib/string_64.S
··· 152 152 addi r3,r3,8 153 153 addi r4,r4,-8 154 154 155 - /* Destination is 16 byte aligned, need to get it cacheline aligned */ 156 - 11: lwz r7,DCACHEL1LOGLINESIZE(r5) 157 - lwz r9,DCACHEL1LINESIZE(r5) 155 + /* Destination is 16 byte aligned, need to get it cache block aligned */ 156 + 11: lwz r7,DCACHEL1LOGBLOCKSIZE(r5) 157 + lwz r9,DCACHEL1BLOCKSIZE(r5) 158 158 159 159 /* 160 160 * With worst case alignment the long clear loop takes a minimum
+6 -4
arch/powerpc/mm/copro_fault.c
··· 67 67 if (!(vma->vm_flags & (VM_READ | VM_EXEC))) 68 68 goto out_unlock; 69 69 /* 70 - * protfault should only happen due to us 71 - * mapping a region readonly temporarily. PROT_NONE 72 - * is also covered by the VMA check above. 70 + * PROT_NONE is covered by the VMA check above. 71 + * and hash should get a NOHPTE fault instead of 72 + * a PROTFAULT in case fixup is needed for things 73 + * like autonuma. 73 74 */ 74 - WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); 75 + if (!radix_enabled()) 76 + WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); 75 77 } 76 78 77 79 ret = 0;
+33 -10
arch/powerpc/mm/fault.c
··· 407 407 (cpu_has_feature(CPU_FTR_NOEXECUTE) || 408 408 !(vma->vm_flags & (VM_READ | VM_WRITE)))) 409 409 goto bad_area; 410 - 411 - #ifdef CONFIG_PPC_STD_MMU 412 - /* 413 - * protfault should only happen due to us 414 - * mapping a region readonly temporarily. PROT_NONE 415 - * is also covered by the VMA check above. 416 - */ 417 - WARN_ON_ONCE(error_code & DSISR_PROTFAULT); 418 - #endif /* CONFIG_PPC_STD_MMU */ 419 410 /* a write */ 420 411 } else if (is_write) { 421 412 if (!(vma->vm_flags & VM_WRITE)) ··· 416 425 } else { 417 426 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) 418 427 goto bad_area; 419 - WARN_ON_ONCE(error_code & DSISR_PROTFAULT); 420 428 } 429 + #ifdef CONFIG_PPC_STD_MMU 430 + /* 431 + * For hash translation mode, we should never get a 432 + * PROTFAULT. Any update to pte to reduce access will result in us 433 + * removing the hash page table entry, thus resulting in a DSISR_NOHPTE 434 + * fault instead of DSISR_PROTFAULT. 435 + * 436 + * A pte update to relax the access will not result in a hash page table 437 + * entry invalidate and hence can result in DSISR_PROTFAULT. 438 + * ptep_set_access_flags() doesn't do a hpte flush. This is why we have 439 + * the special !is_write in the below conditional. 440 + * 441 + * For platforms that doesn't supports coherent icache and do support 442 + * per page noexec bit, we do setup things such that we do the 443 + * sync between D/I cache via fault. But that is handled via low level 444 + * hash fault code (hash_page_do_lazy_icache()) and we should not reach 445 + * here in such case. 446 + * 447 + * For wrong access that can result in PROTFAULT, the above vma->vm_flags 448 + * check should handle those and hence we should fall to the bad_area 449 + * handling correctly. 450 + * 451 + * For embedded with per page exec support that doesn't support coherent 452 + * icache we do get PROTFAULT and we handle that D/I cache sync in 453 + * set_pte_at while taking the noexec/prot fault. 
Hence this is WARN_ON 454 + * is conditional for server MMU. 455 + * 456 + * For radix, we can get prot fault for autonuma case, because radix 457 + * page table will have them marked noaccess for user. 458 + */ 459 + if (!radix_enabled() && !is_write) 460 + WARN_ON_ONCE(error_code & DSISR_PROTFAULT); 461 + #endif /* CONFIG_PPC_STD_MMU */ 421 462 422 463 /* 423 464 * If for any reason at all we couldn't handle the fault,
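The long comment above narrows the `WARN_ON_ONCE` to hash translation and non-write faults. A sketch of the resulting predicate, with an illustrative stand-in for the `DSISR_PROTFAULT` bit value:

```c
#include <stdbool.h>

/* Illustrative stand-in for the kernel's DSISR_PROTFAULT bit. */
#define DSISR_PROTFAULT_BIT 0x08000000UL

/* Sketch of the tightened warning condition: hash translation should
 * never see a PROTFAULT, except on the !is_write path where
 * ptep_set_access_flags() relaxing access skips the HPT flush; radix
 * legitimately takes PROTFAULTs for autonuma, so it is excluded. */
static bool should_warn_protfault(bool radix, bool is_write,
                                  unsigned long error_code)
{
    return !radix && !is_write &&
           (error_code & DSISR_PROTFAULT_BIT) != 0;
}
```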
+62
arch/powerpc/mm/hash_utils_64.c
··· 35 35 #include <linux/memblock.h> 36 36 #include <linux/context_tracking.h> 37 37 #include <linux/libfdt.h> 38 + #include <linux/debugfs.h> 38 39 40 + #include <asm/debug.h> 39 41 #include <asm/processor.h> 40 42 #include <asm/pgtable.h> 41 43 #include <asm/mmu.h> ··· 749 747 } 750 748 751 749 #ifdef CONFIG_MEMORY_HOTPLUG 750 + void resize_hpt_for_hotplug(unsigned long new_mem_size) 751 + { 752 + unsigned target_hpt_shift; 753 + 754 + if (!mmu_hash_ops.resize_hpt) 755 + return; 756 + 757 + target_hpt_shift = htab_shift_for_mem_size(new_mem_size); 758 + 759 + /* 760 + * To avoid lots of HPT resizes if memory size is fluctuating 761 + * across a boundary, we deliberately have some hysterisis 762 + * here: we immediately increase the HPT size if the target 763 + * shift exceeds the current shift, but we won't attempt to 764 + * reduce unless the target shift is at least 2 below the 765 + * current shift 766 + */ 767 + if ((target_hpt_shift > ppc64_pft_size) 768 + || (target_hpt_shift < (ppc64_pft_size - 1))) { 769 + int rc; 770 + 771 + rc = mmu_hash_ops.resize_hpt(target_hpt_shift); 772 + if (rc) 773 + printk(KERN_WARNING 774 + "Unable to resize hash page table to target order %d: %d\n", 775 + target_hpt_shift, rc); 776 + } 777 + } 778 + 752 779 int hash__create_section_mapping(unsigned long start, unsigned long end) 753 780 { 754 781 int rc = htab_bolt_mapping(start, end, __pa(start), ··· 1826 1795 /* Finally limit subsequent allocations */ 1827 1796 memblock_set_current_limit(ppc64_rma_size); 1828 1797 } 1798 + 1799 + #ifdef CONFIG_DEBUG_FS 1800 + 1801 + static int hpt_order_get(void *data, u64 *val) 1802 + { 1803 + *val = ppc64_pft_size; 1804 + return 0; 1805 + } 1806 + 1807 + static int hpt_order_set(void *data, u64 val) 1808 + { 1809 + if (!mmu_hash_ops.resize_hpt) 1810 + return -ENODEV; 1811 + 1812 + return mmu_hash_ops.resize_hpt(val); 1813 + } 1814 + 1815 + DEFINE_SIMPLE_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n"); 1816 + 1817 + 
static int __init hash64_debugfs(void) 1818 + { 1819 + if (!debugfs_create_file("hpt_order", 0600, powerpc_debugfs_root, 1820 + NULL, &fops_hpt_order)) { 1821 + pr_err("lpar: unable to create hpt_order debugsfs file\n"); 1822 + } 1823 + 1824 + return 0; 1825 + } 1826 + machine_device_initcall(pseries, hash64_debugfs); 1827 + 1828 + #endif /* CONFIG_DEBUG_FS */
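The new `resize_hpt_for_hotplug()` deliberately applies hysteresis so the hash page table doesn't flap when memory hotplug hovers near a size boundary: grow as soon as the target order exceeds the current one, shrink only once the target is at least two orders below it. A C sketch of that decision:

```c
/* Sketch of the hysteresis in resize_hpt_for_hotplug(): mirrors the
 * (target > current) || (target < current - 1) test from the diff.
 * Returns +1 to grow, -1 to shrink, 0 to leave the HPT alone. */
static int hpt_resize_decision(unsigned int current_shift,
                               unsigned int target_shift)
{
    if (target_shift > current_shift)
        return 1;               /* grow immediately */
    if (target_shift < current_shift - 1)
        return -1;              /* shrink only with an order of slack */
    return 0;
}
```

The companion `hpt_order` debugfs file bypasses this hysteresis and calls `resize_hpt()` with whatever order is written, which is useful for testing the guest-visible H_RESIZE_HPT_* path.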
-21
arch/powerpc/mm/hugetlbpage-hash64.c
··· 116 116 *ptep = __pte(new_pte & ~H_PAGE_BUSY); 117 117 return 0; 118 118 } 119 - 120 - #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) 121 - /* 122 - * This enables us to catch the wrong page directory format 123 - * Moved here so that we can use WARN() in the call. 124 - */ 125 - int hugepd_ok(hugepd_t hpd) 126 - { 127 - bool is_hugepd; 128 - unsigned long hpdval; 129 - 130 - hpdval = hpd_val(hpd); 131 - 132 - /* 133 - * We should not find this format in page directory, warn otherwise. 134 - */ 135 - is_hugepd = (((hpdval & 0x3) == 0x0) && ((hpdval & HUGEPD_SHIFT_MASK) != 0)); 136 - WARN(is_hugepd, "Found wrong page directory format\n"); 137 - return 0; 138 - } 139 - #endif
+2 -1
arch/powerpc/mm/init-common.c
··· 41 41 } 42 42 43 43 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; 44 + EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ 44 45 45 46 /* 46 47 * Create a kmem_cache() for pagetables. This is not used for PTE ··· 87 86 88 87 pr_debug("Allocated pgtable cache for order %d\n", shift); 89 88 } 90 - 89 + EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */ 91 90 92 91 void pgtable_cache_init(void) 93 92 {
+35
arch/powerpc/mm/init_64.c
··· 42 42 #include <linux/memblock.h> 43 43 #include <linux/hugetlb.h> 44 44 #include <linux/slab.h> 45 + #include <linux/of_fdt.h> 46 + #include <linux/libfdt.h> 45 47 46 48 #include <asm/pgalloc.h> 47 49 #include <asm/page.h> ··· 346 344 } 347 345 early_param("disable_radix", parse_disable_radix); 348 346 347 + /* 348 + * If we're running under a hypervisor, we need to check the contents of 349 + * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do 350 + * radix. If not, we clear the radix feature bit so we fall back to hash. 351 + */ 352 + static void early_check_vec5(void) 353 + { 354 + unsigned long root, chosen; 355 + int size; 356 + const u8 *vec5; 357 + 358 + root = of_get_flat_dt_root(); 359 + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); 360 + if (chosen == -FDT_ERR_NOTFOUND) 361 + return; 362 + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); 363 + if (!vec5) 364 + return; 365 + if (size <= OV5_INDX(OV5_MMU_RADIX_300) || 366 + !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) 367 + /* Hypervisor doesn't support radix */ 368 + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; 369 + } 370 + 349 371 void __init mmu_early_init_devtree(void) 350 372 { 351 373 /* Disable radix mode based on kernel command line. */ 352 374 /* We don't yet have the machinery to do radix as a guest. */ 353 375 if (disable_radix || !(mfmsr() & MSR_HV)) 354 376 cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; 377 + 378 + /* 379 + * Check /chosen/ibm,architecture-vec-5 if running as a guest. 380 + * When running bare-metal, we can use radix if we like 381 + * even though the ibm,architecture-vec-5 property created by 382 + * skiboot doesn't have the necessary bits set. 383 + */ 384 + if (early_radix_enabled() && !(mfmsr() & MSR_HV)) 385 + early_check_vec5(); 355 386 356 387 if (early_radix_enabled()) 357 388 radix__early_init_devtree();
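`early_check_vec5()` above treats `/chosen/ibm,architecture-vec-5` as a byte array and tests one bit of one byte, falling back to hash if the property is too short or the bit is clear. A generic sketch of that check; the byte index and mask below are placeholders for what the kernel derives from `OV5_INDX()`/`OV5_FEAT()`:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of the option-vector-5 probe: a feature is advertised only if
 * the property is long enough to contain its byte AND the feature bit
 * in that byte is set. A short property means the hypervisor predates
 * the feature, so treat it as absent. */
static bool vec5_has_feature(const uint8_t *vec5, size_t size,
                             size_t byte_index, uint8_t mask)
{
    if (size <= byte_index)
        return false;
    return (vec5[byte_index] & mask) != 0;
}
```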
+4
arch/powerpc/mm/mem.c
··· 134 134 unsigned long nr_pages = size >> PAGE_SHIFT; 135 135 int rc; 136 136 137 + resize_hpt_for_hotplug(memblock_phys_mem_size()); 138 + 137 139 pgdata = NODE_DATA(nid); 138 140 139 141 start = (unsigned long)__va(start); ··· 175 173 * hit that section of memory 176 174 */ 177 175 vm_unmap_aliases(); 176 + 177 + resize_hpt_for_hotplug(memblock_phys_mem_size()); 178 178 179 179 return ret; 180 180 }
+1 -1
arch/powerpc/mm/mmu_context_iommu.c
··· 184 184 * of the CMA zone if possible. NOTE: faulting in + migration 185 185 * can be expensive. Batching can be considered later 186 186 */ 187 - if (get_pageblock_migratetype(page) == MIGRATE_CMA) { 187 + if (is_migrate_cma_page(page)) { 188 188 if (mm_iommu_move_page_from_cma(page)) 189 189 goto populate; 190 190 if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+5 -10
arch/powerpc/mm/numa.c
··· 290 290 291 291 return nid; 292 292 } 293 - EXPORT_SYMBOL_GPL(of_node_to_nid); 293 + EXPORT_SYMBOL(of_node_to_nid); 294 294 295 295 static int __init find_min_common_depth(void) 296 296 { ··· 786 786 fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); 787 787 node_set_online(nid); 788 788 789 - if (!(size = numa_enforce_memory_limit(start, size))) { 790 - if (--ranges) 791 - goto new_range; 792 - else 793 - continue; 794 - } 795 - 796 - memblock_set_node(start, size, &memblock.memory, nid); 789 + size = numa_enforce_memory_limit(start, size); 790 + if (size) 791 + memblock_set_node(start, size, &memblock.memory, nid); 797 792 798 793 if (--ranges) 799 794 goto new_range; ··· 1093 1098 nid = hot_add_node_scn_to_nid(scn_addr); 1094 1099 } 1095 1100 1096 - if (nid < 0 || !node_online(nid)) 1101 + if (nid < 0 || !node_possible(nid)) 1097 1102 nid = first_online_node; 1098 1103 1099 1104 return nid;
+2 -2
arch/powerpc/mm/pgtable-book3s64.c
··· 131 131 int create_section_mapping(unsigned long start, unsigned long end) 132 132 { 133 133 if (radix_enabled()) 134 - return -ENODEV; 134 + return radix__create_section_mapping(start, end); 135 135 136 136 return hash__create_section_mapping(start, end); 137 137 } ··· 139 139 int remove_section_mapping(unsigned long start, unsigned long end) 140 140 { 141 141 if (radix_enabled()) 142 - return -ENODEV; 142 + return radix__remove_section_mapping(start, end); 143 143 144 144 return hash__remove_section_mapping(start, end); 145 145 }
+222 -39
arch/powerpc/mm/pgtable-radix.c
··· 18 18 #include <asm/machdep.h> 19 19 #include <asm/mmu.h> 20 20 #include <asm/firmware.h> 21 + #include <asm/powernv.h> 21 22 22 23 #include <trace/events/thp.h> 23 24 ··· 108 107 return 0; 109 108 } 110 109 110 + static inline void __meminit print_mapping(unsigned long start, 111 + unsigned long end, 112 + unsigned long size) 113 + { 114 + if (end <= start) 115 + return; 116 + 117 + pr_info("Mapped range 0x%lx - 0x%lx with 0x%lx\n", start, end, size); 118 + } 119 + 120 + static int __meminit create_physical_mapping(unsigned long start, 121 + unsigned long end) 122 + { 123 + unsigned long addr, mapping_size = 0; 124 + 125 + start = _ALIGN_UP(start, PAGE_SIZE); 126 + for (addr = start; addr < end; addr += mapping_size) { 127 + unsigned long gap, previous_size; 128 + int rc; 129 + 130 + gap = end - addr; 131 + previous_size = mapping_size; 132 + 133 + if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE && 134 + mmu_psize_defs[MMU_PAGE_1G].shift) 135 + mapping_size = PUD_SIZE; 136 + else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE && 137 + mmu_psize_defs[MMU_PAGE_2M].shift) 138 + mapping_size = PMD_SIZE; 139 + else 140 + mapping_size = PAGE_SIZE; 141 + 142 + if (mapping_size != previous_size) { 143 + print_mapping(start, addr, previous_size); 144 + start = addr; 145 + } 146 + 147 + rc = radix__map_kernel_page((unsigned long)__va(addr), addr, 148 + PAGE_KERNEL_X, mapping_size); 149 + if (rc) 150 + return rc; 151 + } 152 + 153 + print_mapping(start, addr, mapping_size); 154 + return 0; 155 + } 156 + 111 157 static void __init radix_init_pgtable(void) 112 158 { 113 - int loop_count; 114 - u64 base, end, start_addr; 115 159 unsigned long rts_field; 116 160 struct memblock_region *reg; 117 - unsigned long linear_page_size; 118 161 119 162 /* We don't support slb for radix */ 120 163 mmu_slb_size = 0; 121 164 /* 122 165 * Create the linear mapping, using standard page size for now 123 166 */ 124 - loop_count = 0; 125 - for_each_memblock(memory, reg) { 126 - 127 - 
start_addr = reg->base; 128 - 129 - redo: 130 - if (loop_count < 1 && mmu_psize_defs[MMU_PAGE_1G].shift) 131 - linear_page_size = PUD_SIZE; 132 - else if (loop_count < 2 && mmu_psize_defs[MMU_PAGE_2M].shift) 133 - linear_page_size = PMD_SIZE; 134 - else 135 - linear_page_size = PAGE_SIZE; 136 - 137 - base = _ALIGN_UP(start_addr, linear_page_size); 138 - end = _ALIGN_DOWN(reg->base + reg->size, linear_page_size); 139 - 140 - pr_info("Mapping range 0x%lx - 0x%lx with 0x%lx\n", 141 - (unsigned long)base, (unsigned long)end, 142 - linear_page_size); 143 - 144 - while (base < end) { 145 - radix__map_kernel_page((unsigned long)__va(base), 146 - base, PAGE_KERNEL_X, 147 - linear_page_size); 148 - base += linear_page_size; 149 - } 150 - /* 151 - * map the rest using lower page size 152 - */ 153 - if (end < reg->base + reg->size) { 154 - start_addr = end; 155 - loop_count++; 156 - goto redo; 157 - } 158 - } 167 + for_each_memblock(memory, reg) 168 + WARN_ON(create_physical_mapping(reg->base, 169 + reg->base + reg->size)); 159 170 /* 160 171 * Allocate Partition table and process table for the 161 172 * host. 
··· 414 401 mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); 415 402 radix_init_partition_table(); 416 403 radix_init_amor(); 404 + } else { 405 + radix_init_pseries(); 417 406 } 418 407 419 408 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); ··· 453 438 lpcr = mfspr(SPRN_LPCR); 454 439 mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); 455 440 mtspr(SPRN_PTCR, 0); 441 + powernv_set_nmmu_ptcr(0); 456 442 radix__flush_tlb_all(); 457 443 } 458 444 } ··· 483 467 memblock_set_current_limit(first_memblock_base + first_memblock_size); 484 468 } 485 469 470 + #ifdef CONFIG_MEMORY_HOTPLUG 471 + static void free_pte_table(pte_t *pte_start, pmd_t *pmd) 472 + { 473 + pte_t *pte; 474 + int i; 475 + 476 + for (i = 0; i < PTRS_PER_PTE; i++) { 477 + pte = pte_start + i; 478 + if (!pte_none(*pte)) 479 + return; 480 + } 481 + 482 + pte_free_kernel(&init_mm, pte_start); 483 + pmd_clear(pmd); 484 + } 485 + 486 + static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) 487 + { 488 + pmd_t *pmd; 489 + int i; 490 + 491 + for (i = 0; i < PTRS_PER_PMD; i++) { 492 + pmd = pmd_start + i; 493 + if (!pmd_none(*pmd)) 494 + return; 495 + } 496 + 497 + pmd_free(&init_mm, pmd_start); 498 + pud_clear(pud); 499 + } 500 + 501 + static void remove_pte_table(pte_t *pte_start, unsigned long addr, 502 + unsigned long end) 503 + { 504 + unsigned long next; 505 + pte_t *pte; 506 + 507 + pte = pte_start + pte_index(addr); 508 + for (; addr < end; addr = next, pte++) { 509 + next = (addr + PAGE_SIZE) & PAGE_MASK; 510 + if (next > end) 511 + next = end; 512 + 513 + if (!pte_present(*pte)) 514 + continue; 515 + 516 + if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) { 517 + /* 518 + * The vmemmap_free() and remove_section_mapping() 519 + * codepaths call us with aligned addresses. 
520 + */ 521 + WARN_ONCE(1, "%s: unaligned range\n", __func__); 522 + continue; 523 + } 524 + 525 + pte_clear(&init_mm, addr, pte); 526 + } 527 + } 528 + 529 + static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, 530 + unsigned long end) 531 + { 532 + unsigned long next; 533 + pte_t *pte_base; 534 + pmd_t *pmd; 535 + 536 + pmd = pmd_start + pmd_index(addr); 537 + for (; addr < end; addr = next, pmd++) { 538 + next = pmd_addr_end(addr, end); 539 + 540 + if (!pmd_present(*pmd)) 541 + continue; 542 + 543 + if (pmd_huge(*pmd)) { 544 + if (!IS_ALIGNED(addr, PMD_SIZE) || 545 + !IS_ALIGNED(next, PMD_SIZE)) { 546 + WARN_ONCE(1, "%s: unaligned range\n", __func__); 547 + continue; 548 + } 549 + 550 + pte_clear(&init_mm, addr, (pte_t *)pmd); 551 + continue; 552 + } 553 + 554 + pte_base = (pte_t *)pmd_page_vaddr(*pmd); 555 + remove_pte_table(pte_base, addr, next); 556 + free_pte_table(pte_base, pmd); 557 + } 558 + } 559 + 560 + static void remove_pud_table(pud_t *pud_start, unsigned long addr, 561 + unsigned long end) 562 + { 563 + unsigned long next; 564 + pmd_t *pmd_base; 565 + pud_t *pud; 566 + 567 + pud = pud_start + pud_index(addr); 568 + for (; addr < end; addr = next, pud++) { 569 + next = pud_addr_end(addr, end); 570 + 571 + if (!pud_present(*pud)) 572 + continue; 573 + 574 + if (pud_huge(*pud)) { 575 + if (!IS_ALIGNED(addr, PUD_SIZE) || 576 + !IS_ALIGNED(next, PUD_SIZE)) { 577 + WARN_ONCE(1, "%s: unaligned range\n", __func__); 578 + continue; 579 + } 580 + 581 + pte_clear(&init_mm, addr, (pte_t *)pud); 582 + continue; 583 + } 584 + 585 + pmd_base = (pmd_t *)pud_page_vaddr(*pud); 586 + remove_pmd_table(pmd_base, addr, next); 587 + free_pmd_table(pmd_base, pud); 588 + } 589 + } 590 + 591 + static void remove_pagetable(unsigned long start, unsigned long end) 592 + { 593 + unsigned long addr, next; 594 + pud_t *pud_base; 595 + pgd_t *pgd; 596 + 597 + spin_lock(&init_mm.page_table_lock); 598 + 599 + for (addr = start; addr < end; addr = next) { 600 + next = 
pgd_addr_end(addr, end); 601 + 602 + pgd = pgd_offset_k(addr); 603 + if (!pgd_present(*pgd)) 604 + continue; 605 + 606 + if (pgd_huge(*pgd)) { 607 + if (!IS_ALIGNED(addr, PGDIR_SIZE) || 608 + !IS_ALIGNED(next, PGDIR_SIZE)) { 609 + WARN_ONCE(1, "%s: unaligned range\n", __func__); 610 + continue; 611 + } 612 + 613 + pte_clear(&init_mm, addr, (pte_t *)pgd); 614 + continue; 615 + } 616 + 617 + pud_base = (pud_t *)pgd_page_vaddr(*pgd); 618 + remove_pud_table(pud_base, addr, next); 619 + } 620 + 621 + spin_unlock(&init_mm.page_table_lock); 622 + radix__flush_tlb_kernel_range(start, end); 623 + } 624 + 625 + int __ref radix__create_section_mapping(unsigned long start, unsigned long end) 626 + { 627 + return create_physical_mapping(start, end); 628 + } 629 + 630 + int radix__remove_section_mapping(unsigned long start, unsigned long end) 631 + { 632 + remove_pagetable(start, end); 633 + return 0; 634 + } 635 + #endif /* CONFIG_MEMORY_HOTPLUG */ 636 + 486 637 #ifdef CONFIG_SPARSEMEM_VMEMMAP 487 638 int __meminit radix__vmemmap_create_mapping(unsigned long start, 488 639 unsigned long page_size, ··· 665 482 #ifdef CONFIG_MEMORY_HOTPLUG 666 483 void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) 667 484 { 668 - /* FIXME!! intel does more. We should free page tables mapping vmemmap ? */ 485 + remove_pagetable(start, start + page_size); 669 486 } 670 487 #endif 671 488 #endif
+18 -4
arch/powerpc/mm/pgtable_64.c
··· 52 52 #include <asm/sections.h> 53 53 #include <asm/firmware.h> 54 54 #include <asm/dma.h> 55 + #include <asm/powernv.h> 55 56 56 57 #include "mmu_decl.h" 57 58 ··· 437 436 void __init mmu_partition_table_init(void) 438 437 { 439 438 unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; 439 + unsigned long ptcr; 440 440 441 441 BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); 442 442 partition_tb = __va(memblock_alloc_base(patb_size, patb_size, ··· 450 448 * update partition table control register, 451 449 * 64 K size. 452 450 */ 453 - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); 451 + ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); 452 + mtspr(SPRN_PTCR, ptcr); 453 + powernv_set_nmmu_ptcr(ptcr); 454 454 } 455 455 456 456 void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, 457 457 unsigned long dw1) 458 458 { 459 + unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); 460 + 459 461 partition_tb[lpid].patb0 = cpu_to_be64(dw0); 460 462 partition_tb[lpid].patb1 = cpu_to_be64(dw1); 461 463 462 - /* Global flush of TLBs and partition table caches for this lpid */ 464 + /* 465 + * Global flush of TLBs and partition table caches for this lpid. 466 + * The type of flush (hash or radix) depends on what the previous 467 + * use of this partition ID was, not the new use. 468 + */ 463 469 asm volatile("ptesync" : : : "memory"); 464 - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : 465 - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 470 + if (old & PATB_HR) 471 + asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : 472 + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 473 + else 474 + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : 475 + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); 466 476 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 467 477 } 468 478 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+1 -3
arch/powerpc/mm/subpage-prot.c
··· 248 248 nw = (next - addr) >> PAGE_SHIFT; 249 249 250 250 up_write(&mm->mmap_sem); 251 - err = -EFAULT; 252 251 if (__copy_from_user(spp, map, nw * sizeof(u32))) 253 - goto out2; 252 + return -EFAULT; 254 253 map += nw; 255 254 down_write(&mm->mmap_sem); 256 255 ··· 261 262 err = 0; 262 263 out: 263 264 up_write(&mm->mmap_sem); 264 - out2: 265 265 return err; 266 266 }
+5 -6
arch/powerpc/net/bpf_jit.h
··· 157 157 #define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ 158 158 ___PPC_RS(a) | ___PPC_RB(s)) 159 159 #define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ 160 - ___PPC_RS(a) | __PPC_SH(i) | \ 161 - (((i) & 0x20) >> 4)) 160 + ___PPC_RS(a) | __PPC_SH64(i)) 162 161 #define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ 163 162 ___PPC_RS(a) | __PPC_SH(i) | \ 164 163 __PPC_MB(mb) | __PPC_ME(me)) ··· 165 166 ___PPC_RS(a) | __PPC_SH(i) | \ 166 167 __PPC_MB(mb) | __PPC_ME(me)) 167 168 #define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ 168 - ___PPC_RS(a) | __PPC_SH(i) | \ 169 - __PPC_MB64(mb) | (((i) & 0x20) >> 4)) 169 + ___PPC_RS(a) | __PPC_SH64(i) | \ 170 + __PPC_MB64(mb)) 170 171 #define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ 171 - ___PPC_RS(a) | __PPC_SH(i) | \ 172 - __PPC_ME64(me) | (((i) & 0x20) >> 4)) 172 + ___PPC_RS(a) | __PPC_SH64(i) | \ 173 + __PPC_ME64(me)) 173 174 174 175 /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ 175 176 #define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i))
+9 -8
arch/powerpc/net/bpf_jit_comp.c
··· 662 662 */ 663 663 bpf_jit_dump(flen, proglen, pass, code_base); 664 664 665 - if (image) { 666 - bpf_flush_icache(code_base, code_base + (proglen/4)); 665 + bpf_flush_icache(code_base, code_base + (proglen/4)); 666 + 667 667 #ifdef CONFIG_PPC64 668 - /* Function descriptor nastiness: Address + TOC */ 669 - ((u64 *)image)[0] = (u64)code_base; 670 - ((u64 *)image)[1] = local_paca->kernel_toc; 668 + /* Function descriptor nastiness: Address + TOC */ 669 + ((u64 *)image)[0] = (u64)code_base; 670 + ((u64 *)image)[1] = local_paca->kernel_toc; 671 671 #endif 672 - fp->bpf_func = (void *)image; 673 - fp->jited = 1; 674 - } 672 + 673 + fp->bpf_func = (void *)image; 674 + fp->jited = 1; 675 + 675 676 out: 676 677 kfree(addrs); 677 678 return;
+8 -8
arch/powerpc/net/bpf_jit_comp64.c
··· 1044 1044 */ 1045 1045 bpf_jit_dump(flen, proglen, pass, code_base); 1046 1046 1047 - if (image) { 1048 - bpf_flush_icache(bpf_hdr, image + alloclen); 1049 1047 #ifdef PPC64_ELF_ABI_v1 1050 - /* Function descriptor nastiness: Address + TOC */ 1051 - ((u64 *)image)[0] = (u64)code_base; 1052 - ((u64 *)image)[1] = local_paca->kernel_toc; 1048 + /* Function descriptor nastiness: Address + TOC */ 1049 + ((u64 *)image)[0] = (u64)code_base; 1050 + ((u64 *)image)[1] = local_paca->kernel_toc; 1053 1051 #endif 1054 - fp->bpf_func = (void *)image; 1055 - fp->jited = 1; 1056 - } 1052 + 1053 + fp->bpf_func = (void *)image; 1054 + fp->jited = 1; 1055 + 1056 + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); 1057 1057 1058 1058 out: 1059 1059 kfree(addrs);
+1
arch/powerpc/platforms/maple/pci.c
··· 24 24 #include <asm/machdep.h> 25 25 #include <asm/iommu.h> 26 26 #include <asm/ppc-pci.h> 27 + #include <asm/isa-bridge.h> 27 28 28 29 #include "maple.h" 29 30
+127 -15
arch/powerpc/platforms/powernv/idle.c
··· 237 237 show_fastsleep_workaround_applyonce, 238 238 store_fastsleep_workaround_applyonce); 239 239 240 + /* 241 + * The default stop state that will be used by ppc_md.power_save 242 + * function on platforms that support stop instruction. 243 + */ 244 + u64 pnv_default_stop_val; 245 + u64 pnv_default_stop_mask; 240 246 241 247 /* 242 248 * Used for ppc_md.power_save which needs a function with no parameters 243 249 */ 244 250 static void power9_idle(void) 245 251 { 246 - /* Requesting stop state 0 */ 247 - power9_idle_stop(0); 252 + power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask); 248 253 } 254 + 249 255 /* 250 256 * First deep stop state. Used to figure out when to save/restore 251 257 * hypervisor context. ··· 259 253 u64 pnv_first_deep_stop_state = MAX_STOP_STATE; 260 254 261 255 /* 262 - * Deepest stop idle state. Used when a cpu is offlined 256 + * psscr value and mask of the deepest stop idle state. 257 + * Used when a cpu is offlined. 263 258 */ 264 - u64 pnv_deepest_stop_state; 259 + u64 pnv_deepest_stop_psscr_val; 260 + u64 pnv_deepest_stop_psscr_mask; 265 261 266 262 /* 267 263 * Power ISA 3.0 idle initialization. ··· 300 292 * Bits 60:63 - Requested Level 301 293 * Used to specify which power-saving level must be entered on executing 302 294 * stop instruction 295 + */ 296 + 297 + int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 298 + { 299 + int err = 0; 300 + 301 + /* 302 + * psscr_mask == 0xf indicates an older firmware. 303 + * Set remaining fields of psscr to the default values. 304 + * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 305 + */ 306 + if (*psscr_mask == 0xf) { 307 + *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 308 + *psscr_mask = PSSCR_HV_DEFAULT_MASK; 309 + return err; 310 + } 311 + 312 + /* 313 + * New firmware is expected to set the psscr_val bits correctly. 314 + * Validate that the following invariants are correctly maintained by 315 + * the new firmware. 
316 + * - ESL bit value matches the EC bit value. 317 + * - ESL bit is set for all the deep stop states. 318 + */ 319 + if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 320 + err = ERR_EC_ESL_MISMATCH; 321 + } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 322 + GET_PSSCR_ESL(*psscr_val) == 0) { 323 + err = ERR_DEEP_STATE_ESL_MISMATCH; 324 + } 325 + 326 + return err; 327 + } 328 + 329 + /* 330 + * pnv_arch300_idle_init: Initializes the default idle state, first 331 + * deep idle state and deepest idle state on 332 + * ISA 3.0 CPUs. 303 333 * 304 334 * @np: /ibm,opal/power-mgt device node 305 335 * @flags: cpu-idle-state-flags array 306 336 * @dt_idle_states: Number of idle state entries 307 337 * Returns 0 on success 308 338 */ 309 - static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, 339 + static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, 310 340 int dt_idle_states) 311 341 { 312 342 u64 *psscr_val = NULL; 343 + u64 *psscr_mask = NULL; 344 + u32 *residency_ns = NULL; 345 + u64 max_residency_ns = 0; 313 346 int rc = 0, i; 347 + bool default_stop_found = false, deepest_stop_found = false; 314 348 315 - psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), 316 - GFP_KERNEL); 317 - if (!psscr_val) { 349 + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL); 350 + psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL); 351 + residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns), 352 + GFP_KERNEL); 353 + 354 + if (!psscr_val || !psscr_mask || !residency_ns) { 318 355 rc = -1; 319 356 goto out; 320 357 } 358 + 321 359 if (of_property_read_u64_array(np, 322 360 "ibm,cpu-idle-state-psscr", 323 361 psscr_val, dt_idle_states)) { 324 - pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); 362 + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 363 + rc = -1; 364 + goto out; 365 + } 366 + 367 + if (of_property_read_u64_array(np, 368 + 
"ibm,cpu-idle-state-psscr-mask", 369 + psscr_mask, dt_idle_states)) { 370 + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 371 + rc = -1; 372 + goto out; 373 + } 374 + 375 + if (of_property_read_u32_array(np, 376 + "ibm,cpu-idle-state-residency-ns", 377 + residency_ns, dt_idle_states)) { 378 + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 325 379 rc = -1; 326 380 goto out; 327 381 } 328 382 329 383 /* 330 - * Set pnv_first_deep_stop_state and pnv_deepest_stop_state. 384 + * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, 385 + * and the pnv_default_stop_{val,mask}. 386 + * 331 387 * pnv_first_deep_stop_state should be set to the first stop 332 388 * level to cause hypervisor state loss. 333 - * pnv_deepest_stop_state should be set to the deepest stop 334 - * stop state. 389 + * 390 + * pnv_deepest_stop_{val,mask} should be set to values corresponding to 391 + * the deepest stop state. 392 + * 393 + * pnv_default_stop_{val,mask} should be set to values corresponding to 394 + * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. 
335 395 */ 336 396 pnv_first_deep_stop_state = MAX_STOP_STATE; 337 397 for (i = 0; i < dt_idle_states; i++) { 398 + int err; 338 399 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; 339 400 340 401 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && 341 402 (pnv_first_deep_stop_state > psscr_rl)) 342 403 pnv_first_deep_stop_state = psscr_rl; 343 404 344 - if (pnv_deepest_stop_state < psscr_rl) 345 - pnv_deepest_stop_state = psscr_rl; 405 + err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i], 406 + flags[i]); 407 + if (err) { 408 + report_invalid_psscr_val(psscr_val[i], err); 409 + continue; 410 + } 411 + 412 + if (max_residency_ns < residency_ns[i]) { 413 + max_residency_ns = residency_ns[i]; 414 + pnv_deepest_stop_psscr_val = psscr_val[i]; 415 + pnv_deepest_stop_psscr_mask = psscr_mask[i]; 416 + deepest_stop_found = true; 417 + } 418 + 419 + if (!default_stop_found && 420 + (flags[i] & OPAL_PM_STOP_INST_FAST)) { 421 + pnv_default_stop_val = psscr_val[i]; 422 + pnv_default_stop_mask = psscr_mask[i]; 423 + default_stop_found = true; 424 + } 425 + } 426 + 427 + if (!default_stop_found) { 428 + pnv_default_stop_val = PSSCR_HV_DEFAULT_VAL; 429 + pnv_default_stop_mask = PSSCR_HV_DEFAULT_MASK; 430 + pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n", 431 + pnv_default_stop_val, pnv_default_stop_mask); 432 + } 433 + 434 + if (!deepest_stop_found) { 435 + pnv_deepest_stop_psscr_val = PSSCR_HV_DEFAULT_VAL; 436 + pnv_deepest_stop_psscr_mask = PSSCR_HV_DEFAULT_MASK; 437 + pr_warn("Setting default stop psscr val=0x%016llx,mask=0x%016llx\n", 438 + pnv_deepest_stop_psscr_val, 439 + pnv_deepest_stop_psscr_mask); 346 440 } 347 441 348 442 out: 349 443 kfree(psscr_val); 444 + kfree(psscr_mask); 445 + kfree(residency_ns); 350 446 return rc; 351 447 } 352 448 ··· 485 373 } 486 374 487 375 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 488 - if (pnv_arch300_idle_init(np, flags, dt_idle_states)) 376 + if (pnv_power9_idle_init(np, flags, dt_idle_states)) 489 377 goto out; 490 
378 } 491 379
+2 -1
arch/powerpc/platforms/powernv/opal-hmi.c
··· 180 180 "An XSCOM operation completed", 181 181 "SCOM has set a reserved FIR bit to cause recovery", 182 182 "Debug trigger has set a reserved FIR bit to cause recovery", 183 - "A hypervisor resource error occurred" 183 + "A hypervisor resource error occurred", 184 + "CAPP recovery process is in progress", 184 185 }; 185 186 186 187 /* Print things out */
+42 -13
arch/powerpc/platforms/powernv/opal-irqchip.c
··· 183 183 int __init opal_event_init(void) 184 184 { 185 185 struct device_node *dn, *opal_node; 186 - const __be32 *irqs; 187 - int i, irqlen, rc = 0; 186 + const char **names; 187 + u32 *irqs; 188 + int i, rc; 188 189 189 190 opal_node = of_find_node_by_path("/ibm,opal"); 190 191 if (!opal_node) { ··· 210 209 goto out; 211 210 } 212 211 213 - /* Get interrupt property */ 214 - irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); 215 - opal_irq_count = irqs ? (irqlen / 4) : 0; 212 + /* Get opal-interrupts property and names if present */ 213 + rc = of_property_count_u32_elems(opal_node, "opal-interrupts"); 214 + if (rc < 0) 215 + goto out; 216 + 217 + opal_irq_count = rc; 216 218 pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); 217 219 218 - /* Install interrupt handlers */ 220 + irqs = kcalloc(opal_irq_count, sizeof(*irqs), GFP_KERNEL); 221 + names = kcalloc(opal_irq_count, sizeof(*names), GFP_KERNEL); 219 222 opal_irqs = kcalloc(opal_irq_count, sizeof(*opal_irqs), GFP_KERNEL); 220 - for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { 221 - unsigned int irq, virq; 223 + 224 + if (WARN_ON(!irqs || !names || !opal_irqs)) 225 + goto out_free; 226 + 227 + rc = of_property_read_u32_array(opal_node, "opal-interrupts", 228 + irqs, opal_irq_count); 229 + if (rc < 0) { 230 + pr_err("Error %d reading opal-interrupts array\n", rc); 231 + goto out_free; 232 + } 233 + 234 + /* It's not an error for the names to be missing */ 235 + of_property_read_string_array(opal_node, "opal-interrupts-names", 236 + names, opal_irq_count); 237 + 238 + /* Install interrupt handlers */ 239 + for (i = 0; i < opal_irq_count; i++) { 240 + unsigned int virq; 241 + char *name; 222 242 223 243 /* Get hardware and virtual IRQ */ 224 - irq = be32_to_cpup(irqs); 225 - virq = irq_create_mapping(NULL, irq); 244 + virq = irq_create_mapping(NULL, irqs[i]); 226 245 if (!virq) { 227 - pr_warn("Failed to map irq 0x%x\n", irq); 246 + pr_warn("Failed to map irq 0x%x\n", 
irqs[i]); 228 247 continue; 229 248 } 230 249 250 + if (names[i] && strlen(names[i])) 251 + name = kasprintf(GFP_KERNEL, "opal-%s", names[i]); 252 + else 253 + name = kasprintf(GFP_KERNEL, "opal"); 254 + 231 255 /* Install interrupt handler */ 232 256 rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, 233 - "opal", NULL); 257 + name, NULL); 234 258 if (rc) { 235 259 irq_dispose_mapping(virq); 236 260 pr_warn("Error %d requesting irq %d (0x%x)\n", 237 - rc, virq, irq); 261 + rc, virq, irqs[i]); 238 262 continue; 239 263 } 240 264 ··· 267 241 opal_irqs[i] = virq; 268 242 } 269 243 244 + out_free: 245 + kfree(irqs); 246 + kfree(names); 270 247 out: 271 248 of_node_put(opal_node); 272 249 return rc;
+14 -6
arch/powerpc/platforms/powernv/opal-lpc.c
··· 18 18 19 19 #include <asm/machdep.h> 20 20 #include <asm/firmware.h> 21 - #include <asm/xics.h> 22 21 #include <asm/opal.h> 23 22 #include <asm/prom.h> 24 23 #include <linux/uaccess.h> 25 24 #include <asm/debug.h> 25 + #include <asm/isa-bridge.h> 26 26 27 27 static int opal_lpc_chip_id = -1; 28 28 ··· 386 386 machine_device_initcall(powernv, opal_lpc_init_debugfs); 387 387 #endif /* CONFIG_DEBUG_FS */ 388 388 389 - void opal_lpc_init(void) 389 + void __init opal_lpc_init(void) 390 390 { 391 391 struct device_node *np; 392 392 ··· 406 406 if (opal_lpc_chip_id < 0) 407 407 return; 408 408 409 - /* Setup special IO ops */ 410 - ppc_pci_io = opal_lpc_io; 411 - isa_io_special = true; 409 + /* Does it support direct mapping ? */ 410 + if (of_get_property(np, "ranges", NULL)) { 411 + pr_info("OPAL: Found memory mapped LPC bus on chip %d\n", 412 + opal_lpc_chip_id); 413 + isa_bridge_init_non_pci(np); 414 + } else { 415 + pr_info("OPAL: Found non-mapped LPC bus on chip %d\n", 416 + opal_lpc_chip_id); 412 417 413 - pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id); 418 + /* Setup special IO ops */ 419 + ppc_pci_io = opal_lpc_io; 420 + isa_io_special = true; 421 + } 414 422 }
+4
arch/powerpc/platforms/powernv/opal-msglog.c
··· 123 123 return; 124 124 } 125 125 126 + /* Report maximum size */ 127 + opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) + 128 + be32_to_cpu(mc->obuf_size); 129 + 126 130 opal_memcons = mc; 127 131 } 128 132
+31 -42
arch/powerpc/platforms/powernv/opal-wrappers.S
··· 58 58 59 59 #define OPAL_CALL(name, token) \ 60 60 _GLOBAL_TOC(name); \ 61 + mfmsr r12; \ 61 62 mflr r0; \ 63 + andi. r11,r12,MSR_IR|MSR_DR; \ 62 64 std r0,PPC_LR_STKOFF(r1); \ 63 65 li r0,token; \ 66 + beq opal_real_call; \ 64 67 OPAL_BRANCH(opal_tracepoint_entry) \ 65 - mfcr r12; \ 66 - stw r12,8(r1); \ 68 + mfcr r11; \ 69 + stw r11,8(r1); \ 67 70 li r11,0; \ 68 - mfmsr r12; \ 69 71 ori r11,r11,MSR_EE; \ 70 72 std r12,PACASAVEDMSR(r13); \ 71 73 andc r12,r12,r11; \ ··· 99 97 mtspr SPRN_SRR1,r6; 100 98 mtcr r4; 101 99 rfid 100 + 101 + opal_real_call: 102 + mfcr r11 103 + stw r11,8(r1) 104 + /* Set opal return address */ 105 + LOAD_REG_ADDR(r11, opal_return_realmode) 106 + mtlr r11 107 + li r11,MSR_LE 108 + andc r12,r12,r11 109 + mtspr SPRN_HSRR1,r12 110 + LOAD_REG_ADDR(r11,opal) 111 + ld r12,8(r11) 112 + ld r2,0(r11) 113 + mtspr SPRN_HSRR0,r12 114 + hrfid 115 + 116 + opal_return_realmode: 117 + FIXUP_ENDIAN 118 + ld r2,PACATOC(r13); 119 + lwz r11,8(r1); 120 + ld r12,PPC_LR_STKOFF(r1) 121 + mtcr r11; 122 + mtlr r12 123 + blr 102 124 103 125 #ifdef CONFIG_TRACEPOINTS 104 126 opal_tracepoint_entry: ··· 172 146 opal_tracepoint_return: 173 147 std r3,STK_REG(R31)(r1) 174 148 mr r4,r3 175 - ld r0,STK_REG(R23)(r1) 149 + ld r3,STK_REG(R23)(r1) 176 150 bl __trace_opal_exit 177 151 ld r3,STK_REG(R31)(r1) 178 152 addi r1,r1,STACKFRAMESIZE ··· 180 154 mtlr r0 181 155 blr 182 156 #endif 183 - 184 - #define OPAL_CALL_REAL(name, token) \ 185 - _GLOBAL_TOC(name); \ 186 - mflr r0; \ 187 - std r0,PPC_LR_STKOFF(r1); \ 188 - li r0,token; \ 189 - mfcr r12; \ 190 - stw r12,8(r1); \ 191 - \ 192 - /* Set opal return address */ \ 193 - LOAD_REG_ADDR(r11, opal_return_realmode); \ 194 - mtlr r11; \ 195 - mfmsr r12; \ 196 - li r11,MSR_LE; \ 197 - andc r12,r12,r11; \ 198 - mtspr SPRN_HSRR1,r12; \ 199 - LOAD_REG_ADDR(r11,opal); \ 200 - ld r12,8(r11); \ 201 - ld r2,0(r11); \ 202 - mtspr SPRN_HSRR0,r12; \ 203 - hrfid 204 - 205 - opal_return_realmode: 206 - FIXUP_ENDIAN 207 - ld 
r2,PACATOC(r13); 208 - lwz r11,8(r1); 209 - ld r12,PPC_LR_STKOFF(r1) 210 - mtcr r11; 211 - mtlr r12 212 - blr 213 157 214 158 215 159 OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); ··· 204 208 OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); 205 209 OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); 206 210 OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); 207 - OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE); 208 211 OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); 209 212 OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); 210 213 OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); ··· 259 264 OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); 260 265 OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); 261 266 OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); 262 - OPAL_CALL_REAL(opal_rm_resync_timebase, OPAL_RESYNC_TIMEBASE); 263 267 OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); 264 268 OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); 265 269 OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); ··· 274 280 OPAL_CALL(opal_get_param, OPAL_GET_PARAM); 275 281 OPAL_CALL(opal_set_param, OPAL_SET_PARAM); 276 282 OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); 277 - OPAL_CALL_REAL(opal_rm_handle_hmi, OPAL_HANDLE_HMI); 278 283 OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); 279 - OPAL_CALL_REAL(opal_rm_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); 280 284 OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); 281 285 OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); 282 286 OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); ··· 296 304 OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); 297 305 OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); 298 306 OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); 299 - OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); 300 307 OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); 301 308 
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); 302 - OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); 303 309 OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); 304 - OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); 305 310 OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); 306 - OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); 311 + OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
+11
arch/powerpc/platforms/powernv/opal.c
··· 875 875 } 876 876 } 877 877 878 + void powernv_set_nmmu_ptcr(unsigned long ptcr) 879 + { 880 + int rc; 881 + 882 + if (firmware_has_feature(FW_FEATURE_OPAL)) { 883 + rc = opal_nmmu_set_ptcr(-1UL, ptcr); 884 + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) 885 + pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); 886 + } 887 + } 888 + 878 889 EXPORT_SYMBOL_GPL(opal_poll_events); 879 890 EXPORT_SYMBOL_GPL(opal_rtc_read); 880 891 EXPORT_SYMBOL_GPL(opal_rtc_write);
+14 -9
arch/powerpc/platforms/powernv/pci-ioda.c
··· 1326 1326 else 1327 1327 m64_bars = 1; 1328 1328 1329 - pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); 1329 + pdn->m64_map = kmalloc_array(m64_bars, 1330 + sizeof(*pdn->m64_map), 1331 + GFP_KERNEL); 1330 1332 if (!pdn->m64_map) 1331 1333 return -ENOMEM; 1332 1334 /* Initialize the m64_map to IODA_INVALID_M64 */ ··· 1595 1593 1596 1594 /* Allocating pe_num_map */ 1597 1595 if (pdn->m64_single_mode) 1598 - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, 1599 - GFP_KERNEL); 1596 + pdn->pe_num_map = kmalloc_array(num_vfs, 1597 + sizeof(*pdn->pe_num_map), 1598 + GFP_KERNEL); 1600 1599 else 1601 1600 pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); 1602 1601 ··· 1953 1950 struct pnv_phb *phb = pe->phb; 1954 1951 unsigned int shift = tbl->it_page_shift; 1955 1952 1956 - if (phb->type == PNV_PHB_NPU) { 1953 + /* 1954 + * NVLink1 can use the TCE kill register directly as 1955 + * it's the same as PHB3. NVLink2 is different and 1956 + * should go via the OPAL call. 1957 + */ 1958 + if (phb->model == PNV_PHB_MODEL_NPU) { 1957 1959 /* 1958 1960 * The NVLink hardware does not support TCE kill 1959 1961 * per TCE entry so we have to invalidate ··· 1970 1962 if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) 1971 1963 pnv_pci_phb3_tce_invalidate(pe, rm, shift, 1972 1964 index, npages); 1973 - else if (rm) 1974 - opal_rm_pci_tce_kill(phb->opal_id, 1975 - OPAL_PCI_TCE_KILL_PAGES, 1976 - pe->pe_number, 1u << shift, 1977 - index << shift, npages); 1978 1965 else 1979 1966 opal_pci_tce_kill(phb->opal_id, 1980 1967 OPAL_PCI_TCE_KILL_PAGES, ··· 3674 3671 phb->model = PNV_PHB_MODEL_PHB3; 3675 3672 else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) 3676 3673 phb->model = PNV_PHB_MODEL_NPU; 3674 + else if (of_device_is_compatible(np, "ibm,power9-npu-pciex")) 3675 + phb->model = PNV_PHB_MODEL_NPU2; 3677 3676 else 3678 3677 phb->model = PNV_PHB_MODEL_UNKNOWN; 3679 3678
+7
arch/powerpc/platforms/powernv/pci.c
··· 940 940 for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") 941 941 pnv_pci_init_npu_phb(np); 942 942 943 + /* 944 + * Look for NPU2 PHBs which we treat mostly as NPU PHBs with 945 + * the exception of TCE kill which requires an OPAL call. 946 + */ 947 + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") 948 + pnv_pci_init_npu_phb(np); 949 + 943 950 /* Configure IOMMU DMA hooks */ 944 951 set_pci_dma_ops(&dma_iommu_ops); 945 952 }
+1
arch/powerpc/platforms/powernv/pci.h
··· 19 19 PNV_PHB_MODEL_P7IOC, 20 20 PNV_PHB_MODEL_PHB3, 21 21 PNV_PHB_MODEL_NPU, 22 + PNV_PHB_MODEL_NPU2, 22 23 }; 23 24 24 25 #define PNV_PCI_DIAG_BUF_SIZE 8192
+2 -1
arch/powerpc/platforms/powernv/powernv.h
··· 18 18 #endif 19 19 20 20 extern u32 pnv_get_supported_cpuidle_states(void); 21 - extern u64 pnv_deepest_stop_state; 21 + extern u64 pnv_deepest_stop_psscr_val; 22 + extern u64 pnv_deepest_stop_psscr_mask; 22 23 23 24 extern void pnv_lpc_init(void); 24 25
+8 -6
arch/powerpc/platforms/powernv/smp.c
··· 184 184 185 185 ppc64_runlatch_off(); 186 186 187 - if (cpu_has_feature(CPU_FTR_ARCH_300)) 188 - srr1 = power9_idle_stop(pnv_deepest_stop_state); 189 - else if (idle_states & OPAL_PM_WINKLE_ENABLED) 187 + if (cpu_has_feature(CPU_FTR_ARCH_300)) { 188 + srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val, 189 + pnv_deepest_stop_psscr_mask); 190 + } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { 190 191 srr1 = power7_winkle(); 191 - else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || 192 - (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 192 + } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || 193 + (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 193 194 srr1 = power7_sleep(); 194 - else 195 + } else { 195 196 srr1 = power7_nap(1); 197 + } 196 198 197 199 ppc64_runlatch_on(); 198 200
arch/powerpc/platforms/pseries/Kconfig | -1
···
 	select PPC_UDBG_16550
 	select PPC_NATIVE
 	select PPC_DOORBELL
-	select HAVE_CONTEXT_TRACKING
 	select HOTPLUG_CPU if SMP
 	select ARCH_RANDOM
 	select PPC_DOORBELL
arch/powerpc/platforms/pseries/cmm.c | +1 -1
···
 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
 module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
+MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
 		 "before loaning resumes. "
 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
 module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
arch/powerpc/platforms/pseries/dlpar.c | +7 -1
···
 	return rc ? rc : count;
 }

-static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store);
+static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
+			  char *buf)
+{
+	return sprintf(buf, "%s\n", "memory,cpu");
+}
+
+static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store);

 static int __init pseries_dlpar_init(void)
 {
arch/powerpc/platforms/pseries/firmware.c | +2 -1
···
 	{FW_FEATURE_VPHN,		"hcall-vphn"},
 	{FW_FEATURE_SET_MODE,		"hcall-set-mode"},
 	{FW_FEATURE_BEST_ENERGY,	"hcall-best-energy-1*"},
+	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
 };

 /* Build up the firmware features bitmask using the contents of
···
 		index = OV5_INDX(vec5_fw_features_table[i].feature);
 		feat = OV5_FEAT(vec5_fw_features_table[i].feature);

-		if (vec5[index] & feat)
+		if (index < len && (vec5[index] & feat))
 			powerpc_firmware_features |=
 				vec5_fw_features_table[i].val;
 	}
arch/powerpc/platforms/pseries/hotplug-memory.c | +63 -12
···
 	/* Update memory regions for memory remove */
 	memblock_remove(lmb->base_addr, block_sz);

-	dlpar_release_drc(lmb->drc_index);
 	dlpar_remove_device_tree_lmb(lmb);
-
 	return 0;
 }
···
 		if (!lmbs[i].reserved)
 			continue;

+		dlpar_release_drc(lmbs[i].drc_index);
 		pr_info("Memory at %llx was hot-removed\n",
 			lmbs[i].base_addr);
···
 		if (lmbs[i].drc_index == drc_index) {
 			lmb_found = 1;
 			rc = dlpar_remove_lmb(&lmbs[i]);
+			if (!rc)
+				dlpar_release_drc(lmbs[i].drc_index);
+
 			break;
 		}
 	}
···
 	return rc;
 }

+static int dlpar_memory_readd_by_index(u32 drc_index, struct property *prop)
+{
+	struct of_drconf_cell *lmbs;
+	u32 num_lmbs, *p;
+	int lmb_found;
+	int i, rc;
+
+	pr_info("Attempting to update LMB, drc index %x\n", drc_index);
+
+	p = prop->value;
+	num_lmbs = *p++;
+	lmbs = (struct of_drconf_cell *)p;
+
+	lmb_found = 0;
+	for (i = 0; i < num_lmbs; i++) {
+		if (lmbs[i].drc_index == drc_index) {
+			lmb_found = 1;
+			rc = dlpar_remove_lmb(&lmbs[i]);
+			if (!rc) {
+				rc = dlpar_add_lmb(&lmbs[i]);
+				if (rc)
+					dlpar_release_drc(lmbs[i].drc_index);
+			}
+			break;
+		}
+	}
+
+	if (!lmb_found)
+		rc = -EINVAL;
+
+	if (rc)
+		pr_info("Failed to update memory at %llx\n",
+			lmbs[i].base_addr);
+	else
+		pr_info("Memory at %llx was updated\n", lmbs[i].base_addr);
+
+	return rc;
+}
 #else
 static inline int pseries_remove_memblock(unsigned long base,
 					  unsigned int memblock_size)
···
 	if (lmb->flags & DRCONF_MEM_ASSIGNED)
 		return -EINVAL;

-	rc = dlpar_acquire_drc(lmb->drc_index);
-	if (rc)
-		return rc;
-
 	rc = dlpar_add_device_tree_lmb(lmb);
 	if (rc) {
 		pr_err("Couldn't update device tree for drc index %x\n",
···
 	/* Add the memory */
 	rc = add_memory(nid, lmb->base_addr, block_sz);
-	if (rc) {
+	if (rc)
 		dlpar_remove_device_tree_lmb(lmb);
-		dlpar_release_drc(lmb->drc_index);
-	} else {
+	else
 		lmb->flags |= DRCONF_MEM_ASSIGNED;
-	}

 	return rc;
 }
···
 		return -EINVAL;

 	for (i = 0; i < num_lmbs && lmbs_to_add != lmbs_added; i++) {
-		rc = dlpar_add_lmb(&lmbs[i]);
+		rc = dlpar_acquire_drc(lmbs[i].drc_index);
 		if (rc)
 			continue;
+
+		rc = dlpar_add_lmb(&lmbs[i]);
+		if (rc) {
+			dlpar_release_drc(lmbs[i].drc_index);
+			continue;
+		}

 		lmbs_added++;
···
 			if (rc)
 				pr_err("Failed to remove LMB, drc index %x\n",
 				       be32_to_cpu(lmbs[i].drc_index));
+			else
+				dlpar_release_drc(lmbs[i].drc_index);
 		}
 		rc = -EINVAL;
 	} else {
···
 	for (i = 0; i < num_lmbs; i++) {
 		if (lmbs[i].drc_index == drc_index) {
 			lmb_found = 1;
-			rc = dlpar_add_lmb(&lmbs[i]);
+			rc = dlpar_acquire_drc(lmbs[i].drc_index);
+			if (!rc) {
+				rc = dlpar_add_lmb(&lmbs[i]);
+				if (rc)
+					dlpar_release_drc(lmbs[i].drc_index);
+			}
+
 			break;
 		}
 	}
···
 			rc = dlpar_memory_remove_by_index(drc_index, prop);
 		else
 			rc = -EINVAL;
+		break;
+	case PSERIES_HP_ELOG_ACTION_READD:
+		rc = dlpar_memory_readd_by_index(drc_index, prop);
 		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
arch/powerpc/platforms/pseries/lpar.c | +138
···
 #include <linux/console.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
···
 __setup("bulk_remove=", disable_bulk_remove);

+#define HPT_RESIZE_TIMEOUT	10000 /* ms */
+
+struct hpt_resize_state {
+	unsigned long shift;
+	int commit_rc;
+};
+
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+	struct hpt_resize_state *state = data;
+
+	state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
+	if (state->commit_rc != H_SUCCESS)
+		return -EIO;
+
+	/* Hypervisor has transitioned the HTAB, update our globals */
+	ppc64_pft_size = state->shift;
+	htab_size_bytes = 1UL << ppc64_pft_size;
+	htab_hash_mask = (htab_size_bytes >> 7) - 1;
+
+	return 0;
+}
+
+/* Must be called in user context */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+	struct hpt_resize_state state = {
+		.shift = shift,
+		.commit_rc = H_FUNCTION,
+	};
+	unsigned int delay, total_delay = 0;
+	int rc;
+	ktime_t t0, t1, t2;
+
+	might_sleep();
+
+	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		return -ENODEV;
+
+	printk(KERN_INFO "lpar: Attempting to resize HPT to shift %lu\n",
+	       shift);
+
+	t0 = ktime_get();
+
+	rc = plpar_resize_hpt_prepare(0, shift);
+	while (H_IS_LONG_BUSY(rc)) {
+		delay = get_longbusy_msecs(rc);
+		total_delay += delay;
+		if (total_delay > HPT_RESIZE_TIMEOUT) {
+			/* prepare with shift==0 cancels an in-progress resize */
+			rc = plpar_resize_hpt_prepare(0, 0);
+			if (rc != H_SUCCESS)
+				printk(KERN_WARNING
+				       "lpar: Unexpected error %d cancelling timed out HPT resize\n",
+				       rc);
+			return -ETIMEDOUT;
+		}
+		msleep(delay);
+		rc = plpar_resize_hpt_prepare(0, shift);
+	};
+
+	switch (rc) {
+	case H_SUCCESS:
+		/* Continue on */
+		break;
+
+	case H_PARAMETER:
+		return -EINVAL;
+	case H_RESOURCE:
+		return -EPERM;
+	default:
+		printk(KERN_WARNING
+		       "lpar: Unexpected error %d from H_RESIZE_HPT_PREPARE\n",
+		       rc);
+		return -EIO;
+	}
+
+	t1 = ktime_get();
+
+	rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
+
+	t2 = ktime_get();
+
+	if (rc != 0) {
+		switch (state.commit_rc) {
+		case H_PTEG_FULL:
+			printk(KERN_WARNING
+			       "lpar: Hash collision while resizing HPT\n");
+			return -ENOSPC;
+
+		default:
+			printk(KERN_WARNING
+			       "lpar: Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+			       state.commit_rc);
+			return -EIO;
+		};
+	}
+
+	printk(KERN_INFO
+	       "lpar: HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+	       shift, (long long) ktime_ms_delta(t1, t0),
+	       (long long) ktime_ms_delta(t2, t1));
+
+	return 0;
+}
+
+/* Actually only used for radix, so far */
+static int pseries_lpar_register_process_table(unsigned long base,
+			unsigned long page_size, unsigned long table_size)
+{
+	long rc;
+	unsigned long flags = PROC_TABLE_NEW;
+
+	if (radix_enabled())
+		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
+	for (;;) {
+		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+					page_size, table_size);
+		if (!H_IS_LONG_BUSY(rc))
+			break;
+		mdelay(get_longbusy_msecs(rc));
+	}
+	if (rc != H_SUCCESS) {
+		pr_err("Failed to register process table (rc=%ld)\n", rc);
+		BUG();
+	}
+	return rc;
+}
+
 void __init hpte_init_pseries(void)
 {
 	mmu_hash_ops.hpte_invalidate	 = pSeries_lpar_hpte_invalidate;
···
 	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
 	mmu_hash_ops.hpte_clear_all	 = pseries_hpte_clear_all;
 	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+	mmu_hash_ops.resize_hpt		 = pseries_lpar_resize_hpt;
+}
+
+void radix_init_pseries(void)
+{
+	pr_info("Using radix MMU under hypervisor\n");
+	register_process_table = pseries_lpar_register_process_table;
 }

 #ifdef CONFIG_PPC_SMLPAR
arch/powerpc/platforms/pseries/mobility.c | +34
···
 #define ADD_DT_NODE	0x03000000

 #define MIGRATION_SCOPE	(1)
+#define PRRN_SCOPE -2

 static int mobility_rtas_call(int token, char *buf, s32 scope)
 {
···
 	return rc;
 }

+static void prrn_update_node(__be32 phandle)
+{
+	struct pseries_hp_errorlog *hp_elog;
+	struct device_node *dn;
+
+	/*
+	 * If a node is found from the given phandle, the phandle does not
+	 * represent the drc index of an LMB and we can ignore it.
+	 */
+	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
+	if (dn) {
+		of_node_put(dn);
+		return;
+	}
+
+	hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
+	if (!hp_elog)
+		return;
+
+	hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+	hp_elog->action = PSERIES_HP_ELOG_ACTION_READD;
+	hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+	hp_elog->_drc_u.drc_index = phandle;
+
+	queue_hotplug_event(hp_elog, NULL, NULL);
+
+	kfree(hp_elog);
+}
+
 int pseries_devicetree_update(s32 scope)
 {
 	char *rtas_buf;
···
 			break;
 		case UPDATE_DT_NODE:
 			update_dt_node(phandle, scope);
+
+			if (scope == PRRN_SCOPE)
+				prrn_update_node(phandle);
+
 			break;
 		case ADD_DT_NODE:
 			drc_index = *data++;
arch/powerpc/platforms/pseries/setup.c | +1
···
 #include <asm/reg.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/kexec.h>
+#include <asm/isa-bridge.h>

 #include "pseries.h"
arch/powerpc/xmon/xmon.c | +4 -4
···
 	struct pt_regs regs;

 	while (max_to_print--) {
-		if (sp < PAGE_OFFSET) {
+		if (!is_kernel_addr(sp)) {
 			if (sp != 0)
 				printf("SP (%lx) is in userspace\n", sp);
 			break;
···
 		mread(newsp + LRSAVE_OFFSET, &nextip,
 		      sizeof(unsigned long));
 		if (lr == ip) {
-			if (lr < PAGE_OFFSET
+			if (!is_kernel_addr(lr)
 			    || (fnstart <= lr && lr < fnend))
 				printip = 0;
 		} else if (lr == nextip) {
 			printip = 0;
-		} else if (lr >= PAGE_OFFSET
+		} else if (is_kernel_addr(lr)
 			   && !(fnstart <= lr && lr < fnend)) {
 			printf("[link register   ] ");
 			xmon_print_symbol(lr, " ", "\n");
···
 	if (regs->msr & MSR_PR)
 		return;		/* not in kernel */
 	addr = regs->nip;	/* address of trap instruction */
-	if (addr < PAGE_OFFSET)
+	if (!is_kernel_addr(addr))
 		return;
 	bug = find_bug(regs->nip);
 	if (bug == NULL)
drivers/cpuidle/cpuidle-powernv.c | +88 -41
···
 #include <asm/firmware.h>
 #include <asm/opal.h>
 #include <asm/runlatch.h>
+#include <asm/cpuidle.h>

+/*
+ * Expose only those Hardware idle states via the cpuidle framework
+ * that have latency value below POWERNV_THRESHOLD_LATENCY_NS.
+ */
 #define POWERNV_THRESHOLD_LATENCY_NS 200000

 static struct cpuidle_driver powernv_idle_driver = {
···
 static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;

-static u64 stop_psscr_table[CPUIDLE_STATE_MAX];
+struct stop_psscr_table {
+	u64 val;
+	u64 mask;
+};
+
+static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX];

 static u64 snooze_timeout;
 static bool snooze_timeout_en;
···
 			int index)
 {
 	ppc64_runlatch_off();
-	power9_idle_stop(stop_psscr_table[index]);
+	power9_idle_stop(stop_psscr_table[index].val,
+			 stop_psscr_table[index].mask);
 	ppc64_runlatch_on();
 	return index;
 }
···
 	return 0;
 }

+static inline void add_powernv_state(int index, const char *name,
+				     unsigned int flags,
+				     int (*idle_fn)(struct cpuidle_device *,
+						    struct cpuidle_driver *,
+						    int),
+				     unsigned int target_residency,
+				     unsigned int exit_latency,
+				     u64 psscr_val, u64 psscr_mask)
+{
+	strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
+	strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
+	powernv_states[index].flags = flags;
+	powernv_states[index].target_residency = target_residency;
+	powernv_states[index].exit_latency = exit_latency;
+	powernv_states[index].enter = idle_fn;
+	stop_psscr_table[index].val = psscr_val;
+	stop_psscr_table[index].mask = psscr_mask;
+}
+
 static int powernv_add_idle_states(void)
 {
 	struct device_node *power_mgt;
···
 	u32 residency_ns[CPUIDLE_STATE_MAX];
 	u32 flags[CPUIDLE_STATE_MAX];
 	u64 psscr_val[CPUIDLE_STATE_MAX];
+	u64 psscr_mask[CPUIDLE_STATE_MAX];
 	const char *names[CPUIDLE_STATE_MAX];
+	u32 has_stop_states = 0;
 	int i, rc;

 	/* Currently we have snooze statically defined */
···
 	/*
 	 * If the idle states use stop instruction, probe for psscr values
-	 * which are necessary to specify required stop level.
+	 * and psscr mask which are necessary to specify required stop level.
 	 */
-	if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP))
+	has_stop_states = (flags[0] &
+			   (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
+	if (has_stop_states) {
 		if (of_property_read_u64_array(power_mgt,
 		    "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
-			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n");
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
 			goto out;
 		}
+
+		if (of_property_read_u64_array(power_mgt,
+					       "ibm,cpu-idle-state-psscr-mask",
+					       psscr_mask, dt_idle_states)) {
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
+			goto out;
+		}
+	}

 	rc = of_property_read_u32_array(power_mgt,
 		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);

 	for (i = 0; i < dt_idle_states; i++) {
+		unsigned int exit_latency, target_residency;
 		/*
 		 * If an idle state has exit latency beyond
 		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
···
 		 */
 		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
 			continue;
+		/*
+		 * Firmware passes residency and latency values in ns.
+		 * cpuidle expects it in us.
+		 */
+		exit_latency = latency_ns[i] / 1000;
+		if (!rc)
+			target_residency = residency_ns[i] / 1000;
+		else
+			target_residency = 0;
+
+		if (has_stop_states) {
+			int err = validate_psscr_val_mask(&psscr_val[i],
+							  &psscr_mask[i],
+							  flags[i]);
+			if (err) {
+				report_invalid_psscr_val(psscr_val[i], err);
+				continue;
+			}
+		}

 		/*
-		 * Cpuidle accepts exit_latency and target_residency in us.
-		 * Use default target_residency values if f/w does not expose it.
+		 * For nap and fastsleep, use default target_residency
+		 * values if f/w does not expose it.
 		 */
 		if (flags[i] & OPAL_PM_NAP_ENABLED) {
+			if (!rc)
+				target_residency = 100;
 			/* Add NAP state */
-			strcpy(powernv_states[nr_idle_states].name, "Nap");
-			strcpy(powernv_states[nr_idle_states].desc, "Nap");
-			powernv_states[nr_idle_states].flags = 0;
-			powernv_states[nr_idle_states].target_residency = 100;
-			powernv_states[nr_idle_states].enter = nap_loop;
+			add_powernv_state(nr_idle_states, "Nap",
+					  CPUIDLE_FLAG_NONE, nap_loop,
+					  target_residency, exit_latency, 0, 0);
 		} else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
 				!(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
-			strncpy(powernv_states[nr_idle_states].name,
-				names[i], CPUIDLE_NAME_LEN);
-			strncpy(powernv_states[nr_idle_states].desc,
-				names[i], CPUIDLE_NAME_LEN);
-			powernv_states[nr_idle_states].flags = 0;
-
-			powernv_states[nr_idle_states].enter = stop_loop;
-			stop_psscr_table[nr_idle_states] = psscr_val[i];
+			add_powernv_state(nr_idle_states, names[i],
+					  CPUIDLE_FLAG_NONE, stop_loop,
+					  target_residency, exit_latency,
+					  psscr_val[i], psscr_mask[i]);
 		}

 		/*
···
 #ifdef CONFIG_TICK_ONESHOT
 		if (flags[i] & OPAL_PM_SLEEP_ENABLED ||
 			flags[i] & OPAL_PM_SLEEP_ENABLED_ER1) {
+			if (!rc)
+				target_residency = 300000;
 			/* Add FASTSLEEP state */
-			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
-			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
-			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
-			powernv_states[nr_idle_states].target_residency = 300000;
-			powernv_states[nr_idle_states].enter = fastsleep_loop;
+			add_powernv_state(nr_idle_states, "FastSleep",
+					  CPUIDLE_FLAG_TIMER_STOP,
+					  fastsleep_loop,
+					  target_residency, exit_latency, 0, 0);
 		} else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
 				(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
-			strncpy(powernv_states[nr_idle_states].name,
-				names[i], CPUIDLE_NAME_LEN);
-			strncpy(powernv_states[nr_idle_states].desc,
-				names[i], CPUIDLE_NAME_LEN);
-
-			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
-			powernv_states[nr_idle_states].enter = stop_loop;
-			stop_psscr_table[nr_idle_states] = psscr_val[i];
+			add_powernv_state(nr_idle_states, names[i],
+					  CPUIDLE_FLAG_TIMER_STOP, stop_loop,
+					  target_residency, exit_latency,
+					  psscr_val[i], psscr_mask[i]);
 		}
 #endif
-		powernv_states[nr_idle_states].exit_latency =
-				((unsigned int)latency_ns[i]) / 1000;
-
-		if (!rc) {
-			powernv_states[nr_idle_states].target_residency =
-				((unsigned int)residency_ns[i]) / 1000;
-		}
-
 		nr_idle_states++;
 	}
 out:
drivers/macintosh/Kconfig | +7 -17
···
 	  Quadra 610, Quadra 650, Quadra 700, Quadra 800, Centris 610 and
 	  Centris 650.

-config ADB_MACIISI
-	bool "Include Mac IIsi ADB driver"
-	depends on ADB && MAC && BROKEN
-	help
-	  Say Y here if want your kernel to support Macintosh systems that use
-	  the Mac IIsi style ADB. This includes the IIsi, IIvi, IIvx, Classic
-	  II, LC, LC II, LC III, Performa 460, and the Performa 600.
-
 config ADB_IOP
 	bool "Include IOP (IIfx/Quadra 9x0) ADB driver"
 	depends on ADB && MAC
···
 # we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU
 config ADB_CUDA
-	bool "Support for CUDA based Macs and PowerMacs"
+	bool "Support for Cuda/Egret based Macs and PowerMacs"
 	depends on (ADB || PPC_PMAC) && !PPC_PMAC64
 	help
-	  This provides support for CUDA based Macintosh and Power Macintosh
-	  systems. This includes many m68k based Macs (Color Classic, Mac TV,
-	  Performa 475, Performa 520, Performa 550, Performa 575,
-	  Performa 588, Quadra 605, Quadra 630, Quadra/Centris 660AV, and
-	  Quadra 840AV), most OldWorld PowerMacs, the first generation iMacs,
-	  the Blue&White G3 and the "Yikes" G4 (PCI Graphics). All later
-	  models should use CONFIG_ADB_PMU instead. It is safe to say Y here
-	  even if your machine doesn't have a CUDA.
+	  This provides support for Cuda/Egret based Macintosh and
+	  Power Macintosh systems. This includes most m68k based Macs,
+	  most Old World PowerMacs, the first generation iMacs, the
+	  Blue & White G3 and the "Yikes" G4 (PCI Graphics). All later
+	  models should use CONFIG_ADB_PMU instead. It is safe to say Y
+	  here even if your machine doesn't have a Cuda or Egret device.

 	  If unsure say Y.
drivers/macintosh/Makefile | -1
···
 obj-$(CONFIG_ADB)		+= adb.o
 obj-$(CONFIG_ADB_MACII)		+= via-macii.o
-obj-$(CONFIG_ADB_MACIISI)	+= via-maciisi.o
 obj-$(CONFIG_ADB_IOP)		+= adb-iop.o
 obj-$(CONFIG_ADB_PMU68K)	+= via-pmu68k.o
 obj-$(CONFIG_ADB_MACIO)		+= macio-adb.o
drivers/macintosh/adb.c | -4
···
 EXPORT_SYMBOL(adb_client_list);

 extern struct adb_driver via_macii_driver;
-extern struct adb_driver via_maciisi_driver;
 extern struct adb_driver via_cuda_driver;
 extern struct adb_driver adb_iop_driver;
 extern struct adb_driver via_pmu_driver;
···
 static struct adb_driver *adb_driver_list[] = {
 #ifdef CONFIG_ADB_MACII
 	&via_macii_driver,
-#endif
-#ifdef CONFIG_ADB_MACIISI
-	&via_maciisi_driver,
 #endif
 #ifdef CONFIG_ADB_CUDA
 	&via_cuda_driver,
drivers/macintosh/via-cuda.c | +210 -84
···
 /*
- * Device driver for the via-cuda on Apple Powermacs.
+ * Device driver for the Cuda and Egret system controllers found on PowerMacs
+ * and 68k Macs.
  *
- * The VIA (versatile interface adapter) interfaces to the CUDA,
- * a 6805 microprocessor core which controls the ADB (Apple Desktop
- * Bus) which connects to the keyboard and mouse.  The CUDA also
- * controls system power and the RTC (real time clock) chip.
+ * The Cuda or Egret is a 6805 microcontroller interfaced to the 6522 VIA.
+ * This MCU controls system power, Parameter RAM, Real Time Clock and the
+ * Apple Desktop Bus (ADB) that connects to the keyboard and mouse.
  *
  * Copyright (C) 1996 Paul Mackerras.
  */
···
 #define IER		(14*RS)		/* Interrupt enable register */
 #define ANH		(15*RS)		/* A-side data, no handshake */

-/* Bits in B data register: all active low */
-#define TREQ		0x08		/* Transfer request (input) */
-#define TACK		0x10		/* Transfer acknowledge (output) */
-#define TIP		0x20		/* Transfer in progress (output) */
+/*
+ * When the Cuda design replaced the Egret, some signal names and
+ * logic sense changed. They all serve the same purposes, however.
+ *
+ *   VIA pin       |  Egret pin
+ * ----------------+------------------------------------------
+ *   PB3 (input)   |  Transceiver session   (active low)
+ *   PB4 (output)  |  VIA full              (active high)
+ *   PB5 (output)  |  System session        (active high)
+ *
+ *   VIA pin       |  Cuda pin
+ * ----------------+------------------------------------------
+ *   PB3 (input)   |  Transfer request      (active low)
+ *   PB4 (output)  |  Byte acknowledge      (active low)
+ *   PB5 (output)  |  Transfer in progress  (active low)
+ */
+
+/* Bits in Port B data register */
+#define TREQ		0x08		/* Transfer request */
+#define TACK		0x10		/* Transfer acknowledge */
+#define TIP		0x20		/* Transfer in progress */

 /* Bits in ACR */
 #define SR_CTRL		0x1c		/* Shift register control bits */
···
 #define IER_SET		0x80		/* set bits in IER */
 #define IER_CLR		0		/* clear bits in IER */
 #define SR_INT		0x04		/* Shift register full/empty */
+
+/* Duration of byte acknowledgement pulse (us) */
+#define EGRET_TACK_ASSERTED_DELAY	300
+#define EGRET_TACK_NEGATED_DELAY	400
+
+/* Interval from interrupt to start of session (us) */
+#define EGRET_SESSION_DELAY		450
+
+#ifdef CONFIG_PPC
+#define mcu_is_egret	false
+#else
+static bool mcu_is_egret;
+#endif
+
+static inline bool TREQ_asserted(u8 portb)
+{
+	return !(portb & TREQ);
+}
+
+static inline void assert_TIP(void)
+{
+	if (mcu_is_egret) {
+		udelay(EGRET_SESSION_DELAY);
+		out_8(&via[B], in_8(&via[B]) | TIP);
+	} else
+		out_8(&via[B], in_8(&via[B]) & ~TIP);
+}
+
+static inline void assert_TIP_and_TACK(void)
+{
+	if (mcu_is_egret) {
+		udelay(EGRET_SESSION_DELAY);
+		out_8(&via[B], in_8(&via[B]) | TIP | TACK);
+	} else
+		out_8(&via[B], in_8(&via[B]) & ~(TIP | TACK));
+}
+
+static inline void assert_TACK(void)
+{
+	if (mcu_is_egret) {
+		udelay(EGRET_TACK_NEGATED_DELAY);
+		out_8(&via[B], in_8(&via[B]) | TACK);
+	} else
+		out_8(&via[B], in_8(&via[B]) & ~TACK);
+}
+
+static inline void toggle_TACK(void)
+{
+	out_8(&via[B], in_8(&via[B]) ^ TACK);
+}
+
+static inline void negate_TACK(void)
+{
+	if (mcu_is_egret) {
+		udelay(EGRET_TACK_ASSERTED_DELAY);
+		out_8(&via[B], in_8(&via[B]) & ~TACK);
+	} else
+		out_8(&via[B], in_8(&via[B]) | TACK);
+}
+
+static inline void negate_TIP_and_TACK(void)
+{
+	if (mcu_is_egret) {
+		udelay(EGRET_TACK_ASSERTED_DELAY);
+		out_8(&via[B], in_8(&via[B]) & ~(TIP | TACK));
+	} else
+		out_8(&via[B], in_8(&via[B]) | TIP | TACK);
+}

 static enum cuda_state {
     idle,
···
     struct adb_request req;
     int err;

-    if (macintosh_config->adb_type != MAC_ADB_CUDA)
+    if (macintosh_config->adb_type != MAC_ADB_CUDA &&
+        macintosh_config->adb_type != MAC_ADB_EGRET)
 	return 0;

     via = via1;
     cuda_state = idle;
+    mcu_is_egret = macintosh_config->adb_type == MAC_ADB_EGRET;

     err = cuda_init_via();
     if (err) {
···
 	return -EAGAIN;
     }

-    printk("Macintosh CUDA driver v0.5 for Unified ADB.\n");
+    pr_info("Macintosh Cuda and Egret driver.\n");

     cuda_fully_inited = 1;
     return 0;
···
     if (sys_ctrler != SYS_CTRLER_CUDA)
 	return -ENODEV;
 #else
-    if (macintosh_config->adb_type != MAC_ADB_CUDA)
+    if (macintosh_config->adb_type != MAC_ADB_CUDA &&
+        macintosh_config->adb_type != MAC_ADB_EGRET)
 	return -ENODEV;
 #endif
     if (via == NULL)
···
 }
 #endif /* CONFIG_ADB */

+static int __init sync_egret(void)
+{
+	if (TREQ_asserted(in_8(&via[B]))) {
+		/* Complete the inbound transfer */
+		assert_TIP_and_TACK();
+		while (1) {
+			negate_TACK();
+			mdelay(1);
+			(void)in_8(&via[SR]);
+			assert_TACK();
+			if (!TREQ_asserted(in_8(&via[B])))
+				break;
+		}
+		negate_TIP_and_TACK();
+	} else if (in_8(&via[B]) & TIP) {
+		/* Terminate the outbound transfer */
+		negate_TACK();
+		assert_TACK();
+		mdelay(1);
+		negate_TIP_and_TACK();
+	}
+	/* Clear shift register interrupt */
+	if (in_8(&via[IFR]) & SR_INT)
+		(void)in_8(&via[SR]);
+	return 0;
+}
+
 #define WAIT_FOR(cond, what)					\
     do {							\
 	int x;							\
 	for (x = 1000; !(cond); --x) {				\
 	    if (x == 0) {					\
-		printk("Timeout waiting for " what "\n");	\
+		pr_err("Timeout waiting for " what "\n");	\
 		return -ENXIO;					\
 	    }							\
 	    udelay(100);					\
···
 static int
 __init cuda_init_via(void)
 {
-    out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ);	/* TACK & TIP out */
-    out_8(&via[B], in_8(&via[B]) | TACK | TIP);			/* negate them */
-    out_8(&via[ACR], (in_8(&via[ACR]) & ~SR_CTRL) | SR_EXT);	/* SR data in */
-    (void)in_8(&via[SR]);					/* clear any left-over data */
 #ifdef CONFIG_PPC
     out_8(&via[IER], 0x7f);					/* disable interrupts from VIA */
     (void)in_8(&via[IER]);
···
     out_8(&via[IER], SR_INT);					/* disable SR interrupt from VIA */
 #endif

+    out_8(&via[DIRB], (in_8(&via[DIRB]) | TACK | TIP) & ~TREQ);	/* TACK & TIP out */
+    out_8(&via[ACR], (in_8(&via[ACR]) & ~SR_CTRL) | SR_EXT);	/* SR data in */
+    (void)in_8(&via[SR]);					/* clear any left-over data */
+
+    if (mcu_is_egret)
+	return sync_egret();
+
+    negate_TIP_and_TACK();
+
     /* delay 4ms and then clear any pending interrupt */
     mdelay(4);
     (void)in_8(&via[SR]);
     out_8(&via[IFR], SR_INT);

     /* sync with the CUDA - assert TACK without TIP */
-    out_8(&via[B], in_8(&via[B]) & ~TACK);
+    assert_TACK();

     /* wait for the CUDA to assert TREQ in response */
-    WAIT_FOR((in_8(&via[B]) & TREQ) == 0, "CUDA response to sync");
+    WAIT_FOR(TREQ_asserted(in_8(&via[B])), "CUDA response to sync");

     /* wait for the interrupt and then clear it */
     WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (2)");
···
     out_8(&via[IFR], SR_INT);

     /* finish the sync by negating TACK */
-    out_8(&via[B], in_8(&via[B]) | TACK);
+    negate_TACK();

     /* wait for the CUDA to negate TREQ and the corresponding interrupt */
-    WAIT_FOR(in_8(&via[B]) & TREQ, "CUDA response to sync (3)");
+    WAIT_FOR(!TREQ_asserted(in_8(&via[B])), "CUDA response to sync (3)");
     WAIT_FOR(in_8(&via[IFR]) & SR_INT, "CUDA response to sync (4)");
     (void)in_8(&via[SR]);
     out_8(&via[IFR], SR_INT);
-    out_8(&via[B], in_8(&via[B]) | TIP);	/* should be unnecessary */

     return 0;
 }
···
     return 0;
 }
 #endif /* CONFIG_ADB */
+
 /* Construct and send a cuda request */
 int
 cuda_request(struct adb_request *req, void (*done)(struct adb_request *),
···
 static void
 cuda_start(void)
 {
-    struct adb_request *req;
-
     /* assert cuda_state == idle */
-    /* get the packet to send */
-    req = current_req;
-    if (req == 0)
+    if (current_req == NULL)
 	return;
-    if ((in_8(&via[B]) & TREQ) == 0)
+    data_index = 0;
+    if (TREQ_asserted(in_8(&via[B])))
 	return;			/* a byte is coming in from the CUDA */

     /* set the shift register to shift out and send a byte */
     out_8(&via[ACR], in_8(&via[ACR]) | SR_OUT);
-    out_8(&via[SR], req->data[0]);
-    out_8(&via[B], in_8(&via[B]) & ~TIP);
+    out_8(&via[SR], current_req->data[data_index++]);
+    if (mcu_is_egret)
+	assert_TIP_and_TACK();
+    else
+	assert_TIP();
     cuda_state = sent_first_byte;
 }

 void
 cuda_poll(void)
 {
-    /* cuda_interrupt only takes a normal lock, we disable
-     * interrupts here to avoid re-entering and thus deadlocking.
-     */
-    if (cuda_irq)
-	disable_irq(cuda_irq);
-    cuda_interrupt(0, NULL);
-    if (cuda_irq)
-	enable_irq(cuda_irq);
+    cuda_interrupt(0, NULL);
 }
 EXPORT_SYMBOL(cuda_poll);
+
+#define ARRAY_FULL(a, p)	((p) - (a) == ARRAY_SIZE(a))

 static irqreturn_t
 cuda_interrupt(int irq, void *arg)
 {
-    int status;
+    unsigned long flags;
+    u8 status;
     struct adb_request *req = NULL;
     unsigned char ibuf[16];
     int ibuf_len = 0;
     int complete = 0;

-    spin_lock(&cuda_lock);
+    spin_lock_irqsave(&cuda_lock, flags);

     /* On powermacs, this handler is registered for the VIA IRQ. But they use
      * just the shift register IRQ -- other VIA interrupt sources are disabled.
···
 #endif
     {
 	if ((in_8(&via[IFR]) & SR_INT) == 0) {
-	    spin_unlock(&cuda_lock);
+	    spin_unlock_irqrestore(&cuda_lock, flags);
 	    return IRQ_NONE;
 	} else {
 	    out_8(&via[IFR], SR_INT);
 	}
     }

-    status = (~in_8(&via[B]) & (TIP|TREQ)) | (in_8(&via[ACR]) & SR_OUT);
-    /* printk("cuda_interrupt: state=%d status=%x\n", cuda_state, status); */
+    status = in_8(&via[B]) & (TIP | TACK | TREQ);
+
     switch (cuda_state) {
     case idle:
-	/* CUDA has sent us the first byte of data - unsolicited */
-	if (status != TREQ)
-	    printk("cuda: state=idle, status=%x\n", status);
+	/* System controller has unsolicited data for us */
 	(void)in_8(&via[SR]);
-	out_8(&via[B], in_8(&via[B]) & ~TIP);
+idle_state:
+	assert_TIP();
 	cuda_state = reading;
 	reply_ptr = cuda_rbuf;
 	reading_reply = 0;
 	break;

     case awaiting_reply:
-	/* CUDA has sent us the first byte of data of a reply */
-	if (status != TREQ)
-	    printk("cuda: state=awaiting_reply, status=%x\n", status);
+	/* System controller has reply data for us */
 	(void)in_8(&via[SR]);
-	out_8(&via[B], in_8(&via[B]) & ~TIP);
+	assert_TIP();
 	cuda_state = reading;
 	reply_ptr = current_req->reply;
 	reading_reply = 1;
 	break;

     case sent_first_byte:
-	if (status == TREQ + TIP + SR_OUT) {
+	if (TREQ_asserted(status)) {
 	    /* collision */
 	    out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT);
 	    (void)in_8(&via[SR]);
-	    out_8(&via[B], in_8(&via[B]) | TIP | TACK);
+	    negate_TIP_and_TACK();
 	    cuda_state = idle;
+	    /* Egret does not raise an "aborted" interrupt */
+	    if (mcu_is_egret)
+		goto idle_state;
 	} else {
-	    /* assert status == TIP + SR_OUT */
-	    if (status != TIP + SR_OUT)
-		printk("cuda: state=sent_first_byte status=%x\n", status);
-	    out_8(&via[SR], current_req->data[1]);
-	    out_8(&via[B], in_8(&via[B]) ^ TACK);
-	    data_index = 2;
+	    out_8(&via[SR], current_req->data[data_index++]);
+	    toggle_TACK();
+	    if (mcu_is_egret)
+		assert_TACK();
 	    cuda_state = sending;
 	}
 	break;
···
 	if (data_index >= req->nbytes) {
 	    out_8(&via[ACR], in_8(&via[ACR]) & ~SR_OUT);
 	    (void)in_8(&via[SR]);
-	    out_8(&via[B], in_8(&via[B]) | TACK | TIP);
+	    negate_TIP_and_TACK();
 	    req->sent = 1;
 	    if (req->reply_expected) {
 		cuda_state = awaiting_reply;
···
 	    }
 	} else {
 	    out_8(&via[SR], req->data[data_index++]);
-	    out_8(&via[B], in_8(&via[B]) ^ TACK);
+	    toggle_TACK();
+	    if (mcu_is_egret)
+		assert_TACK();
 	}
 	break;

     case reading:
-	*reply_ptr++ = in_8(&via[SR]);
-	if (status == TIP) {
+	if (reading_reply ? ARRAY_FULL(current_req->reply, reply_ptr)
+			  : ARRAY_FULL(cuda_rbuf, reply_ptr))
+	    (void)in_8(&via[SR]);
+	else
+	    *reply_ptr++ = in_8(&via[SR]);
+	if (!TREQ_asserted(status)) {
+	    if (mcu_is_egret)
+		assert_TACK();
 	    /* that's all folks */
-	    out_8(&via[B], in_8(&via[B]) | TACK | TIP);
+	    negate_TIP_and_TACK();
 	    cuda_state = read_done;
+	    /* Egret does not raise a "read done" interrupt */
+	    if (mcu_is_egret)
+		goto read_done_state;
 	} else {
-	    /* assert status == TIP | TREQ */
-	    if (status != TIP + TREQ)
-		printk("cuda: state=reading status=%x\n", status);
-	    out_8(&via[B], in_8(&via[B]) ^ TACK);
+	    toggle_TACK();
+	    if (mcu_is_egret)
+		negate_TACK();
 	}
 	break;

     case read_done:
 	(void)in_8(&via[SR]);
+read_done_state:
 	if (reading_reply) {
 	    req = current_req;
 	    req->reply_len = reply_ptr - req->reply;
···
 	    }
 	    current_req = req->next;
 	    complete = 1;
+	    reading_reply = 0;
 	} else {
 	    /* This is tricky. We must break the spinlock to call
 	     * cuda_input.
However, doing so means we might get ··· 707 581 ibuf_len = reply_ptr - cuda_rbuf; 708 582 memcpy(ibuf, cuda_rbuf, ibuf_len); 709 583 } 710 - if (status == TREQ) { 711 - out_8(&via[B], in_8(&via[B]) & ~TIP); 584 + reply_ptr = cuda_rbuf; 585 + cuda_state = idle; 586 + cuda_start(); 587 + if (cuda_state == idle && TREQ_asserted(in_8(&via[B]))) { 588 + assert_TIP(); 712 589 cuda_state = reading; 713 - reply_ptr = cuda_rbuf; 714 - reading_reply = 0; 715 - } else { 716 - cuda_state = idle; 717 - cuda_start(); 718 590 } 719 591 break; 720 592 721 593 default: 722 - printk("cuda_interrupt: unknown cuda_state %d?\n", cuda_state); 594 + pr_err("cuda_interrupt: unknown cuda_state %d?\n", cuda_state); 723 595 } 724 - spin_unlock(&cuda_lock); 596 + spin_unlock_irqrestore(&cuda_lock, flags); 725 597 if (complete && req) { 726 598 void (*done)(struct adb_request *) = req->done; 727 599 mb(); ··· 738 614 static void 739 615 cuda_input(unsigned char *buf, int nb) 740 616 { 741 - int i; 742 - 743 617 switch (buf[0]) { 744 618 case ADB_PACKET: 745 619 #ifdef CONFIG_XMON ··· 754 632 #endif /* CONFIG_ADB */ 755 633 break; 756 634 635 + case TIMER_PACKET: 636 + /* Egret sends these periodically. Might be useful as a 'heartbeat' 637 + * to trigger a recovery for the VIA shift register errata. 638 + */ 639 + break; 640 + 757 641 default: 758 - printk("data from cuda (%d bytes):", nb); 759 - for (i = 0; i < nb; ++i) 760 - printk(" %.2x", buf[i]); 761 - printk("\n"); 642 + print_hex_dump(KERN_INFO, "cuda_input: ", DUMP_PREFIX_NONE, 32, 1, 643 + buf, nb, false); 762 644 } 763 645 }
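The diff above replaces open-coded tests and writes of the VIA port B handshake bits with helpers such as `TREQ_asserted()`, `assert_TIP()`, `negate_TACK()`, and `toggle_TACK()`. A minimal userspace sketch of those helpers, assuming only the bit masks defined in the driver (the lines are active low, so "asserted" means the bit reads as 0; the real helpers write the register through `out_8()` rather than returning a value):

```c
#include <assert.h>

/* Bit masks as defined in the via-cuda/via-maciisi drivers (port B). */
#define TREQ 0x08	/* Transfer request (input) */
#define TACK 0x10	/* Transfer acknowledge (output) */
#define TIP  0x20	/* Transfer in progress (output) */

/* Active low: the line is asserted when the bit reads as 0. */
static int TREQ_asserted(unsigned char portb)
{
	return (portb & TREQ) == 0;
}

/* Pure-function models of the output-line helpers: each returns the new
 * port B value instead of writing a hardware register. */
static unsigned char assert_TIP(unsigned char portb)  { return portb & ~TIP; }
static unsigned char negate_TACK(unsigned char portb) { return portb | TACK; }
static unsigned char toggle_TACK(unsigned char portb) { return portb ^ TACK; }
```

Centralizing the bit handling this way lets the same state machine serve both the Cuda and Egret system controllers, with `mcu_is_egret` selecting the protocol differences.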
drivers/macintosh/via-maciisi.c  -677 (file deleted)
··· 1 - /* 2 - * Device driver for the IIsi-style ADB on some Mac LC and II-class machines 3 - * 4 - * Based on via-cuda.c and via-macii.c, as well as the original 5 - * adb-bus.c, which in turn is somewhat influenced by (but uses no 6 - * code from) the NetBSD HWDIRECT ADB code. Original IIsi driver work 7 - * was done by Robert Thompson and integrated into the old style 8 - * driver by Michael Schmitz. 9 - * 10 - * Original sources (c) Alan Cox, Paul Mackerras, and others. 11 - * 12 - * Rewritten for Unified ADB by David Huggins-Daines <dhd@debian.org> 13 - * 14 - * 7/13/2000- extensive changes by Andrew McPherson <andrew@macduff.dhs.org> 15 - * Works about 30% of the time now. 16 - */ 17 - 18 - #include <linux/types.h> 19 - #include <linux/errno.h> 20 - #include <linux/kernel.h> 21 - #include <linux/adb.h> 22 - #include <linux/cuda.h> 23 - #include <linux/delay.h> 24 - #include <linux/interrupt.h> 25 - #include <asm/macintosh.h> 26 - #include <asm/macints.h> 27 - #include <asm/mac_via.h> 28 - 29 - static volatile unsigned char *via; 30 - 31 - /* VIA registers - spaced 0x200 bytes apart - only the ones we actually use */ 32 - #define RS 0x200 /* skip between registers */ 33 - #define B 0 /* B-side data */ 34 - #define A RS /* A-side data */ 35 - #define DIRB (2*RS) /* B-side direction (1=output) */ 36 - #define DIRA (3*RS) /* A-side direction (1=output) */ 37 - #define SR (10*RS) /* Shift register */ 38 - #define ACR (11*RS) /* Auxiliary control register */ 39 - #define IFR (13*RS) /* Interrupt flag register */ 40 - #define IER (14*RS) /* Interrupt enable register */ 41 - 42 - /* Bits in B data register: all active low */ 43 - #define TREQ 0x08 /* Transfer request (input) */ 44 - #define TACK 0x10 /* Transfer acknowledge (output) */ 45 - #define TIP 0x20 /* Transfer in progress (output) */ 46 - #define ST_MASK 0x30 /* mask for selecting ADB state bits */ 47 - 48 - /* Bits in ACR */ 49 - #define SR_CTRL 0x1c /* Shift register control bits */ 50 - #define SR_EXT 
0x0c /* Shift on external clock */ 51 - #define SR_OUT 0x10 /* Shift out if 1 */ 52 - 53 - /* Bits in IFR and IER */ 54 - #define IER_SET 0x80 /* set bits in IER */ 55 - #define IER_CLR 0 /* clear bits in IER */ 56 - #define SR_INT 0x04 /* Shift register full/empty */ 57 - #define SR_DATA 0x08 /* Shift register data */ 58 - #define SR_CLOCK 0x10 /* Shift register clock */ 59 - 60 - #define ADB_DELAY 150 61 - 62 - #undef DEBUG_MACIISI_ADB 63 - 64 - static struct adb_request* current_req; 65 - static struct adb_request* last_req; 66 - static unsigned char maciisi_rbuf[16]; 67 - static unsigned char *reply_ptr; 68 - static int data_index; 69 - static int reading_reply; 70 - static int reply_len; 71 - static int tmp; 72 - static int need_sync; 73 - 74 - static enum maciisi_state { 75 - idle, 76 - sending, 77 - reading, 78 - } maciisi_state; 79 - 80 - static int maciisi_probe(void); 81 - static int maciisi_init(void); 82 - static int maciisi_send_request(struct adb_request* req, int sync); 83 - static void maciisi_sync(struct adb_request *req); 84 - static int maciisi_write(struct adb_request* req); 85 - static irqreturn_t maciisi_interrupt(int irq, void* arg); 86 - static void maciisi_input(unsigned char *buf, int nb); 87 - static int maciisi_init_via(void); 88 - static void maciisi_poll(void); 89 - static int maciisi_start(void); 90 - 91 - struct adb_driver via_maciisi_driver = { 92 - "Mac IIsi", 93 - maciisi_probe, 94 - maciisi_init, 95 - maciisi_send_request, 96 - NULL, /* maciisi_adb_autopoll, */ 97 - maciisi_poll, 98 - NULL /* maciisi_reset_adb_bus */ 99 - }; 100 - 101 - static int 102 - maciisi_probe(void) 103 - { 104 - if (macintosh_config->adb_type != MAC_ADB_IISI) 105 - return -ENODEV; 106 - 107 - via = via1; 108 - return 0; 109 - } 110 - 111 - static int 112 - maciisi_init(void) 113 - { 114 - int err; 115 - 116 - if (via == NULL) 117 - return -ENODEV; 118 - 119 - if ((err = maciisi_init_via())) { 120 - printk(KERN_ERR "maciisi_init: maciisi_init_via() failed, 
code %d\n", err); 121 - via = NULL; 122 - return err; 123 - } 124 - 125 - if (request_irq(IRQ_MAC_ADB, maciisi_interrupt, 0, "ADB", 126 - maciisi_interrupt)) { 127 - printk(KERN_ERR "maciisi_init: can't get irq %d\n", IRQ_MAC_ADB); 128 - return -EAGAIN; 129 - } 130 - 131 - printk("adb: Mac IIsi driver v0.2 for Unified ADB.\n"); 132 - return 0; 133 - } 134 - 135 - /* Flush data from the ADB controller */ 136 - static void 137 - maciisi_stfu(void) 138 - { 139 - int status = via[B] & (TIP|TREQ); 140 - 141 - if (status & TREQ) { 142 - #ifdef DEBUG_MACIISI_ADB 143 - printk (KERN_DEBUG "maciisi_stfu called with TREQ high!\n"); 144 - #endif 145 - return; 146 - } 147 - 148 - udelay(ADB_DELAY); 149 - via[ACR] &= ~SR_OUT; 150 - via[IER] = IER_CLR | SR_INT; 151 - 152 - udelay(ADB_DELAY); 153 - 154 - status = via[B] & (TIP|TREQ); 155 - 156 - if (!(status & TREQ)) 157 - { 158 - via[B] |= TIP; 159 - 160 - while(1) 161 - { 162 - int poll_timeout = ADB_DELAY * 5; 163 - /* Poll for SR interrupt */ 164 - while (!(via[IFR] & SR_INT) && poll_timeout-- > 0) 165 - status = via[B] & (TIP|TREQ); 166 - 167 - tmp = via[SR]; /* Clear shift register */ 168 - #ifdef DEBUG_MACIISI_ADB 169 - printk(KERN_DEBUG "maciisi_stfu: status %x timeout %d data %x\n", 170 - status, poll_timeout, tmp); 171 - #endif 172 - if(via[B] & TREQ) 173 - break; 174 - 175 - /* ACK on-off */ 176 - via[B] |= TACK; 177 - udelay(ADB_DELAY); 178 - via[B] &= ~TACK; 179 - } 180 - 181 - /* end frame */ 182 - via[B] &= ~TIP; 183 - udelay(ADB_DELAY); 184 - } 185 - 186 - via[IER] = IER_SET | SR_INT; 187 - } 188 - 189 - /* All specifically VIA-related initialization goes here */ 190 - static int 191 - maciisi_init_via(void) 192 - { 193 - int i; 194 - 195 - /* Set the lines up. 
We want TREQ as input TACK|TIP as output */ 196 - via[DIRB] = (via[DIRB] | TACK | TIP) & ~TREQ; 197 - /* Shift register on input */ 198 - via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT; 199 - #ifdef DEBUG_MACIISI_ADB 200 - printk(KERN_DEBUG "maciisi_init_via: initial status %x\n", via[B] & (TIP|TREQ)); 201 - #endif 202 - /* Wipe any pending data and int */ 203 - tmp = via[SR]; 204 - /* Enable keyboard interrupts */ 205 - via[IER] = IER_SET | SR_INT; 206 - /* Set initial state: idle */ 207 - via[B] &= ~(TACK|TIP); 208 - /* Clear interrupt bit */ 209 - via[IFR] = SR_INT; 210 - 211 - for(i = 0; i < 60; i++) { 212 - udelay(ADB_DELAY); 213 - maciisi_stfu(); 214 - udelay(ADB_DELAY); 215 - if(via[B] & TREQ) 216 - break; 217 - } 218 - if (i == 60) 219 - printk(KERN_ERR "maciisi_init_via: bus jam?\n"); 220 - 221 - maciisi_state = idle; 222 - need_sync = 0; 223 - 224 - return 0; 225 - } 226 - 227 - /* Send a request, possibly waiting for a reply */ 228 - static int 229 - maciisi_send_request(struct adb_request* req, int sync) 230 - { 231 - int i; 232 - 233 - #ifdef DEBUG_MACIISI_ADB 234 - static int dump_packet = 0; 235 - #endif 236 - 237 - if (via == NULL) { 238 - req->complete = 1; 239 - return -ENXIO; 240 - } 241 - 242 - #ifdef DEBUG_MACIISI_ADB 243 - if (dump_packet) { 244 - printk(KERN_DEBUG "maciisi_send_request:"); 245 - for (i = 0; i < req->nbytes; i++) { 246 - printk(" %.2x", req->data[i]); 247 - } 248 - printk(" sync %d\n", sync); 249 - } 250 - #endif 251 - 252 - req->reply_expected = 1; 253 - 254 - i = maciisi_write(req); 255 - if (i) 256 - { 257 - /* Normally, if a packet requires syncing, that happens at the end of 258 - * maciisi_send_request. But if the transfer fails, it will be restarted 259 - * by maciisi_interrupt(). We use need_sync to tell maciisi_interrupt 260 - * when to sync a packet that it sends out. 261 - * 262 - * Suggestions on a better way to do this are welcome. 
263 - */ 264 - if(i == -EBUSY && sync) 265 - need_sync = 1; 266 - else 267 - need_sync = 0; 268 - return i; 269 - } 270 - if(sync) 271 - maciisi_sync(req); 272 - 273 - return 0; 274 - } 275 - 276 - /* Poll the ADB chip until the request completes */ 277 - static void maciisi_sync(struct adb_request *req) 278 - { 279 - int count = 0; 280 - 281 - #ifdef DEBUG_MACIISI_ADB 282 - printk(KERN_DEBUG "maciisi_sync called\n"); 283 - #endif 284 - 285 - /* If for some reason the ADB chip shuts up on us, we want to avoid an endless loop. */ 286 - while (!req->complete && count++ < 50) { 287 - maciisi_poll(); 288 - } 289 - /* This could be BAD... when the ADB controller doesn't respond 290 - * for this long, it's probably not coming back :-( */ 291 - if (count > 50) /* Hopefully shouldn't happen */ 292 - printk(KERN_ERR "maciisi_send_request: poll timed out!\n"); 293 - } 294 - 295 - int 296 - maciisi_request(struct adb_request *req, void (*done)(struct adb_request *), 297 - int nbytes, ...) 298 - { 299 - va_list list; 300 - int i; 301 - 302 - req->nbytes = nbytes; 303 - req->done = done; 304 - req->reply_expected = 0; 305 - va_start(list, nbytes); 306 - for (i = 0; i < nbytes; i++) 307 - req->data[i++] = va_arg(list, int); 308 - va_end(list); 309 - 310 - return maciisi_send_request(req, 1); 311 - } 312 - 313 - /* Enqueue a request, and run the queue if possible */ 314 - static int 315 - maciisi_write(struct adb_request* req) 316 - { 317 - unsigned long flags; 318 - int i; 319 - 320 - /* We will accept CUDA packets - the VIA sends them to us, so 321 - it figures that we should be able to send them to it */ 322 - if (req->nbytes < 2 || req->data[0] > CUDA_PACKET) { 323 - printk(KERN_ERR "maciisi_write: packet too small or not an ADB or CUDA packet\n"); 324 - req->complete = 1; 325 - return -EINVAL; 326 - } 327 - req->next = NULL; 328 - req->sent = 0; 329 - req->complete = 0; 330 - req->reply_len = 0; 331 - 332 - local_irq_save(flags); 333 - 334 - if (current_req) { 335 - 
last_req->next = req; 336 - last_req = req; 337 - } else { 338 - current_req = req; 339 - last_req = req; 340 - } 341 - if (maciisi_state == idle) 342 - { 343 - i = maciisi_start(); 344 - if(i != 0) 345 - { 346 - local_irq_restore(flags); 347 - return i; 348 - } 349 - } 350 - else 351 - { 352 - #ifdef DEBUG_MACIISI_ADB 353 - printk(KERN_DEBUG "maciisi_write: would start, but state is %d\n", maciisi_state); 354 - #endif 355 - local_irq_restore(flags); 356 - return -EBUSY; 357 - } 358 - 359 - local_irq_restore(flags); 360 - 361 - return 0; 362 - } 363 - 364 - static int 365 - maciisi_start(void) 366 - { 367 - struct adb_request* req; 368 - int status; 369 - 370 - #ifdef DEBUG_MACIISI_ADB 371 - status = via[B] & (TIP | TREQ); 372 - 373 - printk(KERN_DEBUG "maciisi_start called, state=%d, status=%x, ifr=%x\n", maciisi_state, status, via[IFR]); 374 - #endif 375 - 376 - if (maciisi_state != idle) { 377 - /* shouldn't happen */ 378 - printk(KERN_ERR "maciisi_start: maciisi_start called when driver busy!\n"); 379 - return -EBUSY; 380 - } 381 - 382 - req = current_req; 383 - if (req == NULL) 384 - return -EINVAL; 385 - 386 - status = via[B] & (TIP|TREQ); 387 - if (!(status & TREQ)) { 388 - #ifdef DEBUG_MACIISI_ADB 389 - printk(KERN_DEBUG "maciisi_start: bus busy - aborting\n"); 390 - #endif 391 - return -EBUSY; 392 - } 393 - 394 - /* Okay, send */ 395 - #ifdef DEBUG_MACIISI_ADB 396 - printk(KERN_DEBUG "maciisi_start: sending\n"); 397 - #endif 398 - /* Set state to active */ 399 - via[B] |= TIP; 400 - /* ACK off */ 401 - via[B] &= ~TACK; 402 - /* Delay */ 403 - udelay(ADB_DELAY); 404 - /* Shift out and send */ 405 - via[ACR] |= SR_OUT; 406 - via[SR] = req->data[0]; 407 - data_index = 1; 408 - /* ACK on */ 409 - via[B] |= TACK; 410 - maciisi_state = sending; 411 - 412 - return 0; 413 - } 414 - 415 - void 416 - maciisi_poll(void) 417 - { 418 - unsigned long flags; 419 - 420 - local_irq_save(flags); 421 - if (via[IFR] & SR_INT) { 422 - maciisi_interrupt(0, NULL); 423 - } 424 - 
else /* avoid calling this function too quickly in a loop */ 425 - udelay(ADB_DELAY); 426 - 427 - local_irq_restore(flags); 428 - } 429 - 430 - /* Shift register interrupt - this is *supposed* to mean that the 431 - register is either full or empty. In practice, I have no idea what 432 - it means :( */ 433 - static irqreturn_t 434 - maciisi_interrupt(int irq, void* arg) 435 - { 436 - int status; 437 - struct adb_request *req; 438 - #ifdef DEBUG_MACIISI_ADB 439 - static int dump_reply = 0; 440 - #endif 441 - int i; 442 - unsigned long flags; 443 - 444 - local_irq_save(flags); 445 - 446 - status = via[B] & (TIP|TREQ); 447 - #ifdef DEBUG_MACIISI_ADB 448 - printk(KERN_DEBUG "state %d status %x ifr %x\n", maciisi_state, status, via[IFR]); 449 - #endif 450 - 451 - if (!(via[IFR] & SR_INT)) { 452 - /* Shouldn't happen, we hope */ 453 - printk(KERN_ERR "maciisi_interrupt: called without interrupt flag set\n"); 454 - local_irq_restore(flags); 455 - return IRQ_NONE; 456 - } 457 - 458 - /* Clear the interrupt */ 459 - /* via[IFR] = SR_INT; */ 460 - 461 - switch_start: 462 - switch (maciisi_state) { 463 - case idle: 464 - if (status & TIP) 465 - printk(KERN_ERR "maciisi_interrupt: state is idle but TIP asserted!\n"); 466 - 467 - if(!reading_reply) 468 - udelay(ADB_DELAY); 469 - /* Shift in */ 470 - via[ACR] &= ~SR_OUT; 471 - /* Signal start of frame */ 472 - via[B] |= TIP; 473 - /* Clear the interrupt (throw this value on the floor, it's useless) */ 474 - tmp = via[SR]; 475 - /* ACK adb chip, high-low */ 476 - via[B] |= TACK; 477 - udelay(ADB_DELAY); 478 - via[B] &= ~TACK; 479 - reply_len = 0; 480 - maciisi_state = reading; 481 - if (reading_reply) { 482 - reply_ptr = current_req->reply; 483 - } else { 484 - reply_ptr = maciisi_rbuf; 485 - } 486 - break; 487 - 488 - case sending: 489 - /* via[SR]; */ 490 - /* Set ACK off */ 491 - via[B] &= ~TACK; 492 - req = current_req; 493 - 494 - if (!(status & TREQ)) { 495 - /* collision */ 496 - printk(KERN_ERR "maciisi_interrupt: send 
collision\n"); 497 - /* Set idle and input */ 498 - via[ACR] &= ~SR_OUT; 499 - tmp = via[SR]; 500 - via[B] &= ~TIP; 501 - /* Must re-send */ 502 - reading_reply = 0; 503 - reply_len = 0; 504 - maciisi_state = idle; 505 - udelay(ADB_DELAY); 506 - /* process this now, because the IFR has been cleared */ 507 - goto switch_start; 508 - } 509 - 510 - udelay(ADB_DELAY); 511 - 512 - if (data_index >= req->nbytes) { 513 - /* Sent the whole packet, put the bus back in idle state */ 514 - /* Shift in, we are about to read a reply (hopefully) */ 515 - via[ACR] &= ~SR_OUT; 516 - tmp = via[SR]; 517 - /* End of frame */ 518 - via[B] &= ~TIP; 519 - req->sent = 1; 520 - maciisi_state = idle; 521 - if (req->reply_expected) { 522 - /* Note: only set this once we've 523 - successfully sent the packet */ 524 - reading_reply = 1; 525 - } else { 526 - current_req = req->next; 527 - if (req->done) 528 - (*req->done)(req); 529 - /* Do any queued requests now */ 530 - i = maciisi_start(); 531 - if(i == 0 && need_sync) { 532 - /* Packet needs to be synced */ 533 - maciisi_sync(current_req); 534 - } 535 - if(i != -EBUSY) 536 - need_sync = 0; 537 - } 538 - } else { 539 - /* Sending more stuff */ 540 - /* Shift out */ 541 - via[ACR] |= SR_OUT; 542 - /* Write */ 543 - via[SR] = req->data[data_index++]; 544 - /* Signal 'byte ready' */ 545 - via[B] |= TACK; 546 - } 547 - break; 548 - 549 - case reading: 550 - /* Shift in */ 551 - /* via[ACR] &= ~SR_OUT; */ /* Not in 2.2 */ 552 - if (reply_len++ > 16) { 553 - printk(KERN_ERR "maciisi_interrupt: reply too long, aborting read\n"); 554 - via[B] |= TACK; 555 - udelay(ADB_DELAY); 556 - via[B] &= ~(TACK|TIP); 557 - maciisi_state = idle; 558 - i = maciisi_start(); 559 - if(i == 0 && need_sync) { 560 - /* Packet needs to be synced */ 561 - maciisi_sync(current_req); 562 - } 563 - if(i != -EBUSY) 564 - need_sync = 0; 565 - break; 566 - } 567 - /* Read data */ 568 - *reply_ptr++ = via[SR]; 569 - status = via[B] & (TIP|TREQ); 570 - /* ACK on/off */ 571 - 
via[B] |= TACK; 572 - udelay(ADB_DELAY); 573 - via[B] &= ~TACK; 574 - if (!(status & TREQ)) 575 - break; /* more stuff to deal with */ 576 - 577 - /* end of frame */ 578 - via[B] &= ~TIP; 579 - tmp = via[SR]; /* That's what happens in 2.2 */ 580 - udelay(ADB_DELAY); /* Give controller time to recover */ 581 - 582 - /* end of packet, deal with it */ 583 - if (reading_reply) { 584 - req = current_req; 585 - req->reply_len = reply_ptr - req->reply; 586 - if (req->data[0] == ADB_PACKET) { 587 - /* Have to adjust the reply from ADB commands */ 588 - if (req->reply_len <= 2 || (req->reply[1] & 2) != 0) { 589 - /* the 0x2 bit indicates no response */ 590 - req->reply_len = 0; 591 - } else { 592 - /* leave just the command and result bytes in the reply */ 593 - req->reply_len -= 2; 594 - memmove(req->reply, req->reply + 2, req->reply_len); 595 - } 596 - } 597 - #ifdef DEBUG_MACIISI_ADB 598 - if (dump_reply) { 599 - int i; 600 - printk(KERN_DEBUG "maciisi_interrupt: reply is "); 601 - for (i = 0; i < req->reply_len; ++i) 602 - printk(" %.2x", req->reply[i]); 603 - printk("\n"); 604 - } 605 - #endif 606 - req->complete = 1; 607 - current_req = req->next; 608 - if (req->done) 609 - (*req->done)(req); 610 - /* Obviously, we got it */ 611 - reading_reply = 0; 612 - } else { 613 - maciisi_input(maciisi_rbuf, reply_ptr - maciisi_rbuf); 614 - } 615 - maciisi_state = idle; 616 - status = via[B] & (TIP|TREQ); 617 - if (!(status & TREQ)) { 618 - /* Timeout?! More likely, another packet coming in already */ 619 - #ifdef DEBUG_MACIISI_ADB 620 - printk(KERN_DEBUG "extra data after packet: status %x ifr %x\n", 621 - status, via[IFR]); 622 - #endif 623 - #if 0 624 - udelay(ADB_DELAY); 625 - via[B] |= TIP; 626 - 627 - maciisi_state = reading; 628 - reading_reply = 0; 629 - reply_ptr = maciisi_rbuf; 630 - #else 631 - /* Process the packet now */ 632 - reading_reply = 0; 633 - goto switch_start; 634 - #endif 635 - /* We used to do this... 
but the controller might actually have data for us */ 636 - /* maciisi_stfu(); */ 637 - } 638 - else { 639 - /* Do any queued requests now if possible */ 640 - i = maciisi_start(); 641 - if(i == 0 && need_sync) { 642 - /* Packet needs to be synced */ 643 - maciisi_sync(current_req); 644 - } 645 - if(i != -EBUSY) 646 - need_sync = 0; 647 - } 648 - break; 649 - 650 - default: 651 - printk("maciisi_interrupt: unknown maciisi_state %d?\n", maciisi_state); 652 - } 653 - local_irq_restore(flags); 654 - return IRQ_HANDLED; 655 - } 656 - 657 - static void 658 - maciisi_input(unsigned char *buf, int nb) 659 - { 660 - #ifdef DEBUG_MACIISI_ADB 661 - int i; 662 - #endif 663 - 664 - switch (buf[0]) { 665 - case ADB_PACKET: 666 - adb_input(buf+2, nb-2, buf[1] & 0x40); 667 - break; 668 - default: 669 - #ifdef DEBUG_MACIISI_ADB 670 - printk(KERN_DEBUG "data from IIsi ADB (%d bytes):", nb); 671 - for (i = 0; i < nb; ++i) 672 - printk(" %.2x", buf[i]); 673 - printk("\n"); 674 - #endif 675 - break; 676 - } 677 - }
drivers/misc/cxl/Makefile  +2 -1
···
 ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
 
 cxl-y				+= main.o file.o irq.o fault.o native.o
-cxl-y				+= context.o sysfs.o debugfs.o pci.o trace.o
+cxl-y				+= context.o sysfs.o pci.o trace.o
 cxl-y				+= vphb.o phb.o api.o
 cxl-$(CONFIG_PPC_PSERIES)	+= flash.o guest.o of.o hcalls.o
+cxl-$(CONFIG_DEBUG_FS)		+= debugfs.o
 obj-$(CONFIG_CXL)		+= cxl.o
 obj-$(CONFIG_CXL_BASE)		+= base.o
 
drivers/misc/cxl/api.c  -1
···
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <misc/cxl.h>
-#include <asm/pnv-pci.h>
 #include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/mount.h>
drivers/misc/cxl/cxl.h  +57 -4
···
 	struct dentry *debugfs;
 	struct mutex contexts_lock;
 	spinlock_t afu_cntl_lock;
+	/* Used to block access to AFU config space while deconfigured */
+	struct rw_semaphore configured_rwsem;
 
 	/* AFU error buffer fields and bin attribute for sysfs */
 	u64 eb_len, eb_offset;
···
 void afu_release_irqs(struct cxl_context *ctx, void *cookie);
 void afu_irq_name_free(struct cxl_context *ctx);
 
+#ifdef CONFIG_DEBUG_FS
+
 int cxl_debugfs_init(void);
 void cxl_debugfs_exit(void);
 int cxl_debugfs_adapter_add(struct cxl *adapter);
 void cxl_debugfs_adapter_remove(struct cxl *adapter);
 int cxl_debugfs_afu_add(struct cxl_afu *afu);
 void cxl_debugfs_afu_remove(struct cxl_afu *afu);
+void cxl_stop_trace(struct cxl *cxl);
+void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir);
+void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir);
+void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir);
+
+#else /* CONFIG_DEBUG_FS */
+
+static inline int __init cxl_debugfs_init(void)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_exit(void)
+{
+}
+
+static inline int cxl_debugfs_adapter_add(struct cxl *adapter)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_adapter_remove(struct cxl *adapter)
+{
+}
+
+static inline int cxl_debugfs_afu_add(struct cxl_afu *afu)
+{
+	return 0;
+}
+
+static inline void cxl_debugfs_afu_remove(struct cxl_afu *afu)
+{
+}
+
+static inline void cxl_stop_trace(struct cxl *cxl)
+{
+}
+
+static inline void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter,
+						    struct dentry *dir)
+{
+}
+
+static inline void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter,
+						    struct dentry *dir)
+{
+}
+
+static inline void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
 
 void cxl_handle_fault(struct work_struct *work);
 void cxl_prefault(struct cxl_context *ctx, u64 wed);
···
 int cxl_afu_disable(struct cxl_afu *afu);
 int cxl_psl_purge(struct cxl_afu *afu);
 
-void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir);
-void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir);
-void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir);
 void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx);
 void cxl_native_err_irq_dump_regs(struct cxl *adapter);
-void cxl_stop_trace(struct cxl *cxl);
 int cxl_pci_vphb_add(struct cxl_afu *afu);
 void cxl_pci_vphb_remove(struct cxl_afu *afu);
 void cxl_release_mapping(struct cxl_context *ctx);
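The cxl.h change above is a standard kernel header pattern: when a feature is compiled out (here `CONFIG_DEBUG_FS`), the header supplies `static inline` no-op stubs with the same signatures, so call sites need no `#ifdef`s and the compiler discards the calls entirely. A toy illustration of the pattern; `FEATURE_ENABLED` and the `feature_*` names are hypothetical, not part of the cxl code:

```c
#include <assert.h>

/* Uncomment to select the "real implementation" branch:
 * #define FEATURE_ENABLED
 */

#ifdef FEATURE_ENABLED
/* Real implementations live in a separate .c file built conditionally,
 * just as debugfs.o is only built when CONFIG_DEBUG_FS=y above. */
int feature_init(void);
void feature_exit(void);
#else
/* Compiled-out branch: same signatures, no-op bodies. Callers can use
 * these unconditionally; the optimizer removes the calls. */
static inline int feature_init(void)
{
	return 0;	/* report success: nothing to set up */
}

static inline void feature_exit(void)
{
}
#endif
```

The alternative of wrapping every call site in `#ifdef CONFIG_DEBUG_FS` is what this commit removes the need for.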
drivers/misc/cxl/main.c  +2 -1
···
 	idr_init(&afu->contexts_idr);
 	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
-
+	init_rwsem(&afu->configured_rwsem);
+	down_write(&afu->configured_rwsem);
 	afu->prefault_mode = CXL_PREFAULT_NONE;
 	afu->irqs_max = afu->adapter->user_irqs;
 
drivers/misc/cxl/pci.c  +5
···
 	if ((rc = cxl_native_register_psl_irq(afu)))
 		goto err2;
 
+	up_write(&afu->configured_rwsem);
 	return 0;
 
 err2:
···
 
 static void pci_deconfigure_afu(struct cxl_afu *afu)
 {
+	down_write(&afu->configured_rwsem);
 	cxl_native_release_psl_irq(afu);
 	if (afu->adapter->native->sl_ops->release_serr_irq)
 		afu->adapter->native->sl_ops->release_serr_irq(afu);
···
 
 	cxl_sysfs_adapter_remove(adapter);
 	cxl_debugfs_adapter_remove(adapter);
+
+	/* Flush adapter datacache as its about to be removed */
+	cxl_data_cache_flush(adapter);
 
 	cxl_deconfigure_adapter(adapter);
 
drivers/misc/cxl/vphb.c  +29 -22
···
 	return (bus << 8) + devfn;
 }
 
-static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
-				struct cxl_afu **_afu, int *_record)
+static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
 {
-	struct pci_controller *phb;
-	struct cxl_afu *afu;
+	struct pci_controller *phb = bus ? pci_bus_to_host(bus) : NULL;
+
+	return phb ? phb->private_data : NULL;
+}
+
+static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
+				       struct cxl_afu *afu, int *_record)
+{
 	int record;
 
-	phb = pci_bus_to_host(bus);
-	if (phb == NULL)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	afu = (struct cxl_afu *)phb->private_data;
 	record = cxl_pcie_cfg_record(bus->number, devfn);
 	if (record > afu->crs_num)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
-	*_afu = afu;
 	*_record = record;
 	return 0;
 }
···
 	u16 val16;
 	u32 val32;
 
-	rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
+	afu = pci_bus_to_afu(bus);
+	/* Grab a reader lock on afu. */
+	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
 	if (rc)
-		return rc;
+		goto out;
 
 	switch (len) {
 	case 1:
···
 		WARN_ON(1);
 	}
 
-	if (rc)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-
-	return PCIBIOS_SUCCESSFUL;
+out:
+	up_read(&afu->configured_rwsem);
+	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
 }
 
 static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
···
 	int rc, record;
 	struct cxl_afu *afu;
 
-	rc = cxl_pcie_config_info(bus, devfn, &afu, &record);
+	afu = pci_bus_to_afu(bus);
+	/* Grab a reader lock on afu. */
+	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
 	if (rc)
-		return rc;
+		goto out;
 
 	switch (len) {
 	case 1:
···
 		WARN_ON(1);
 	}
 
-	if (rc)
-		return PCIBIOS_SET_FAILED;
-
-	return PCIBIOS_SUCCESSFUL;
+out:
+	up_read(&afu->configured_rwsem);
+	return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
 }
 
 static struct pci_ops cxl_pcie_pci_ops =
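The main.c, pci.c, and vphb.c changes together implement one protocol: `configured_rwsem` is created write-held, `pci_configure_afu()` releases the write side once the AFU is usable, `pci_deconfigure_afu()` retakes it, and config-space accessors take the read side with a trylock, returning "device not found" instead of touching a deconfigured AFU. A single-threaded toy model of that protocol (the `gate` struct and function names are illustrative, not the kernel's rw_semaphore API):

```c
#include <assert.h>

/* Toy reader/writer gate mirroring the configured_rwsem life cycle. */
struct gate {
	int write_held;	/* 1 while the AFU is (de)configured */
	int readers;	/* active config-space readers */
};

/* Reader side: fail fast instead of blocking, like down_read_trylock(). */
static int gate_down_read_trylock(struct gate *g)
{
	if (g->write_held)
		return 0;	/* caller returns PCIBIOS_DEVICE_NOT_FOUND */
	g->readers++;
	return 1;
}

static void gate_up_read(struct gate *g)
{
	g->readers--;
}

static void gate_up_write(struct gate *g)
{
	g->write_held = 0;	/* configuration finished: admit readers */
}
```

The real rw_semaphore additionally makes `down_write()` wait for in-flight readers to drain, which is what guarantees no accessor is mid-read when deconfiguration proceeds.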
include/linux/cpuidle.h  +1
···
 };
 
 /* Idle State Flags */
+#define CPUIDLE_FLAG_NONE	(0x00)
 #define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
 #define CPUIDLE_FLAG_TIMER_STOP (0x04) /* timer is stopped on this state */
 
include/uapi/linux/kvm.h  +6
···
 #define KVM_CAP_S390_USER_INSTR0 130
 #define KVM_CAP_MSI_DEVID 131
 #define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_PPC_MMU_RADIX 134
+#define KVM_CAP_PPC_MMU_HASH_V3 135
 
 #ifdef KVM_CAP_IRQ_ROUTING
···
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
 /* Available with KVM_CAP_PPC_RTAS */
 #define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+#define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
+/* Available with KVM_CAP_PPC_RADIX_MMU */
+#define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
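The new ioctls above are built with `_IOW(KVMIO, nr, type)`. On most architectures the asm-generic layout packs an ioctl number from four fields: 2 direction bits, 14 size bits, an 8-bit "magic" type (`KVMIO` is 0xAE), and an 8-bit command number. A simplified sketch of that packing, not the kernel's actual `ioctl.h` (powerpc, notably, uses slightly different direction/size encodings):

```c
#include <assert.h>

/* Simplified asm-generic ioctl layout: dir(2) | size(14) | type(8) | nr(8). */
#define DIR_WRITE 1u

#define MK_IOW(type, nr, size) \
	((DIR_WRITE << 30) | ((unsigned)(size) << 16) | \
	 ((unsigned)(type) << 8) | (unsigned)(nr))

/* Field extractors, for decoding a number back into its parts. */
#define IO_NR(n)   ((n) & 0xffu)
#define IO_TYPE(n) (((n) >> 8) & 0xffu)
#define IO_DIR(n)  ((n) >> 30)
```

Encoding the argument size into the number lets the kernel reject ioctls whose userspace struct layout disagrees with its own, which is why the struct type (e.g. `struct kvm_ppc_mmuv3_cfg`) appears in the macro.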
kernel/kprobes.c  +6
···
 }
 EXPORT_SYMBOL_GPL(unregister_kprobes);
 
+int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+					      unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
+
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
 	.priority = 0x7fffffff /* we need to be notified first */
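The kprobes change uses `__weak` to supply a default `kprobe_exceptions_notify()` that architectures can override with a strong definition at link time: code that needs no exception-notifier hook (such as the new powerpc optprobes-capable path) simply provides nothing and gets the no-op default. A minimal sketch of the technique, assuming GCC/Clang on a platform with weak-symbol support (ELF); `my_exceptions_notify` is a stand-in name, and `NOTIFY_DONE` is 0 in the kernel's notifier API:

```c
#include <assert.h>

#define NOTIFY_DONE 0	/* notifier chain: "don't care", as in <linux/notifier.h> */

/* Weak default: used unless some other translation unit links in a
 * strong (non-weak) definition with the same name. */
int __attribute__((weak)) my_exceptions_notify(void *self,
					       unsigned long val, void *data)
{
	return NOTIFY_DONE;	/* default: not interested in this event */
}
```

With only the weak definition present, the call below resolves to the default; linking a second file containing a plain `int my_exceptions_notify(...)` would silently replace it.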
scripts/Makefile.gcc-plugins  +9 -1
···
 
 gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY)	+= latent_entropy_plugin.so
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY)	+= -DLATENT_ENTROPY_PLUGIN
-ifdef CONFIG_PAX_LATENT_ENTROPY
+ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
     DISABLE_LATENT_ENTROPY_PLUGIN += -fplugin-arg-latent_entropy_plugin-disable
 endif
···
 ifdef CONFIG_GCC_PLUGINS
   ifeq ($(PLUGINCC),)
     ifneq ($(GCC_PLUGINS_CFLAGS),)
+      # Various gccs between 4.5 and 5.1 have bugs on powerpc due to missing
+      # header files. gcc <= 4.6 doesn't work at all, gccs from 4.8 to 5.1 have
+      # issues with 64-bit targets.
+      ifeq ($(ARCH),powerpc)
+        ifeq ($(call cc-ifversion, -le, 0501, y), y)
+          @echo "Cannot use CONFIG_GCC_PLUGINS: plugin support on gcc <= 5.1 is buggy on powerpc, please upgrade to gcc 5.2 or newer" >&2 && exit 1
+        endif
+      endif
       ifeq ($(call cc-ifversion, -ge, 0405, y), y)
         $(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true
         @echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1
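The `cc-ifversion` calls above compare compiler versions encoded as a four-digit number, `major * 100 + minor`, so gcc 4.5 is `0405` and gcc 5.1 is `0501`. A small sketch of that encoding and the `-le 0501` check the new powerpc guard performs (the function names here are illustrative, not kbuild's):

```c
#include <assert.h>

/* kbuild-style version encoding: major * 100 + minor (gcc 5.1 -> 501). */
static int cc_version(int major, int minor)
{
	return major * 100 + minor;
}

/* Models "$(call cc-ifversion, -le, bound, y)" returning true/false. */
static int version_le(int major, int minor, int bound)
{
	return cc_version(major, minor) <= bound;
}
```

Note the scheme assumes minor < 100; comparing the composite integers is then equivalent to comparing (major, minor) pairs lexicographically, which is why a single numeric `-le` test suffices in the Makefile.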