Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'topic/ppc-kvm' into next

Merge the topic branch we share with kvm-ppc. This brings in two xive
commits, one from Paul to rework HMI handling, and a minor cleanup to
drop an unused flag.

+206 -78
+4
arch/powerpc/include/asm/hmi.h
··· 42 42 static inline void wait_for_subcore_guest_exit(void) { } 43 43 static inline void wait_for_tb_resync(void) { } 44 44 #endif 45 + 46 + struct pt_regs; 47 + extern long hmi_handle_debugtrig(struct pt_regs *regs); 48 + 45 49 #endif /* __ASM_PPC64_HMI_H__ */
+17
arch/powerpc/include/asm/hvcall.h
··· 241 241 #define H_GET_HCA_INFO 0x1B8 242 242 #define H_GET_PERF_COUNT 0x1BC 243 243 #define H_MANAGE_TRACE 0x1C0 244 + #define H_GET_CPU_CHARACTERISTICS 0x1C8 244 245 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 245 246 #define H_QUERY_INT_STATE 0x1E4 246 247 #define H_POLL_PENDING 0x1D8 ··· 330 329 #define H_SIGNAL_SYS_RESET_ALL -1 331 330 #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 332 331 /* >= 0 values are CPU number */ 332 + 333 + /* H_GET_CPU_CHARACTERISTICS return values */ 334 + #define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 335 + #define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 336 + #define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 337 + #define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 338 + #define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 339 + 340 + #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 341 + #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 342 + #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 333 343 334 344 /* Flag values used in H_REGISTER_PROC_TBL hcall */ 335 345 #define PROC_TABLE_OP_MASK 0x18 ··· 447 435 return 1; 448 436 } 449 437 } 438 + 439 + struct h_cpu_char_result { 440 + u64 character; 441 + u64 behaviour; 442 + }; 450 443 451 444 #endif /* __ASSEMBLY__ */ 452 445 #endif /* __KERNEL__ */
+14
arch/powerpc/include/asm/plpar_wrappers.h
··· 326 326 return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); 327 327 } 328 328 329 + static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) 330 + { 331 + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 332 + long rc; 333 + 334 + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); 335 + if (rc == H_SUCCESS) { 336 + p->character = retbuf[0]; 337 + p->behaviour = retbuf[1]; 338 + } 339 + 340 + return rc; 341 + } 342 + 329 343 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
+3 -2
arch/powerpc/include/asm/reg.h
··· 431 431 #define SPRN_LPID 0x13F /* Logical Partition Identifier */ 432 432 #endif 433 433 #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ 434 - #define SPRN_HMER 0x150 /* Hardware m? error recovery */ 435 - #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ 434 + #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ 435 + #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ 436 + #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ 436 437 #define SPRN_PCR 0x152 /* Processor compatibility register */ 437 438 #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ 438 439 #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
+35
arch/powerpc/include/asm/xive-regs.h
··· 10 10 #define _ASM_POWERPC_XIVE_REGS_H 11 11 12 12 /* 13 + * "magic" Event State Buffer (ESB) MMIO offsets. 14 + * 15 + * Each interrupt source has a 2-bit state machine called ESB 16 + * which can be controlled by MMIO. It's made of 2 bits, P and 17 + * Q. P indicates that an interrupt is pending (has been sent 18 + * to a queue and is waiting for an EOI). Q indicates that the 19 + * interrupt has been triggered while pending. 20 + * 21 + * This acts as a coalescing mechanism in order to guarantee 22 + * that a given interrupt only occurs at most once in a queue. 23 + * 24 + * When doing an EOI, the Q bit will indicate if the interrupt 25 + * needs to be re-triggered. 26 + * 27 + * The following offsets into the ESB MMIO allow to read or 28 + * manipulate the PQ bits. They must be used with an 8-bytes 29 + * load instruction. They all return the previous state of the 30 + * interrupt (atomically). 31 + * 32 + * Additionally, some ESB pages support doing an EOI via a 33 + * store at 0 and some ESBs support doing a trigger via a 34 + * separate trigger page. 35 + */ 36 + #define XIVE_ESB_STORE_EOI 0x400 /* Store */ 37 + #define XIVE_ESB_LOAD_EOI 0x000 /* Load */ 38 + #define XIVE_ESB_GET 0x800 /* Load */ 39 + #define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ 40 + #define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ 41 + #define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ 42 + #define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ 43 + 44 + #define XIVE_ESB_VAL_P 0x2 45 + #define XIVE_ESB_VAL_Q 0x1 46 + 47 + /* 13 48 * Thread Management (aka "TM") registers 14 49 */ 15 50
+3 -35
arch/powerpc/include/asm/xive.h
··· 58 58 #define XIVE_IRQ_FLAG_EOI_FW 0x10 59 59 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 60 60 61 + /* Special flag set by KVM for escalation interrupts */ 62 + #define XIVE_IRQ_NO_EOI 0x80 63 + 61 64 #define XIVE_INVALID_CHIP_ID -1 62 65 63 66 /* A queue tracking structure in a CPU */ ··· 74 71 atomic_t count; 75 72 atomic_t pending_count; 76 73 }; 77 - 78 - /* 79 - * "magic" Event State Buffer (ESB) MMIO offsets. 80 - * 81 - * Each interrupt source has a 2-bit state machine called ESB 82 - * which can be controlled by MMIO. It's made of 2 bits, P and 83 - * Q. P indicates that an interrupt is pending (has been sent 84 - * to a queue and is waiting for an EOI). Q indicates that the 85 - * interrupt has been triggered while pending. 86 - * 87 - * This acts as a coalescing mechanism in order to guarantee 88 - * that a given interrupt only occurs at most once in a queue. 89 - * 90 - * When doing an EOI, the Q bit will indicate if the interrupt 91 - * needs to be re-triggered. 92 - * 93 - * The following offsets into the ESB MMIO allow to read or 94 - * manipulate the PQ bits. They must be used with an 8-bytes 95 - * load instruction. They all return the previous state of the 96 - * interrupt (atomically). 97 - * 98 - * Additionally, some ESB pages support doing an EOI via a 99 - * store at 0 and some ESBs support doing a trigger via a 100 - * separate trigger page. 101 - */ 102 - #define XIVE_ESB_STORE_EOI 0x400 /* Store */ 103 - #define XIVE_ESB_LOAD_EOI 0x000 /* Load */ 104 - #define XIVE_ESB_GET 0x800 /* Load */ 105 - #define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ 106 - #define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ 107 - #define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ 108 - #define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ 109 - 110 - #define XIVE_ESB_VAL_P 0x2 111 - #define XIVE_ESB_VAL_Q 0x1 112 74 113 75 /* Global enable flags for the XIVE support */ 114 76 extern bool __xive_enabled;
+114 -28
arch/powerpc/kernel/mce.c
··· 495 495 return handled; 496 496 } 497 497 498 - long hmi_exception_realmode(struct pt_regs *regs) 498 + /* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ 499 + static enum { 500 + DTRIG_UNKNOWN, 501 + DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ 502 + DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ 503 + } hmer_debug_trig_function; 504 + 505 + static int init_debug_trig_function(void) 499 506 { 507 + int pvr; 508 + struct device_node *cpun; 509 + struct property *prop = NULL; 510 + const char *str; 511 + 512 + /* First look in the device tree */ 513 + preempt_disable(); 514 + cpun = of_get_cpu_node(smp_processor_id(), NULL); 515 + if (cpun) { 516 + of_property_for_each_string(cpun, "ibm,hmi-special-triggers", 517 + prop, str) { 518 + if (strcmp(str, "bit17-vector-ci-load") == 0) 519 + hmer_debug_trig_function = DTRIG_VECTOR_CI; 520 + else if (strcmp(str, "bit17-tm-suspend-escape") == 0) 521 + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 522 + } 523 + of_node_put(cpun); 524 + } 525 + preempt_enable(); 526 + 527 + /* If we found the property, don't look at PVR */ 528 + if (prop) 529 + goto out; 530 + 531 + pvr = mfspr(SPRN_PVR); 532 + /* Check for POWER9 Nimbus (scale-out) */ 533 + if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { 534 + /* DD2.2 and later */ 535 + if ((pvr & 0xfff) >= 0x202) 536 + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; 537 + /* DD2.0 and DD2.1 - used for vector CI load emulation */ 538 + else if ((pvr & 0xfff) >= 0x200) 539 + hmer_debug_trig_function = DTRIG_VECTOR_CI; 540 + } 541 + 542 + out: 543 + switch (hmer_debug_trig_function) { 544 + case DTRIG_VECTOR_CI: 545 + pr_debug("HMI debug trigger used for vector CI load\n"); 546 + break; 547 + case DTRIG_SUSPEND_ESCAPE: 548 + pr_debug("HMI debug trigger used for TM suspend escape\n"); 549 + break; 550 + default: 551 + break; 552 + } 553 + return 0; 554 + } 555 + __initcall(init_debug_trig_function); 556 + 557 + /* 558 + * Handle HMIs that occur as a result of a debug trigger. 559 + * Return values: 560 + * -1 means this is not a HMI cause that we know about 561 + * 0 means no further handling is required 562 + * 1 means further handling is required 563 + */ 564 + long hmi_handle_debugtrig(struct pt_regs *regs) 565 + { 566 + unsigned long hmer = mfspr(SPRN_HMER); 567 + long ret = 0; 568 + 569 + /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ 570 + if (!((hmer & HMER_DEBUG_TRIG) 571 + && hmer_debug_trig_function != DTRIG_UNKNOWN)) 572 + return -1; 573 + 574 + hmer &= ~HMER_DEBUG_TRIG; 575 + /* HMER is a write-AND register */ 576 + mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); 577 + 578 + switch (hmer_debug_trig_function) { 579 + case DTRIG_VECTOR_CI: 580 + /* 581 + * Now to avoid problems with soft-disable we 582 + * only do the emulation if we are coming from 583 + * host user space 584 + */ 585 + if (regs && user_mode(regs)) 586 + ret = local_paca->hmi_p9_special_emu = 1; 587 + 588 + break; 589 + 590 + default: 591 + break; 592 + } 593 + 594 + /* 595 + * See if any other HMI causes remain to be handled 596 + */ 597 + if (hmer & mfspr(SPRN_HMEER)) 598 + return -1; 599 + 600 + return ret; 601 + } 602 + 603 + /* 604 + * Return values: 605 + */ 606 + long hmi_exception_realmode(struct pt_regs *regs) 607 + { 608 + int ret; 609 + 500 610 __this_cpu_inc(irq_stat.hmi_exceptions); 501 611 502 - #ifdef CONFIG_PPC_BOOK3S_64 503 - /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ 504 - if (pvr_version_is(PVR_POWER9)) { 505 - unsigned long hmer = mfspr(SPRN_HMER); 506 - 507 - /* Do we have the debug bit set */ 508 - if (hmer & PPC_BIT(17)) { 509 - hmer &= ~PPC_BIT(17); 510 - mtspr(SPRN_HMER, hmer); 511 - 512 - /* 513 - * Now to avoid problems with soft-disable we 514 - * only do the emulation if we are coming from 515 - * user space 516 - */ 517 - if (user_mode(regs)) 518 - local_paca->hmi_p9_special_emu = 1; 519 - 520 - /* 521 - * Don't bother going to OPAL if that's the 522 - * only relevant bit. 523 - */ 524 - if (!(hmer & mfspr(SPRN_HMEER))) 525 - return local_paca->hmi_p9_special_emu; 526 - } 527 - } 528 - #endif /* CONFIG_PPC_BOOK3S_64 */ 612 + ret = hmi_handle_debugtrig(regs); 613 + if (ret >= 0) 614 + return ret; 529 615 530 616 wait_for_subcore_guest_exit(); 531 617
+5 -3
arch/powerpc/kvm/book3s_hv_ras.c
··· 266 266 * secondary threads to proceed. 267 267 * - All secondary threads will eventually call opal hmi handler on 268 268 * their exit path. 269 + * 270 + * Returns 1 if the timebase offset should be applied, 0 if not. 269 271 */ 270 272 271 273 long kvmppc_realmode_hmi_handler(void) 272 274 { 273 - int ptid = local_paca->kvm_hstate.ptid; 274 275 bool resync_req; 275 276 276 - /* This is only called on primary thread. */ 277 - BUG_ON(ptid != 0); 278 277 __this_cpu_inc(irq_stat.hmi_exceptions); 278 + 279 + if (hmi_handle_debugtrig(NULL) >= 0) 280 + return 1; 279 281 280 282 /* 281 283 * By now primary thread has already completed guest->host
+4 -5
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 42 42 } 43 43 44 44 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ 45 - static int global_invalidates(struct kvm *kvm, unsigned long flags) 45 + static int global_invalidates(struct kvm *kvm) 46 46 { 47 47 int global; 48 48 int cpu; ··· 522 522 if (v & HPTE_V_VALID) { 523 523 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 524 524 rb = compute_tlbie_rb(v, pte_r, pte_index); 525 - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); 525 + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 526 526 /* 527 527 * The reference (R) and change (C) bits in a HPT 528 528 * entry can be set by hardware at any time up until ··· 572 572 573 573 if (kvm_is_radix(kvm)) 574 574 return H_FUNCTION; 575 - global = global_invalidates(kvm, 0); 575 + global = global_invalidates(kvm); 576 576 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 577 577 n = 0; 578 578 for (; i < 4; ++i) { ··· 732 732 rb = compute_tlbie_rb(v, r, pte_index); 733 733 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | 734 734 HPTE_V_ABSENT); 735 - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), 736 - true); 735 + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 737 736 /* Don't lose R/C bit updates done by hardware */ 738 737 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); 739 738 hpte[1] = cpu_to_be64(r);
+5 -4
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 1909 1909 bne 27f 1910 1910 bl kvmppc_realmode_hmi_handler 1911 1911 nop 1912 + cmpdi r3, 0 1912 1913 li r12, BOOK3S_INTERRUPT_HMI 1913 1914 /* 1914 - * At this point kvmppc_realmode_hmi_handler would have resync-ed 1915 - * the TB. Hence it is not required to subtract guest timebase 1916 - * offset from timebase. So, skip it. 1915 + * At this point kvmppc_realmode_hmi_handler may have resync-ed 1916 + * the TB, and if it has, we must not subtract the guest timebase 1917 + * offset from the timebase. So, skip it. 1917 1918 * 1918 1919 * Also, do not call kvmppc_subcore_exit_guest() because it has 1919 1920 * been invoked as part of kvmppc_realmode_hmi_handler(). 1920 1921 */ 1921 - b 30f 1922 + beq 30f 1922 1923 1923 1924 27: 1924 1925 /* Subtract timebase offset from timebase */
+2 -1
arch/powerpc/sysdev/xive/common.c
··· 367 367 * EOI the source if it hasn't been disabled and hasn't 368 368 * been passed-through to a KVM guest 369 369 */ 370 - if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) 370 + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && 371 + !(xd->flags & XIVE_IRQ_NO_EOI)) 371 372 xive_do_source_eoi(irqd_to_hwirq(d), xd); 372 373 373 374 /*