Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S HV: Use bitmap of active threads rather than count

Currently, the entry_exit_count field in the kvmppc_vcore struct
contains two 8-bit counts, one of the threads that have started entering
the guest, and one of the threads that have started exiting the guest.
This changes it to an entry_exit_map field which contains two bitmaps
of 8 bits each. The advantage of doing this is that it gives us a
bitmap of which threads need to be signalled when exiting the guest.
That means that we no longer need to use the trick of setting the
HDEC to 0 to pull the other threads out of the guest, which led in
some cases to a spurious HDEC interrupt on the next guest entry.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>

Authored by Paul Mackerras; committed by Alexander Graf.
7d6c40da fd6d53b1

+44 -49
+8 -7
arch/powerpc/include/asm/kvm_host.h
··· 263 263 264 264 /* 265 265 * Struct for a virtual core. 266 - * Note: entry_exit_count combines an entry count in the bottom 8 bits 267 - * and an exit count in the next 8 bits. This is so that we can 268 - * atomically increment the entry count iff the exit count is 0 269 - * without taking the lock. 266 + * Note: entry_exit_map combines a bitmap of threads that have entered 267 + * in the bottom 8 bits and a bitmap of threads that have exited in the 268 + * next 8 bits. This is so that we can atomically set the entry bit 269 + * iff the exit map is 0 without taking a lock. 270 270 */ 271 271 struct kvmppc_vcore { 272 272 int n_runnable; 273 273 int num_threads; 274 - int entry_exit_count; 274 + int entry_exit_map; 275 275 int napping_threads; 276 276 int first_vcpuid; 277 277 u16 pcpu; ··· 296 296 ulong conferring_threads; 297 297 }; 298 298 299 - #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) 300 - #define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8) 299 + #define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff) 300 + #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) 301 + #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) 301 302 302 303 /* Values for vcore_state */ 303 304 #define VCORE_INACTIVE 0
+1 -1
arch/powerpc/kernel/asm-offsets.c
··· 562 562 DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); 563 563 DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); 564 564 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 565 - DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 565 + DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); 566 566 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 567 567 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); 568 568 DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
+2 -3
arch/powerpc/kvm/book3s_hv.c
··· 1952 1952 /* 1953 1953 * Initialize *vc. 1954 1954 */ 1955 - vc->entry_exit_count = 0; 1955 + vc->entry_exit_map = 0; 1956 1956 vc->preempt_tb = TB_NIL; 1957 1957 vc->in_guest = 0; 1958 1958 vc->napping_threads = 0; ··· 2119 2119 * this thread straight away and have it join in. 2120 2120 */ 2121 2121 if (!signal_pending(current)) { 2122 - if (vc->vcore_state == VCORE_RUNNING && 2123 - VCORE_EXIT_COUNT(vc) == 0) { 2122 + if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) { 2124 2123 kvmppc_create_dtl_entry(vcpu, vc); 2125 2124 kvmppc_start_thread(vcpu); 2126 2125 trace_kvm_guest_enter(vcpu);
+5 -5
arch/powerpc/kvm/book3s_hv_builtin.c
··· 115 115 int rv = H_SUCCESS; /* => don't yield */ 116 116 117 117 set_bit(vcpu->arch.ptid, &vc->conferring_threads); 118 - while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) { 119 - threads_running = VCORE_ENTRY_COUNT(vc); 120 - threads_ceded = hweight32(vc->napping_threads); 121 - threads_conferring = hweight32(vc->conferring_threads); 122 - if (threads_ceded + threads_conferring >= threads_running) { 118 + while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) { 119 + threads_running = VCORE_ENTRY_MAP(vc); 120 + threads_ceded = vc->napping_threads; 121 + threads_conferring = vc->conferring_threads; 122 + if ((threads_ceded | threads_conferring) == threads_running) { 123 123 rv = H_TOO_HARD; /* => do yield */ 124 124 break; 125 125 }
+28 -33
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 185 185 or r3, r3, r0 186 186 stwcx. r3, 0, r6 187 187 bne 1b 188 - /* order napping_threads update vs testing entry_exit_count */ 188 + /* order napping_threads update vs testing entry_exit_map */ 189 189 isync 190 190 li r12, 0 191 191 lwz r7, VCORE_ENTRY_EXIT(r5) ··· 406 406 * We don't have to lock against concurrent tlbies, 407 407 * but we do have to coordinate across hardware threads. 408 408 */ 409 - /* Increment entry count iff exit count is zero. */ 410 - ld r5,HSTATE_KVM_VCORE(r13) 411 - addi r9,r5,VCORE_ENTRY_EXIT 412 - 21: lwarx r3,0,r9 413 - cmpwi r3,0x100 /* any threads starting to exit? */ 409 + /* Set bit in entry map iff exit map is zero. */ 410 + ld r5, HSTATE_KVM_VCORE(r13) 411 + li r7, 1 412 + lbz r6, HSTATE_PTID(r13) 413 + sld r7, r7, r6 414 + addi r9, r5, VCORE_ENTRY_EXIT 415 + 21: lwarx r3, 0, r9 416 + cmpwi r3, 0x100 /* any threads starting to exit? */ 414 417 bge secondary_too_late /* if so we're too late to the party */ 415 - addi r3,r3,1 416 - stwcx. r3,0,r9 418 + or r3, r3, r7 419 + stwcx. r3, 0, r9 417 420 bne 21b 418 421 419 422 /* Primary thread switches to guest partition. */ 420 423 ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ 421 - lbz r6,HSTATE_PTID(r13) 422 424 cmpwi r6,0 423 425 bne 20f 424 426 ld r6,KVM_SDR1(r9) ··· 1479 1477 * We don't have to lock against tlbies but we do 1480 1478 * have to coordinate the hardware threads. 1481 1479 */ 1482 - /* Increment the threads-exiting-guest count in the 0xff00 1483 - bits of vcore->entry_exit_count */ 1484 - ld r5,HSTATE_KVM_VCORE(r13) 1485 - addi r6,r5,VCORE_ENTRY_EXIT 1486 - 41: lwarx r3,0,r6 1487 - addi r0,r3,0x100 1488 - stwcx. r0,0,r6 1480 + /* Set our bit in the threads-exiting-guest map in the 0xff00 1481 + bits of vcore->entry_exit_map */ 1482 + ld r5, HSTATE_KVM_VCORE(r13) 1483 + lbz r4, HSTATE_PTID(r13) 1484 + li r7, 0x100 1485 + sld r7, r7, r4 1486 + addi r6, r5, VCORE_ENTRY_EXIT 1487 + 41: lwarx r3, 0, r6 1488 + or r0, r3, r7 1489 + stwcx. r0, 0, r6 1489 1490 bne 41b 1490 1491 isync /* order stwcx. vs. reading napping_threads */ 1491 1492 ··· 1497 1492 * up to the kernel or qemu; we can't handle it in real mode. 1498 1493 * Thus we have to do a partition switch, so we have to 1499 1494 * collect the other threads, if we are the first thread 1500 - * to take an interrupt. To do this, we set the HDEC to 0, 1501 - * which causes an HDEC interrupt in all threads within 2ns 1502 - * because the HDEC register is shared between all 4 threads. 1495 + * to take an interrupt. To do this, we send a message or 1496 + * IPI to all the threads that have their bit set in the entry 1497 + * map in vcore->entry_exit_map (other than ourselves). 1503 1498 * However, we don't need to bother if this is an HDEC 1504 1499 * interrupt, since the other threads will already be on their 1505 1500 * way here in that case. ··· 1508 1503 bge 43f 1509 1504 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 1510 1505 beq 43f 1511 - li r0,0 1512 - mtspr SPRN_HDEC,r0 1513 1506 1514 - /* 1515 - * Send an IPI to any napping threads, since an HDEC interrupt 1516 - * doesn't wake CPUs up from nap. 1517 - */ 1518 - lwz r3,VCORE_NAPPING_THREADS(r5) 1519 - lbz r4,HSTATE_PTID(r13) 1520 - li r0,1 1521 - sld r0,r0,r4 1507 + srwi r0,r7,8 1522 1508 andc. r3,r3,r0 /* no sense IPI'ing ourselves */ 1523 1509 beq 43f 1524 1510 /* Order entry/exit update vs. IPIs */ ··· 2087 2091 addi r6,r5,VCORE_NAPPING_THREADS 2088 2092 31: lwarx r4,0,r6 2089 2093 or r4,r4,r0 2090 - PPC_POPCNTW(R7,R4) 2091 - cmpw r7,r8 2092 - bge kvm_cede_exit 2094 + cmpw r4,r8 2095 + beq kvm_cede_exit 2093 2096 stwcx. r4,0,r6 2094 2097 bne 31b 2095 - /* order napping_threads update vs testing entry_exit_map */ 2098 + isync 2096 2099 li r0,NAPPING_CEDE 2097 2100 stb r0,HSTATE_NAPPING(r13)