Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs

This uses a bit in our record of the guest view of the HPTE to record
when the HPTE gets modified. We use a reserved bit for this, and ensure
that this bit is always cleared in HPTE values returned to the guest.

The recording of modified HPTEs is only done if other code indicates
its interest by setting kvm->arch.hpte_mod_interest to a non-zero value.
The reason for this is that when later commits add facilities for
userspace to read the HPT, the first pass of reading the HPT will be
quicker if there are no (or very few) HPTEs marked as modified
than if most HPTEs are marked as modified.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>

Authored by Paul Mackerras; committed by Alexander Graf.
44e5f6be 4879f241

+34 -4
+9
arch/powerpc/include/asm/kvm_book3s_64.h
··· 50 50 #define HPTE_V_HVLOCK 0x40UL 51 51 #define HPTE_V_ABSENT 0x20UL 52 52 53 + /* 54 + * We use this bit in the guest_rpte field of the revmap entry 55 + * to indicate a modified HPTE. 56 + */ 57 + #define HPTE_GR_MODIFIED (1ul << 62) 58 + 59 + /* These bits are reserved in the guest view of the HPTE */ 60 + #define HPTE_GR_RESERVED HPTE_GR_MODIFIED 61 + 53 62 static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) 54 63 { 55 64 unsigned long tmp, old;
+1
arch/powerpc/include/asm/kvm_host.h
··· 248 248 atomic_t vcpus_running; 249 249 unsigned long hpt_npte; 250 250 unsigned long hpt_mask; 251 + atomic_t hpte_mod_interest; 251 252 spinlock_t slot_phys_lock; 252 253 unsigned short last_vcpu[NR_CPUS]; 253 254 struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
+24 -4
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 66 66 } 67 67 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 68 68 69 + /* 70 + * Note modification of an HPTE; set the HPTE modified bit 71 + * if anyone is interested. 72 + */ 73 + static inline void note_hpte_modification(struct kvm *kvm, 74 + struct revmap_entry *rev) 75 + { 76 + if (atomic_read(&kvm->arch.hpte_mod_interest)) 77 + rev->guest_rpte |= HPTE_GR_MODIFIED; 78 + } 79 + 69 80 /* Remove this HPTE from the chain for a real page */ 70 81 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 71 82 struct revmap_entry *rev, ··· 149 138 unsigned long slot_fn, hva; 150 139 unsigned long *hpte; 151 140 struct revmap_entry *rev; 152 - unsigned long g_ptel = ptel; 141 + unsigned long g_ptel; 153 142 struct kvm_memory_slot *memslot; 154 143 unsigned long *physp, pte_size; 155 144 unsigned long is_io; ··· 164 153 return H_PARAMETER; 165 154 writing = hpte_is_writable(ptel); 166 155 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 156 + ptel &= ~HPTE_GR_RESERVED; 157 + g_ptel = ptel; 167 158 168 159 /* used later to detect if we might have been invalidated */ 169 160 mmu_seq = kvm->mmu_notifier_seq; ··· 300 287 rev = &kvm->arch.revmap[pte_index]; 301 288 if (realmode) 302 289 rev = real_vmalloc_addr(rev); 303 - if (rev) 290 + if (rev) { 304 291 rev->guest_rpte = g_ptel; 292 + note_hpte_modification(kvm, rev); 293 + } 305 294 306 295 /* Link HPTE into reverse-map chain */ 307 296 if (pteh & HPTE_V_VALID) { ··· 407 392 /* Read PTE low word after tlbie to get final R/C values */ 408 393 remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); 409 394 } 410 - r = rev->guest_rpte; 395 + r = rev->guest_rpte & ~HPTE_GR_RESERVED; 396 + note_hpte_modification(kvm, rev); 411 397 unlock_hpte(hpte, 0); 412 398 413 399 vcpu->arch.gpr[4] = v; ··· 482 466 483 467 args[j] = ((0x80 | flags) << 56) + pte_index; 484 468 rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); 469 + note_hpte_modification(kvm, rev); 485 470 486 471 if (!(hp[0] & HPTE_V_VALID)) { 487 472 
/* insert R and C bits from PTE */ ··· 572 555 if (rev) { 573 556 r = (rev->guest_rpte & ~mask) | bits; 574 557 rev->guest_rpte = r; 558 + note_hpte_modification(kvm, rev); 575 559 } 576 560 r = (hpte[1] & ~mask) | bits; 577 561 ··· 624 606 v &= ~HPTE_V_ABSENT; 625 607 v |= HPTE_V_VALID; 626 608 } 627 - if (v & HPTE_V_VALID) 609 + if (v & HPTE_V_VALID) { 628 610 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 611 + r &= ~HPTE_GR_RESERVED; 612 + } 629 613 vcpu->arch.gpr[4 + i * 2] = v; 630 614 vcpu->arch.gpr[5 + i * 2] = r; 631 615 }