Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: cleanup allocation of rmaps and page tracking data

Unify the flags for rmaps and page tracking data, using a
single flag in struct kvm_arch and a single loop to go
over all the address spaces and memslots. This avoids
code duplication between alloc_all_memslots_rmaps and
kvm_page_track_enable_mmu_write_tracking.

Signed-off-by: David Stevens <stevensd@chromium.org>
[This patch is the delta between David's v2 and v3, with conflicts
fixed and my own commit message. - Paolo]
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by David Stevens and committed by Paolo Bonzini
1e76a3ce 3f9808ca

+103 -123
+5 -12
arch/x86/include/asm/kvm_host.h
··· 1212 1212 #endif /* CONFIG_X86_64 */ 1213 1213 1214 1214 /* 1215 - * If set, rmaps have been allocated for all memslots and should be 1216 - * allocated for any newly created or modified memslots. 1215 + * If set, at least one shadow root has been allocated. This flag 1216 + * is used as one input when determining whether certain memslot 1217 + * related allocations are necessary. 1217 1218 */ 1218 - bool memslots_have_rmaps; 1219 - 1220 - /* 1221 - * Set when the KVM mmu needs guest write access page tracking. If 1222 - * set, the necessary gfn_track arrays have been allocated for 1223 - * all memslots and should be allocated for any newly created or 1224 - * modified memslots. 1225 - */ 1226 - bool memslots_mmu_write_tracking; 1219 + bool shadow_root_allocated; 1227 1220 1228 1221 #if IS_ENABLED(CONFIG_HYPERV) 1229 1222 hpa_t hv_root_tdp; ··· 1939 1946 1940 1947 int kvm_cpu_dirty_log_size(void); 1941 1948 1942 - int alloc_all_memslots_rmaps(struct kvm *kvm); 1949 + int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); 1943 1950 1944 1951 #define KVM_CLOCK_VALID_FLAGS \ 1945 1952 (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+2 -1
arch/x86/include/asm/kvm_page_track.h
··· 49 49 int kvm_page_track_init(struct kvm *kvm); 50 50 void kvm_page_track_cleanup(struct kvm *kvm); 51 51 52 - int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm); 52 + bool kvm_page_track_write_tracking_enabled(struct kvm *kvm); 53 + int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot); 53 54 54 55 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); 55 56 int kvm_page_track_create_memslot(struct kvm *kvm,
+17 -5
arch/x86/kvm/mmu.h
··· 304 304 int kvm_mmu_post_init_vm(struct kvm *kvm); 305 305 void kvm_mmu_pre_destroy_vm(struct kvm *kvm); 306 306 307 - static inline bool kvm_memslots_have_rmaps(struct kvm *kvm) 307 + static inline bool kvm_shadow_root_allocated(struct kvm *kvm) 308 308 { 309 309 /* 310 - * Read memslot_have_rmaps before rmap pointers. Hence, threads reading 311 - * memslots_have_rmaps in any lock context are guaranteed to see the 312 - * pointers. Pairs with smp_store_release in alloc_all_memslots_rmaps. 310 + * Read shadow_root_allocated before related pointers. Hence, threads 311 + * reading shadow_root_allocated in any lock context are guaranteed to 312 + * see the pointers. Pairs with smp_store_release in 313 + * mmu_first_shadow_root_alloc. 313 314 */ 314 - return smp_load_acquire(&kvm->arch.memslots_have_rmaps); 315 + return smp_load_acquire(&kvm->arch.shadow_root_allocated); 316 + } 317 + 318 + #ifdef CONFIG_X86_64 319 + static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; } 320 + #else 321 + static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; } 322 + #endif 323 + 324 + static inline bool kvm_memslots_have_rmaps(struct kvm *kvm) 325 + { 326 + return !is_tdp_mmu_enabled(kvm) || kvm_shadow_root_allocated(kvm); 315 327 } 316 328 317 329 static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+63 -15
arch/x86/kvm/mmu/mmu.c
··· 3397 3397 return r; 3398 3398 } 3399 3399 3400 + static int mmu_first_shadow_root_alloc(struct kvm *kvm) 3401 + { 3402 + struct kvm_memslots *slots; 3403 + struct kvm_memory_slot *slot; 3404 + int r = 0, i; 3405 + 3406 + /* 3407 + * Check if this is the first shadow root being allocated before 3408 + * taking the lock. 3409 + */ 3410 + if (kvm_shadow_root_allocated(kvm)) 3411 + return 0; 3412 + 3413 + mutex_lock(&kvm->slots_arch_lock); 3414 + 3415 + /* Recheck, under the lock, whether this is the first shadow root. */ 3416 + if (kvm_shadow_root_allocated(kvm)) 3417 + goto out_unlock; 3418 + 3419 + /* 3420 + * Check if anything actually needs to be allocated, e.g. all metadata 3421 + * will be allocated upfront if TDP is disabled. 3422 + */ 3423 + if (kvm_memslots_have_rmaps(kvm) && 3424 + kvm_page_track_write_tracking_enabled(kvm)) 3425 + goto out_success; 3426 + 3427 + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 3428 + slots = __kvm_memslots(kvm, i); 3429 + kvm_for_each_memslot(slot, slots) { 3430 + /* 3431 + * Both of these functions are no-ops if the target is 3432 + * already allocated, so unconditionally calling both 3433 + * is safe. Intentionally do NOT free allocations on 3434 + * failure to avoid having to track which allocations 3435 + * were made now versus when the memslot was created. 3436 + * The metadata is guaranteed to be freed when the slot 3437 + * is freed, and will be kept/used if userspace retries 3438 + * KVM_RUN instead of killing the VM. 3439 + */ 3440 + r = memslot_rmap_alloc(slot, slot->npages); 3441 + if (r) 3442 + goto out_unlock; 3443 + r = kvm_page_track_write_tracking_alloc(slot); 3444 + if (r) 3445 + goto out_unlock; 3446 + } 3447 + } 3448 + 3449 + /* 3450 + * Ensure that shadow_root_allocated becomes true strictly after 3451 + * all the related pointers are set. 
3452 + */ 3453 + out_success: 3454 + smp_store_release(&kvm->arch.shadow_root_allocated, true); 3455 + 3456 + out_unlock: 3457 + mutex_unlock(&kvm->slots_arch_lock); 3458 + return r; 3459 + } 3460 + 3400 3461 static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) 3401 3462 { 3402 3463 struct kvm_mmu *mmu = vcpu->arch.mmu; ··· 3488 3427 } 3489 3428 } 3490 3429 3491 - r = alloc_all_memslots_rmaps(vcpu->kvm); 3492 - if (r) 3493 - return r; 3494 - 3495 - r = kvm_page_track_enable_mmu_write_tracking(vcpu->kvm); 3430 + r = mmu_first_shadow_root_alloc(vcpu->kvm); 3496 3431 if (r) 3497 3432 return r; 3498 3433 ··· 5661 5604 5662 5605 spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); 5663 5606 5664 - if (!kvm_mmu_init_tdp_mmu(kvm)) 5665 - /* 5666 - * No smp_load/store wrappers needed here as we are in 5667 - * VM init and there cannot be any memslots / other threads 5668 - * accessing this struct kvm yet. 5669 - */ 5670 - kvm->arch.memslots_have_rmaps = true; 5671 - 5672 - if (!tdp_enabled) 5673 - kvm->arch.memslots_mmu_write_tracking = true; 5607 + kvm_mmu_init_tdp_mmu(kvm); 5674 5608 5675 5609 node->track_write = kvm_mmu_pte_write; 5676 5610 node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
+15 -42
arch/x86/kvm/mmu/page_track.c
··· 19 19 #include "mmu.h" 20 20 #include "mmu_internal.h" 21 21 22 - static bool write_tracking_enabled(struct kvm *kvm) 22 + bool kvm_page_track_write_tracking_enabled(struct kvm *kvm) 23 23 { 24 - /* 25 - * Read memslots_mmu_write_tracking before gfn_track pointers. Pairs 26 - * with smp_store_release in kvm_page_track_enable_mmu_write_tracking. 27 - */ 28 24 return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) || 29 - smp_load_acquire(&kvm->arch.memslots_mmu_write_tracking); 25 + !tdp_enabled || kvm_shadow_root_allocated(kvm); 30 26 } 31 27 32 28 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot) ··· 42 46 int i; 43 47 44 48 for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { 45 - if (i == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(kvm)) 49 + if (i == KVM_PAGE_TRACK_WRITE && 50 + !kvm_page_track_write_tracking_enabled(kvm)) 46 51 continue; 47 52 48 53 slot->arch.gfn_track[i] = ··· 68 71 return true; 69 72 } 70 73 71 - int kvm_page_track_enable_mmu_write_tracking(struct kvm *kvm) 74 + int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot) 72 75 { 73 - struct kvm_memslots *slots; 74 - struct kvm_memory_slot *slot; 75 - unsigned short **gfn_track; 76 - int i; 76 + unsigned short *gfn_track; 77 77 78 - if (write_tracking_enabled(kvm)) 78 + if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE]) 79 79 return 0; 80 80 81 - mutex_lock(&kvm->slots_arch_lock); 81 + gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track), GFP_KERNEL_ACCOUNT); 82 + if (gfn_track == NULL) 83 + return -ENOMEM; 82 84 83 - if (write_tracking_enabled(kvm)) { 84 - mutex_unlock(&kvm->slots_arch_lock); 85 - return 0; 86 - } 87 - 88 - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 89 - slots = __kvm_memslots(kvm, i); 90 - kvm_for_each_memslot(slot, slots) { 91 - gfn_track = slot->arch.gfn_track + KVM_PAGE_TRACK_WRITE; 92 - *gfn_track = kvcalloc(slot->npages, sizeof(*gfn_track), 93 - GFP_KERNEL_ACCOUNT); 94 - if (*gfn_track == NULL) { 95 - mutex_unlock(&kvm->slots_arch_lock); 96 - 
return -ENOMEM; 97 - } 98 - } 99 - } 100 - 101 - /* 102 - * Ensure that memslots_mmu_write_tracking becomes true strictly 103 - * after all the pointers are set. 104 - */ 105 - smp_store_release(&kvm->arch.memslots_mmu_write_tracking, true); 106 - mutex_unlock(&kvm->slots_arch_lock); 107 - 85 + slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track; 108 86 return 0; 109 87 } 110 88 ··· 119 147 return; 120 148 121 149 if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE && 122 - !write_tracking_enabled(kvm))) 150 + !kvm_page_track_write_tracking_enabled(kvm))) 123 151 return; 124 152 125 153 update_gfn_track(slot, gfn, mode, 1); ··· 157 185 return; 158 186 159 187 if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE && 160 - !write_tracking_enabled(kvm))) 188 + !kvm_page_track_write_tracking_enabled(kvm))) 161 189 return; 162 190 163 191 update_gfn_track(slot, gfn, mode, -1); ··· 185 213 if (!slot) 186 214 return false; 187 215 188 - if (mode == KVM_PAGE_TRACK_WRITE && !write_tracking_enabled(vcpu->kvm)) 216 + if (mode == KVM_PAGE_TRACK_WRITE && 217 + !kvm_page_track_write_tracking_enabled(vcpu->kvm)) 189 218 return false; 190 219 191 220 index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
-2
arch/x86/kvm/mmu/tdp_mmu.h
··· 90 90 #ifdef CONFIG_X86_64 91 91 bool kvm_mmu_init_tdp_mmu(struct kvm *kvm); 92 92 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); 93 - static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; } 94 93 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; } 95 94 96 95 static inline bool is_tdp_mmu(struct kvm_mmu *mmu) ··· 111 112 #else 112 113 static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; } 113 114 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {} 114 - static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; } 115 115 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; } 116 116 static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; } 117 117 #endif
+1 -46
arch/x86/kvm/x86.c
··· 11514 11514 kvm_page_track_free_memslot(slot); 11515 11515 } 11516 11516 11517 - static int memslot_rmap_alloc(struct kvm_memory_slot *slot, 11518 - unsigned long npages) 11517 + int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages) 11519 11518 { 11520 11519 const int sz = sizeof(*slot->arch.rmap[0]); 11521 11520 int i; ··· 11533 11534 } 11534 11535 } 11535 11536 11536 - return 0; 11537 - } 11538 - 11539 - int alloc_all_memslots_rmaps(struct kvm *kvm) 11540 - { 11541 - struct kvm_memslots *slots; 11542 - struct kvm_memory_slot *slot; 11543 - int r, i; 11544 - 11545 - /* 11546 - * Check if memslots alreday have rmaps early before acquiring 11547 - * the slots_arch_lock below. 11548 - */ 11549 - if (kvm_memslots_have_rmaps(kvm)) 11550 - return 0; 11551 - 11552 - mutex_lock(&kvm->slots_arch_lock); 11553 - 11554 - /* 11555 - * Read memslots_have_rmaps again, under the slots arch lock, 11556 - * before allocating the rmaps 11557 - */ 11558 - if (kvm_memslots_have_rmaps(kvm)) { 11559 - mutex_unlock(&kvm->slots_arch_lock); 11560 - return 0; 11561 - } 11562 - 11563 - for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 11564 - slots = __kvm_memslots(kvm, i); 11565 - kvm_for_each_memslot(slot, slots) { 11566 - r = memslot_rmap_alloc(slot, slot->npages); 11567 - if (r) { 11568 - mutex_unlock(&kvm->slots_arch_lock); 11569 - return r; 11570 - } 11571 - } 11572 - } 11573 - 11574 - /* 11575 - * Ensure that memslots_have_rmaps becomes true strictly after 11576 - * all the rmap pointers are set. 11577 - */ 11578 - smp_store_release(&kvm->arch.memslots_have_rmaps, true); 11579 - mutex_unlock(&kvm->slots_arch_lock); 11580 11537 return 0; 11581 11538 } 11582 11539