Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kvm: Nested KVM MMUs need PAE root too

On AMD processors, in PAE 32-bit mode, nested KVM instances don't
work. The L0 host gets a kernel oops, which is related to
arch.mmu->pae_root being NULL.

The reason for this is that when setting up a nested KVM instance,
arch.mmu is set to &arch.guest_mmu (while normally, it would be
&arch.root_mmu). However, the initialization and allocation of
pae_root only creates it in root_mmu. KVM code (i.e. in
mmu_alloc_shadow_roots) then accesses arch.mmu->pae_root, which is the
unallocated arch.guest_mmu->pae_root.

This fix just allocates (and frees) pae_root in both guest_mmu and
root_mmu (and also lm_root if it was allocated). The allocation is
subject to the previous restrictions, i.e. it won't allocate anything on
64-bit and, AFAIK, not on Intel.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=203923
Fixes: 14c07ad89f4d ("x86/kvm/mmu: introduce guest_mmu")
Signed-off-by: Jiri Palecek <jpalecek@web.de>
Tested-by: Jiri Palecek <jpalecek@web.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Jiří Paleček and committed by
Paolo Bonzini
1cfff4d9 c8848cee

+22 -8
+22 -8
arch/x86/kvm/mmu.c
··· 5595 5595 PT_PAGE_TABLE_LEVEL, lock_flush_tlb); 5596 5596 } 5597 5597 5598 - static void free_mmu_pages(struct kvm_vcpu *vcpu) 5598 + static void free_mmu_pages(struct kvm_mmu *mmu) 5599 5599 { 5600 - free_page((unsigned long)vcpu->arch.mmu->pae_root); 5601 - free_page((unsigned long)vcpu->arch.mmu->lm_root); 5600 + free_page((unsigned long)mmu->pae_root); 5601 + free_page((unsigned long)mmu->lm_root); 5602 5602 } 5603 5603 5604 - static int alloc_mmu_pages(struct kvm_vcpu *vcpu) 5604 + static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) 5605 5605 { 5606 5606 struct page *page; 5607 5607 int i; ··· 5622 5622 if (!page) 5623 5623 return -ENOMEM; 5624 5624 5625 - vcpu->arch.mmu->pae_root = page_address(page); 5625 + mmu->pae_root = page_address(page); 5626 5626 for (i = 0; i < 4; ++i) 5627 - vcpu->arch.mmu->pae_root[i] = INVALID_PAGE; 5627 + mmu->pae_root[i] = INVALID_PAGE; 5628 5628 5629 5629 return 0; 5630 5630 } ··· 5632 5632 int kvm_mmu_create(struct kvm_vcpu *vcpu) 5633 5633 { 5634 5634 uint i; 5635 + int ret; 5635 5636 5636 5637 vcpu->arch.mmu = &vcpu->arch.root_mmu; 5637 5638 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; ··· 5650 5649 vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; 5651 5650 5652 5651 vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; 5653 - return alloc_mmu_pages(vcpu); 5652 + 5653 + ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu); 5654 + if (ret) 5655 + return ret; 5656 + 5657 + ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu); 5658 + if (ret) 5659 + goto fail_allocate_root; 5660 + 5661 + return ret; 5662 + fail_allocate_root: 5663 + free_mmu_pages(&vcpu->arch.guest_mmu); 5664 + return ret; 5654 5665 } 5655 5666 5656 5667 static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, ··· 6087 6074 void kvm_mmu_destroy(struct kvm_vcpu *vcpu) 6088 6075 { 6089 6076 kvm_mmu_unload(vcpu); 6090 - free_mmu_pages(vcpu); 6077 + free_mmu_pages(&vcpu->arch.root_mmu); 6078 + 
free_mmu_pages(&vcpu->arch.guest_mmu); 6091 6079 mmu_free_memory_caches(vcpu); 6092 6080 } 6093 6081