Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm

Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier:
"The most obvious thing is the sizeable MMU changes to support 48bit
VAs on arm64.

Summary:

 - support for 48-bit IPA and VA (EL2)
- a number of fixes for devices mapped into guests
- yet another VGIC fix for BE
- a fix for CPU hotplug
- a few compile fixes (disabled VGIC, strict mm checks)"

[ I'm pulling directly from Marc at the request of Paolo Bonzini, whose
backpack was stolen at Düsseldorf airport; Paolo will generate new keys
and rebuild his web of trust. - Linus ]

* tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm:
arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort
arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
arm/arm64: KVM: map MMIO regions at creation time
arm64: kvm: define PAGE_S2_DEVICE as read-only by default
ARM: kvm: define PAGE_S2_DEVICE as read-only by default
arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap
arm/arm64: KVM: fix potential NULL dereference in user_mem_abort()
arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages
ARM: KVM: fix vgic-disabled build
arm: kvm: fix CPU hotplug

+392 -86
+27 -4
arch/arm/include/asm/kvm_mmu.h
···
  */
 #define TRAMPOLINE_VA		UL(CONFIG_VECTORS_BASE)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
+ */
+#define KVM_MMU_CACHE_MIN_PAGES	2
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cacheflush.h>
···
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size);
+			  phys_addr_t pa, unsigned long size, bool writable);
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
···
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
+}
+
+static inline void kvm_clean_pmd(pmd_t *pmd)
+{
+	clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t));
 }
 
 static inline void kvm_clean_pmd_entry(pmd_t *pmd)
···
 }
 
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(pudp) (0)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(kvm, pudp) (0)
 
+#define KVM_PREALLOC_LEVEL	0
+
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	return 0;
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm) { }
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	return kvm->arch.pgd;
+}
 
 struct kvm;
 
+1 -1
arch/arm/include/asm/pgtable.h
···
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
+3 -2
arch/arm/kvm/arm.c
···
 	kvm_next_vmid++;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
 	kvm->arch.vttbr = pgd_phys | vmid;
···
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		cpu_init_hyp_mode(NULL);
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
 		break;
 	}
 
+7
arch/arm/kvm/interrupts_head.S
···
 	str	r3, [r11, #VGIC_V2_CPU_HCR]
 	str	r4, [r11, #VGIC_V2_CPU_VMCR]
 	str	r5, [r11, #VGIC_V2_CPU_MISR]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r7, [r11, #VGIC_V2_CPU_EISR]
+	str	r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r9, [r11, #VGIC_V2_CPU_ELRSR]
+#else
 	str	r6, [r11, #VGIC_V2_CPU_EISR]
 	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
 	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
 	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+#endif
 	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
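
The EISR and ELRSR words saved here are the two halves of what the C code now treats as a single 64-bit field, so the store offsets must depend on byte order. The following stand-alone C sketch (illustrative only, not kernel code; the register values are made up) shows why the same pair of 32-bit stores produces different 64-bit values on little- and big-endian hosts:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch: GICH_EISR0 holds the status of LRs 0-31, GICH_EISR1 of LRs 32-63.
 * Bits 0-31 must land in the low half of the u64, which is byte offset 0 on
 * a little-endian host but offset 4 on a big-endian one - hence the swapped
 * store offsets under CONFIG_CPU_ENDIAN_BE8. */
int main(void)
{
	uint32_t eisr0 = 0x00000005;	/* example status of LRs 0-31 */
	uint32_t eisr1 = 0x00000001;	/* example status of LRs 32-63 */
	uint32_t halves[2];
	uint64_t eisr;

	/* Layout produced by the pre-fix code: eisr0 at offset 0, eisr1 at 4. */
	halves[0] = eisr0;
	halves[1] = eisr1;
	memcpy(&eisr, halves, sizeof(eisr));
	printf("eisr0 at offset 0: eisr = %#018llx\n", (unsigned long long)eisr);

	/* Layout the BE8 path now produces: eisr1 at offset 0, eisr0 at 4.
	 * Only one of the two layouts yields ((u64)eisr1 << 32) | eisr0 on a
	 * given host, which is why the stores must depend on endianness. */
	halves[0] = eisr1;
	halves[1] = eisr0;
	memcpy(&eisr, halves, sizeof(eisr));
	printf("eisr1 at offset 0: eisr = %#018llx\n", (unsigned long long)eisr);
	return 0;
}
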
+197 -38
arch/arm/kvm/mmu.c
···
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
···
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(start_pte))
+	if (kvm_pte_table_empty(kvm, start_pte))
 		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
···
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(start_pmd))
+	if (kvm_pmd_table_empty(kvm, start_pmd))
 		clear_pud_entry(kvm, pud, start_addr);
 }
 
···
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(start_pud))
+	if (kvm_pud_table_empty(kvm, start_pud))
 		clear_pgd_entry(kvm, pgd, start_addr);
 }
 
···
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
···
 	for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 		unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-	free_pages((unsigned long)hyp_pgd, pgd_order);
+	free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 	hyp_pgd = NULL;
 }
 
···
 	return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+				   unsigned long end, unsigned long pfn,
+				   pgprot_t prot)
+{
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr, next;
+	int ret;
+
+	addr = start;
+	do {
+		pud = pud_offset(pgd, addr);
+
+		if (pud_none_or_clear_bad(pud)) {
+			pmd = pmd_alloc_one(NULL, addr);
+			if (!pmd) {
+				kvm_err("Cannot allocate Hyp pmd\n");
+				return -ENOMEM;
+			}
+			pud_populate(NULL, pud, pmd);
+			get_page(virt_to_page(pud));
+			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+		}
+
+		next = pud_addr_end(addr, end);
+		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		if (ret)
+			return ret;
+		pfn += (next - addr) >> PAGE_SHIFT;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
 				 unsigned long start, unsigned long end,
 				 unsigned long pfn, pgprot_t prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
 	unsigned long addr, next;
 	int err = 0;
 
···
 	end = PAGE_ALIGN(end);
 	do {
 		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
 
-		if (pud_none_or_clear_bad(pud)) {
-			pmd = pmd_alloc_one(NULL, addr);
-			if (!pmd) {
-				kvm_err("Cannot allocate Hyp pmd\n");
+		if (pgd_none(*pgd)) {
+			pud = pud_alloc_one(NULL, addr);
+			if (!pud) {
+				kvm_err("Cannot allocate Hyp pud\n");
 				err = -ENOMEM;
 				goto out;
 			}
-			pud_populate(NULL, pud, pmd);
-			get_page(virt_to_page(pud));
-			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+			pgd_populate(NULL, pgd, pud);
+			get_page(virt_to_page(pgd));
+			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
 		}
 
 		next = pgd_addr_end(addr, end);
-		err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
 		if (err)
 			goto out;
 		pfn += (next - addr) >> PAGE_SHIFT;
···
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+	int ret;
 	pgd_t *pgd;
 
 	if (kvm->arch.pgd != NULL) {
···
 		return -EINVAL;
 	}
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+	if (KVM_PREALLOC_LEVEL > 0) {
+		/*
+		 * Allocate fake pgd for the page table manipulation macros to
+		 * work.  This is not used by the hardware and we have no
+		 * alignment requirement for this allocation.
+		 */
+		pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+				       GFP_KERNEL | __GFP_ZERO);
+	} else {
+		/*
+		 * Allocate actual first-level Stage-2 page table used by the
+		 * hardware for Stage-2 page table walks.
+		 */
+		pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+	}
+
 	if (!pgd)
 		return -ENOMEM;
 
-	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+	ret = kvm_prealloc_hwpgd(kvm, pgd);
+	if (ret)
+		goto out_err;
+
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
-
 	return 0;
+out_err:
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(pgd);
+	else
+		free_pages((unsigned long)pgd, S2_PGD_ORDER);
+	return ret;
 }
 
 /**
···
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+	kvm_free_hwpgd(kvm);
+	if (KVM_PREALLOC_LEVEL > 0)
+		kfree(kvm->arch.pgd);
+	else
+		free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
 	kvm->arch.pgd = NULL;
+}
+
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	if (WARN_ON(pgd_none(*pgd))) {
+		if (!cache)
+			return NULL;
+		pud = mmu_memory_cache_alloc(cache);
+		pgd_populate(NULL, pgd, pud);
+		get_page(virt_to_page(pgd));
+	}
+
+	return pud_offset(pgd, addr);
 }
 
 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 			     phys_addr_t addr)
 {
-	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pgd = kvm->arch.pgd + pgd_index(addr);
-	pud = pud_offset(pgd, addr);
+	pud = stage2_get_pud(kvm, cache, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
 			return NULL;
···
 	pmd_t *pmd;
 	pte_t *pte, old_pte;
 
-	/* Create stage-2 page table mapping - Level 1 */
+	/* Create stage-2 page table mapping - Levels 0 and 1 */
 	pmd = stage2_get_pmd(kvm, cache, addr);
 	if (!pmd) {
 		/*
···
  * @size:	The size of the mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size)
+			  phys_addr_t pa, unsigned long size, bool writable)
 {
 	phys_addr_t addr, end;
 	int ret = 0;
···
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
-		ret = mmu_topup_memory_cache(&cache, 2, 2);
+		if (writable)
+			kvm_set_s2pte_writable(&pte);
+
+		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+						KVM_NR_MEM_OBJS);
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
···
 	/* Let's check if we will get back a huge page backed by hugetlbfs */
 	down_read(&current->mm->mmap_sem);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (unlikely(!vma)) {
+		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+		up_read(&current->mm->mmap_sem);
+		return -EFAULT;
+	}
+
 	if (is_vm_hugetlb_page(vma)) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
···
 	up_read(&current->mm->mmap_sem);
 
 	/* We need minimum second+third level pages */
-	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+				     KVM_NR_MEM_OBJS);
 	if (ret)
 		return ret;
 
···
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-			mem_type == PAGE_S2_DEVICE);
+			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
 
 
···
 		ret = io_mem_abort(vcpu, run, fault_ipa);
 		goto out_unlock;
 	}
+
+	/* Userspace should not be able to register out-of-bounds IPAs */
+	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
 
 	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
···
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
···
 				   const struct kvm_memory_slot *old,
 				   enum kvm_mr_change change)
 {
-	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
-	phys_addr_t size = old->npages << PAGE_SHIFT;
-	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-		spin_lock(&kvm->mmu_lock);
-		unmap_stage2_range(kvm, gpa, size);
-		spin_unlock(&kvm->mmu_lock);
-	}
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
···
 				   struct kvm_userspace_memory_region *mem,
 				   enum kvm_mr_change change)
 {
-	return 0;
+	hva_t hva = mem->userspace_addr;
+	hva_t reg_end = hva + mem->memory_size;
+	bool writable = !(mem->flags & KVM_MEM_READONLY);
+	int ret = 0;
+
+	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+		return 0;
+
+	/*
+	 * Prevent userspace from creating a memory region outside of the IPA
+	 * space addressable by the KVM guest IPA space.
+	 */
+	if (memslot->base_gfn + memslot->npages >=
+	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
+		return -EFAULT;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we can map
+	 * any of them right now.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Mapping a read-only VMA is only allowed if the
+		 * memory region is configured as read-only.
+		 */
+		if (writable && !(vma->vm_flags & VM_WRITE)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (vma->vm_flags & VM_PFNMAP) {
+			gpa_t gpa = mem->guest_phys_addr +
+				    (vm_start - mem->userspace_addr);
+			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+					 vm_start - vma->vm_start;
+
+			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+						    vm_end - vm_start,
+						    writable);
+			if (ret)
+				break;
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+
+	if (ret) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+	return ret;
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
···
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+	spin_lock(&kvm->mmu_lock);
+	unmap_stage2_range(kvm, gpa, size);
+	spin_unlock(&kvm->mmu_lock);
 }
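
The new kvm_arch_prepare_memory_region() walks every VMA overlapping the memory region, clamps each one to the region, and hands VM_PFNMAP intersections to kvm_phys_addr_ioremap() right away. A stand-alone sketch of just the intersection arithmetic, using made-up example addresses rather than kernel data structures:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct vma { uint64_t vm_start, vm_end, vm_pgoff; };

int main(void)
{
	/* Example memslot: 2 MiB of guest IPA at 0x80000000, backed by
	 * userspace addresses starting at 0x7f0000000000 (all invented). */
	uint64_t userspace_addr  = 0x7f0000000000ULL;
	uint64_t memory_size     = 2ULL << 20;
	uint64_t guest_phys_addr = 0x80000000ULL;
	uint64_t reg_end = userspace_addr + memory_size;

	/* Example PFNMAP VMA covering the middle of the region. */
	struct vma vma = {
		.vm_start = userspace_addr + 0x100000,
		.vm_end   = userspace_addr + 0x180000,
		.vm_pgoff = 0x10900,	/* device starts at PA 0x10900000 */
	};

	/* Intersection of the VMA with the memory region. */
	uint64_t vm_start = vma.vm_start > userspace_addr ? vma.vm_start : userspace_addr;
	uint64_t vm_end   = vma.vm_end < reg_end ? vma.vm_end : reg_end;

	/* Offsets handed to kvm_phys_addr_ioremap() in the real code. */
	uint64_t gpa = guest_phys_addr + (vm_start - userspace_addr);
	uint64_t pa  = (vma.vm_pgoff << PAGE_SHIFT) + (vm_start - vma.vm_start);

	printf("map gpa %#llx -> pa %#llx, size %#llx\n",
	       (unsigned long long)gpa, (unsigned long long)pa,
	       (unsigned long long)(vm_end - vm_start));
	return 0;
}
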
+117 -10
arch/arm64/include/asm/kvm_mmu.h
···
  */
 #define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
 
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
+ * levels in addition to the PGD and potentially the PUD which are
+ * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
+ * tables use one level of tables less than the kernel.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define KVM_MMU_CACHE_MIN_PAGES	1
+#else
+#define KVM_MMU_CACHE_MIN_PAGES	2
+#endif
+
 #ifdef __ASSEMBLY__
 
 /*
···
 
 #else
 
+#include <asm/pgalloc.h>
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
 
···
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 
-/* Make sure we get the right size, and thus the right alignment */
-#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
-#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
···
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-			  phys_addr_t pa, unsigned long size);
+			  phys_addr_t pa, unsigned long size, bool writable);
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
···
 #define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd(pmd_t *pmd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
 static inline void kvm_clean_pte_entry(pte_t *pte) {}
···
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+/*
+ * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
+ * the entire IPA input range with a single pgd entry, and we would only need
+ * one pgd entry.  Note that in this case, the pgd is actually not used by
+ * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
+ * structure for the kernel pgtable macros to work.
+ */
+#if PGDIR_SHIFT > KVM_PHYS_SHIFT
+#define PTRS_PER_S2_PGD_SHIFT	0
+#else
+#define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
+#endif
+#define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
+#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+/*
+ * If we are concatenating first level stage-2 page tables, we would have less
+ * than or equal to 16 pointers in the fake PGD, because that's what the
+ * architecture allows.  In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS)
+ * represents the first level for the host, and we add 1 to go to the next
+ * level (which uses contatenation) for the stage-2 tables.
+ */
+#if PTRS_PER_S2_PGD <= 16
+#define KVM_PREALLOC_LEVEL	(4 - CONFIG_ARM64_PGTABLE_LEVELS + 1)
+#else
+#define KVM_PREALLOC_LEVEL	(0)
+#endif
+
+/**
+ * kvm_prealloc_hwpgd - allocate inital table for VTTBR
+ * @kvm:	The KVM struct pointer for the VM.
+ * @pgd:	The kernel pseudo pgd
+ *
+ * When the kernel uses more levels of page tables than the guest, we allocate
+ * a fake PGD and pre-populate it to point to the next-level page table, which
+ * will be the real initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
+ * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
+ * allocate 2 consecutive PUD pages.
+ */
+static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+{
+	unsigned int i;
+	unsigned long hwpgd;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return 0;
+
+	hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
+	if (!hwpgd)
+		return -ENOMEM;
+
+	for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+		if (KVM_PREALLOC_LEVEL == 1)
+			pgd_populate(NULL, pgd + i,
+				     (pud_t *)hwpgd + i * PTRS_PER_PUD);
+		else if (KVM_PREALLOC_LEVEL == 2)
+			pud_populate(NULL, pud_offset(pgd, 0) + i,
+				     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+	}
+
+	return 0;
+}
+
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
+{
+	pgd_t *pgd = kvm->arch.pgd;
+	pud_t *pud;
+
+	if (KVM_PREALLOC_LEVEL == 0)
+		return pgd;
+
+	pud = pud_offset(pgd, 0);
+	if (KVM_PREALLOC_LEVEL == 1)
+		return pud;
+
+	BUG_ON(KVM_PREALLOC_LEVEL != 2);
+	return pmd_offset(pud, 0);
+}
+
+static inline void kvm_free_hwpgd(struct kvm *kvm)
+{
+	if (KVM_PREALLOC_LEVEL > 0) {
+		unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
+		free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
+	}
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
 	return page_count(ptr_page) == 1;
 }
 
-#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
-#ifndef CONFIG_ARM64_64K_PAGES
-#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define kvm_pmd_table_empty(kvm, pmdp) (0)
 #else
-#define kvm_pmd_table_empty(pmdp) (0)
+#define kvm_pmd_table_empty(kvm, pmdp) \
+	(kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
 #endif
-#define kvm_pud_table_empty(pudp) (0)
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define kvm_pud_table_empty(kvm, pudp) (0)
+#else
+#define kvm_pud_table_empty(kvm, pudp) \
+	(kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
+#endif
 
 
 struct kvm;
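
The macros above derive, from KVM_PHYS_SHIFT and PGDIR_SHIFT, how many first-level stage-2 entries are needed and whether a fake PGD must be pre-allocated (KVM_PREALLOC_LEVEL > 0). A stand-alone sketch of the same arithmetic for a 40-bit IPA space; the PGDIR_SHIFT and level counts below are filled in by hand for common configurations rather than taken from kernel headers:

#include <stdio.h>

#define KVM_PHYS_SHIFT 40	/* 40-bit IPA space */

static void show(const char *cfg, int pgdir_shift, int pgtable_levels)
{
	/* Same logic as PTRS_PER_S2_PGD_SHIFT / PTRS_PER_S2_PGD. */
	int s2_pgd_shift = (pgdir_shift > KVM_PHYS_SHIFT) ?
				0 : KVM_PHYS_SHIFT - pgdir_shift;
	int ptrs_per_s2_pgd = 1 << s2_pgd_shift;

	/* <= 16 first-level entries means the stage-2 tables can be
	 * concatenated one level down, so a fake PGD is pre-allocated. */
	int prealloc_level = (ptrs_per_s2_pgd <= 16) ?
				(4 - pgtable_levels + 1) : 0;

	printf("%-32s PTRS_PER_S2_PGD=%-5d KVM_PREALLOC_LEVEL=%d\n",
	       cfg, ptrs_per_s2_pgd, prealloc_level);
}

int main(void)
{
	show("4K pages, 4 levels (48-bit VA)", 39, 4);
	show("4K pages, 3 levels (39-bit VA)", 30, 3);
	show("64K pages, 3 levels (48-bit VA)", 42, 3);
	show("64K pages, 2 levels (42-bit VA)", 29, 2);
	return 0;
}
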
+1 -1
arch/arm64/include/asm/pgtable.h
···
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
 #define PAGE_S2			__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN)
+#define PAGE_S2_DEVICE		__pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
 
 #define PAGE_NONE		__pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+8 -4
arch/arm64/kvm/vgic-v2-switch.S
···
 	str	w4, [x3, #VGIC_V2_CPU_HCR]
 	str	w5, [x3, #VGIC_V2_CPU_VMCR]
 	str	w6, [x3, #VGIC_V2_CPU_MISR]
-	str	w7, [x3, #VGIC_V2_CPU_EISR]
-	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+CPU_LE(	str	w7, [x3, #VGIC_V2_CPU_EISR] )
+CPU_LE(	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_LE(	str	w9, [x3, #VGIC_V2_CPU_ELRSR] )
+CPU_LE(	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_BE(	str	w8, [x3, #VGIC_V2_CPU_EISR] )
+CPU_BE(	str	w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w10, [x3, #VGIC_V2_CPU_ELRSR] )
 	str	w11, [x3, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
+10 -2
include/kvm/arm_vgic.h
···
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
 	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
+	u64		vgic_eisr;	/* Saved only */
+	u64		vgic_elrsr;	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
···
 static inline int kvm_vgic_create(struct kvm *kvm)
 {
 	return 0;
+}
+
+static inline void kvm_vgic_destroy(struct kvm *kvm)
+{
+}
+
+static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
 }
 
 static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+3 -21
virt/kvm/arm/vgic-v2.c
···
 			     struct vgic_lr lr_desc)
 {
 	if (!(lr_desc.state & LR_STATE_MASK))
-		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
 }
 
 static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
-#endif
-	return val;
+	return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
 }
 
 static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
-#endif
-	return val;
+	return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
 }
 
 static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+18 -3
virt/kvm/arm/vgic.c
···
 	b->shared = NULL;
 }
 
+/*
+ * Call this function to convert a u64 value to an unsigned long * bitmask
+ * in a way that works on both 32-bit and 64-bit LE and BE platforms.
+ *
+ * Warning: Calling this function may modify *val.
+ */
+static unsigned long *u64_to_bitmask(u64 *val)
+{
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
+	*val = (*val >> 32) | (*val << 32);
+#endif
+	return (unsigned long *)val;
+}
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
···
 	 * active bit.
 	 */
 	u64 eisr = vgic_get_eisr(vcpu);
-	unsigned long *eisr_ptr = (unsigned long *)&eisr;
+	unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
 	int lr;
 
 	for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
···
 
 	level_pending = vgic_process_maintenance(vcpu);
 	elrsr = vgic_get_elrsr(vcpu);
-	elrsr_ptr = (unsigned long *)&elrsr;
+	elrsr_ptr = u64_to_bitmask(&elrsr);
 
 	/* Clear mappings for empty LRs */
 	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
···
 	}
 
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
+				    true);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
 		goto out;
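
u64_to_bitmask() exists because the bitmap iterators walk an array of unsigned long, and reinterpreting a u64 that way is only safe on little-endian or 64-bit hosts; on a 32-bit big-endian host the two halves must be swapped first. A stand-alone model of the same trick, with the kernel's CONFIG_CPU_BIG_ENDIAN test replaced by the compiler's __BYTE_ORDER__ macro and for_each_set_bit() replaced by a plain loop:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace model of the kernel helper: swap the halves of the u64 when
 * unsigned long is 32 bits on a big-endian host, then treat the storage as
 * an array of unsigned long words. */
static unsigned long *u64_to_bitmask(uint64_t *val)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) && \
	(ULONG_MAX == 0xffffffffUL)
	*val = (*val >> 32) | (*val << 32);
#endif
	return (unsigned long *)val;
}

int main(void)
{
	uint64_t elrsr = (1ULL << 3) | (1ULL << 40);	/* LRs 3 and 40 empty */
	unsigned long *bits = u64_to_bitmask(&elrsr);
	unsigned int nbits = 64, i;

	/* Minimal stand-in for for_each_set_bit(): index into the word array
	 * and test the bit within that word. */
	for (i = 0; i < nbits; i++) {
		unsigned long word = bits[i / (CHAR_BIT * sizeof(unsigned long))];
		unsigned int off = i % (CHAR_BIT * sizeof(unsigned long));

		if (word & (1UL << off))
			printf("LR %u is empty\n", i);
	}
	return 0;
}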