Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

arm/arm64: KVM: Fix and refactor unmap_range

unmap_range() was utterly broken, to quote Marc, and broke in all sorts
of situations. It was also quite hard to follow and didn't use the
usual scheme of having a separate iterating function for each level of
page tables.

Address this by refactoring the code and introducing a
clear_pgd_entry() function.

Reviewed-by: Jungseok Lee <jays.lee@samsung.com>
Reviewed-by: Mario Smarduch <m.smarduch@samsung.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
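
The refactor below leans on one idiom throughout: each level walks its
range in chunks clamped by a kvm_*_addr_end() helper, which yields either
the next table boundary at that level or the end of the range, whichever
comes first. A minimal userspace sketch of the idea, assuming an
illustrative 1 GiB "pud" span (none of these names are the kernel's):

#include <stdio.h>
#include <stdint.h>

/* Illustrative 1 GiB "pud" granularity; the real span depends on config. */
#define PUD_SIZE        (1ULL << 30)
#define PUD_MASK        (~(PUD_SIZE - 1))

/*
 * Clamp to the next pud boundary or to 'end', whichever comes first.
 * Comparing 'boundary - 1' against 'end - 1' mirrors the kernel macro
 * and stays correct even if the boundary wraps to 0 at the very top
 * of the address space.
 */
static uint64_t pud_addr_end(uint64_t addr, uint64_t end)
{
        uint64_t boundary = (addr + PUD_SIZE) & PUD_MASK;
        return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
        uint64_t addr = 0x3fff0000ULL, end = 0x80010000ULL, next;

        /* Walk the range one pud-sized chunk at a time. */
        do {
                next = pud_addr_end(addr, end);
                printf("chunk: %#llx - %#llx\n",
                       (unsigned long long)addr, (unsigned long long)next);
        } while (addr = next, addr != end);
        return 0;
}

The same clamp-and-step shape appears once per level in the new
unmap_ptes()/unmap_pmds()/unmap_puds() chain, and the '- 1' form is
visible verbatim in the arm header context just below.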

+115 -77
+12
arch/arm/include/asm/kvm_mmu.h
···
         (__boundary - 1 < (end) - 1)? __boundary: (end);        \
 })
 
+static inline bool kvm_page_empty(void *ptr)
+{
+        struct page *ptr_page = virt_to_page(ptr);
+        return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)        __cpuc_flush_dcache_area((a), (l))
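
kvm_page_empty() works because of a reference-counting convention this
code already follows: the page backing a table holds one reference for
its own allocation, and every entry installed in it takes another
(get_page() on the mapping path, matched by the put_page() calls in the
mmu.c hunk below). Once the count is back to 1, the table maps nothing
and can be freed. A toy userspace model of that bookkeeping, with a
plain counter standing in for the struct page refcount (all names here
are illustrative):

#include <assert.h>
#include <stdbool.h>

/* Stand-in for the struct page refcount of a page-table page. */
struct table_page {
        int refcount;                /* 1 == allocated but empty */
};

static void table_init(struct table_page *p)  { p->refcount = 1; }
static void entry_set(struct table_page *p)   { p->refcount++; } /* get_page() */
static void entry_clear(struct table_page *p) { p->refcount--; } /* put_page() */

/* Mirrors kvm_page_empty(): only the allocation reference remains. */
static bool table_empty(struct table_page *p)
{
        return p->refcount == 1;
}

int main(void)
{
        struct table_page pte_table;

        table_init(&pte_table);
        assert(table_empty(&pte_table));

        entry_set(&pte_table);                  /* map one pte */
        assert(!table_empty(&pte_table));

        entry_clear(&pte_table);                /* unmap it again */
        assert(table_empty(&pte_table));        /* safe to free the table */
        return 0;
}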
+88 -77
arch/arm/kvm/mmu.c
···
         return p;
 }
 
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-        struct page *ptr_page = virt_to_page(ptr);
-        return page_count(ptr_page) == 1;
+        pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+        pgd_clear(pgd);
+        kvm_tlb_flush_vmid_ipa(kvm, addr);
+        pud_free(NULL, pud_table);
+        put_page(virt_to_page(pgd));
 }
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-        if (pud_huge(*pud)) {
-                pud_clear(pud);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        } else {
-                pmd_t *pmd_table = pmd_offset(pud, 0);
-                pud_clear(pud);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-                pmd_free(NULL, pmd_table);
-        }
+        pmd_t *pmd_table = pmd_offset(pud, 0);
+        VM_BUG_ON(pud_huge(*pud));
+        pud_clear(pud);
+        kvm_tlb_flush_vmid_ipa(kvm, addr);
+        pmd_free(NULL, pmd_table);
         put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-        if (kvm_pmd_huge(*pmd)) {
-                pmd_clear(pmd);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        } else {
-                pte_t *pte_table = pte_offset_kernel(pmd, 0);
-                pmd_clear(pmd);
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-                pte_free_kernel(NULL, pte_table);
-        }
+        pte_t *pte_table = pte_offset_kernel(pmd, 0);
+        VM_BUG_ON(kvm_pmd_huge(*pmd));
+        pmd_clear(pmd);
+        kvm_tlb_flush_vmid_ipa(kvm, addr);
+        pte_free_kernel(NULL, pte_table);
         put_page(virt_to_page(pmd));
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+                       phys_addr_t addr, phys_addr_t end)
 {
-        if (pte_present(*pte)) {
-                kvm_set_pte(pte, __pte(0));
-                put_page(virt_to_page(pte));
-                kvm_tlb_flush_vmid_ipa(kvm, addr);
-        }
+        phys_addr_t start_addr = addr;
+        pte_t *pte, *start_pte;
+
+        start_pte = pte = pte_offset_kernel(pmd, addr);
+        do {
+                if (!pte_none(*pte)) {
+                        kvm_set_pte(pte, __pte(0));
+                        put_page(virt_to_page(pte));
+                        kvm_tlb_flush_vmid_ipa(kvm, addr);
+                }
+        } while (pte++, addr += PAGE_SIZE, addr != end);
+
+        if (kvm_pte_table_empty(start_pte))
+                clear_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-                        unsigned long long start, u64 size)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+                       phys_addr_t addr, phys_addr_t end)
 {
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        unsigned long long addr = start, end = start + size;
-        u64 next;
+        phys_addr_t next, start_addr = addr;
+        pmd_t *pmd, *start_pmd;
 
-        while (addr < end) {
-                pgd = pgdp + pgd_index(addr);
-                pud = pud_offset(pgd, addr);
-                pte = NULL;
-                if (pud_none(*pud)) {
-                        addr = kvm_pud_addr_end(addr, end);
-                        continue;
-                }
-
-                if (pud_huge(*pud)) {
-                        /*
-                         * If we are dealing with a huge pud, just clear it and
-                         * move on.
-                         */
-                        clear_pud_entry(kvm, pud, addr);
-                        addr = kvm_pud_addr_end(addr, end);
-                        continue;
-                }
-
-                pmd = pmd_offset(pud, addr);
-                if (pmd_none(*pmd)) {
-                        addr = kvm_pmd_addr_end(addr, end);
-                        continue;
-                }
-
-                if (!kvm_pmd_huge(*pmd)) {
-                        pte = pte_offset_kernel(pmd, addr);
-                        clear_pte_entry(kvm, pte, addr);
-                        next = addr + PAGE_SIZE;
-                }
-
-                /*
-                 * If the pmd entry is to be cleared, walk back up the ladder
-                 */
-                if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
-                        clear_pmd_entry(kvm, pmd, addr);
-                        next = kvm_pmd_addr_end(addr, end);
-                        if (page_empty(pmd) && !page_empty(pud)) {
-                                clear_pud_entry(kvm, pud, addr);
-                                next = kvm_pud_addr_end(addr, end);
+        start_pmd = pmd = pmd_offset(pud, addr);
+        do {
+                next = kvm_pmd_addr_end(addr, end);
+                if (!pmd_none(*pmd)) {
+                        if (kvm_pmd_huge(*pmd)) {
+                                pmd_clear(pmd);
+                                kvm_tlb_flush_vmid_ipa(kvm, addr);
+                                put_page(virt_to_page(pmd));
+                        } else {
+                                unmap_ptes(kvm, pmd, addr, next);
                         }
                 }
+        } while (pmd++, addr = next, addr != end);
 
-                addr = next;
-        }
+        if (kvm_pmd_table_empty(start_pmd))
+                clear_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+                       phys_addr_t addr, phys_addr_t end)
+{
+        phys_addr_t next, start_addr = addr;
+        pud_t *pud, *start_pud;
+
+        start_pud = pud = pud_offset(pgd, addr);
+        do {
+                next = kvm_pud_addr_end(addr, end);
+                if (!pud_none(*pud)) {
+                        if (pud_huge(*pud)) {
+                                pud_clear(pud);
+                                kvm_tlb_flush_vmid_ipa(kvm, addr);
+                                put_page(virt_to_page(pud));
+                        } else {
+                                unmap_pmds(kvm, pud, addr, next);
+                        }
+                }
+        } while (pud++, addr = next, addr != end);
+
+        if (kvm_pud_table_empty(start_pud))
+                clear_pgd_entry(kvm, pgd, start_addr);
+}
+
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+                        phys_addr_t start, u64 size)
+{
+        pgd_t *pgd;
+        phys_addr_t addr = start, end = start + size;
+        phys_addr_t next;
+
+        pgd = pgdp + pgd_index(addr);
+        do {
+                next = kvm_pgd_addr_end(addr, end);
+                unmap_puds(kvm, pgd, addr, next);
+        } while (pgd++, addr = next, addr != end);
 }
 
 static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
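
The shape of the refactored walk is easier to see with the kernel types
stripped away: each level gets its own do/while loop, non-empty entries
are either cleared in place (huge mappings) or recursed into, and on the
way out each walker asks whether the child table it just visited became
empty. A compressed two-level userspace model of that structure, with
tiny four-entry tables and a count field standing in for the page
refcount (every name here is illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PTRS_PER_TABLE 4        /* tiny tables so the walk is easy to trace */
#define PAGE_SHIFT     12
#define PAGE_SIZE      (1ULL << PAGE_SHIFT)
#define PTE_SPAN       PAGE_SIZE
#define PMD_SPAN       (PTE_SPAN * PTRS_PER_TABLE)

struct pte_table {
        int present[PTRS_PER_TABLE];
        int count;              /* how many entries are mapped */
};

/* The "pmd" level: each slot either points to a pte table or is empty. */
static struct pte_table *pmd[PTRS_PER_TABLE];

/* Next boundary of 'span' or 'end', whichever comes first. */
static uint64_t addr_end(uint64_t addr, uint64_t end, uint64_t span)
{
        uint64_t boundary = (addr + span) & ~(span - 1);
        return (boundary - 1 < end - 1) ? boundary : end;
}

/* Level-2 walker: clear every pte in [addr, end), then collapse the
 * table if the walk left it empty -- like unmap_ptes(). */
static void unmap_ptes(uint64_t pmd_idx, uint64_t addr, uint64_t end)
{
        struct pte_table *t = pmd[pmd_idx];

        do {
                uint64_t idx = (addr / PTE_SPAN) % PTRS_PER_TABLE;
                if (t->present[idx]) {
                        t->present[idx] = 0;
                        t->count--;     /* put_page() in the kernel */
                }
        } while (addr += PTE_SPAN, addr != end);

        if (t->count == 0) {            /* kvm_pte_table_empty() */
                free(t);                /* clear_pmd_entry() */
                pmd[pmd_idx] = NULL;
        }
}

/* Level-1 walker: one do/while per level, like unmap_pmds(). */
static void unmap_range(uint64_t addr, uint64_t end)
{
        uint64_t next;

        do {
                next = addr_end(addr, end, PMD_SPAN);
                uint64_t idx = (addr / PMD_SPAN) % PTRS_PER_TABLE;
                if (pmd[idx])
                        unmap_ptes(idx, addr, next);
        } while (addr = next, addr != end);
}

int main(void)
{
        /* Map two pages through pmd[0]. */
        pmd[0] = calloc(1, sizeof(*pmd[0]));
        pmd[0]->present[1] = pmd[0]->present[2] = 1;
        pmd[0]->count = 2;

        unmap_range(1 * PAGE_SIZE, 3 * PAGE_SIZE);
        printf("pmd[0] %s\n", pmd[0] ? "still allocated" : "freed (empty)");
        return 0;
}

Running it prints "pmd[0] freed (empty)": unmapping the last two present
pages leaves the pte table empty, so the walker collapses it on the way
back up, which is exactly the kvm_pte_table_empty()/clear_pmd_entry()
step in the hunk above.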
+15
arch/arm64/include/asm/kvm_mmu.h
···
 #define kvm_pud_addr_end(addr, end)        pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)        pmd_addr_end(addr, end)
 
+static inline bool kvm_page_empty(void *ptr)
+{
+        struct page *ptr_page = virt_to_page(ptr);
+        return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)        __flush_dcache_area((a), (l))
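
The #ifdef asymmetry falls out of the table geometry. Each table page
holds PAGE_SIZE / 8 eight-byte entries, so it resolves
log2(PAGE_SIZE) - 3 bits per level: 9 bits with 4K pages but 13 bits
with 64K pages. With the 64K granule, two levels already cover more
address bits than a typical stage-2 input space needs, so the pmd level
is folded into the level above and there is never a separately allocated
pmd table to free; hard-coding kvm_pmd_table_empty() to 0 keeps
unmap_pmds() from trying. The same reasoning lies behind
kvm_pud_table_empty() being 0 in both headers. A quick illustrative
computation (it ignores stage-2's ability to concatenate pages at the
top level):

#include <stdio.h>

/*
 * Address bits covered by 'levels' translation levels on a given
 * granule: page_shift offset bits plus page_shift - 3 index bits
 * per level.
 */
static int bits_covered(int page_shift, int levels)
{
        return page_shift + levels * (page_shift - 3);
}

int main(void)
{
        /* 4K granule: 9 index bits per level; going past 30 bits of
         * input address needs a real, separately allocated pmd level,
         * which can later become empty and be freed. */
        printf("4K  granule, 2 levels: %d bits\n", bits_covered(12, 2)); /* 30 */
        printf("4K  granule, 3 levels: %d bits\n", bits_covered(12, 3)); /* 39 */

        /* 64K granule: 13 index bits per level; two levels already
         * cover 42 bits, so the pmd level is folded away and there is
         * never a pmd table to free. */
        printf("64K granule, 2 levels: %d bits\n", bits_covered(16, 2)); /* 42 */
        return 0;
}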