Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-arm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/ARM Changes for Linux v4.7

Reworks our stage 2 page table handling to have page table manipulation
macros separate from those of the host systems as the underlying
hardware page tables can be configured to be noticeably different in
layout from the stage 1 page tables used by the host.

Adds 16K page size support based on the above.

Adds a generic firmware probing layer for the timer and GIC so that KVM
initializes using the same logic based on both ACPI and FDT.

Finally adds support for hardware updating of the access flag.

+1057 -529
+11 -32
arch/arm/include/asm/kvm_mmu.h
··· 47 47 #include <linux/highmem.h> 48 48 #include <asm/cacheflush.h> 49 49 #include <asm/pgalloc.h> 50 + #include <asm/stage2_pgtable.h> 50 51 51 52 int create_hyp_mappings(void *from, void *to); 52 53 int create_hyp_io_mappings(void *from, void *to, phys_addr_t); ··· 106 105 clean_pte_table(pte); 107 106 } 108 107 109 - static inline void kvm_set_s2pte_writable(pte_t *pte) 108 + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) 110 109 { 111 - pte_val(*pte) |= L_PTE_S2_RDWR; 110 + pte_val(pte) |= L_PTE_S2_RDWR; 111 + return pte; 112 112 } 113 113 114 - static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 114 + static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) 115 115 { 116 - pmd_val(*pmd) |= L_PMD_S2_RDWR; 116 + pmd_val(pmd) |= L_PMD_S2_RDWR; 117 + return pmd; 117 118 } 118 119 119 120 static inline void kvm_set_s2pte_readonly(pte_t *pte) ··· 138 135 return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; 139 136 } 140 137 141 - 142 - /* Open coded p*d_addr_end that can deal with 64bit addresses */ 143 - #define kvm_pgd_addr_end(addr, end) \ 144 - ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ 145 - (__boundary - 1 < (end) - 1)? __boundary: (end); \ 146 - }) 147 - 148 - #define kvm_pud_addr_end(addr,end) (end) 149 - 150 - #define kvm_pmd_addr_end(addr, end) \ 151 - ({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ 152 - (__boundary - 1 < (end) - 1)? 
__boundary: (end); \ 153 - }) 154 - 155 - #define kvm_pgd_index(addr) pgd_index(addr) 156 - 157 138 static inline bool kvm_page_empty(void *ptr) 158 139 { 159 140 struct page *ptr_page = virt_to_page(ptr); ··· 146 159 147 160 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) 148 161 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) 149 - #define kvm_pud_table_empty(kvm, pudp) (0) 162 + #define kvm_pud_table_empty(kvm, pudp) false 150 163 151 - #define KVM_PREALLOC_LEVEL 0 152 - 153 - static inline void *kvm_get_hwpgd(struct kvm *kvm) 154 - { 155 - return kvm->arch.pgd; 156 - } 157 - 158 - static inline unsigned int kvm_get_hwpgd_size(void) 159 - { 160 - return PTRS_PER_S2_PGD * sizeof(pgd_t); 161 - } 164 + #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) 165 + #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 166 + #define hyp_pud_table_empty(pudp) false 162 167 163 168 struct kvm; 164 169
+61
arch/arm/include/asm/stage2_pgtable.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * stage2 page table helpers 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, 11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 + * GNU General Public License for more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __ARM_S2_PGTABLE_H_ 20 + #define __ARM_S2_PGTABLE_H_ 21 + 22 + #define stage2_pgd_none(pgd) pgd_none(pgd) 23 + #define stage2_pgd_clear(pgd) pgd_clear(pgd) 24 + #define stage2_pgd_present(pgd) pgd_present(pgd) 25 + #define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 26 + #define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 27 + #define stage2_pud_free(pud) pud_free(NULL, pud) 28 + 29 + #define stage2_pud_none(pud) pud_none(pud) 30 + #define stage2_pud_clear(pud) pud_clear(pud) 31 + #define stage2_pud_present(pud) pud_present(pud) 32 + #define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 33 + #define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 34 + #define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 35 + 36 + #define stage2_pud_huge(pud) pud_huge(pud) 37 + 38 + /* Open coded p*d_addr_end that can deal with 64bit addresses */ 39 + static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 40 + { 41 + phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; 42 + 43 + return (boundary - 1 < end - 1) ? 
boundary : end; 44 + } 45 + 46 + #define stage2_pud_addr_end(addr, end) (end) 47 + 48 + static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 49 + { 50 + phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; 51 + 52 + return (boundary - 1 < end - 1) ? boundary : end; 53 + } 54 + 55 + #define stage2_pgd_index(addr) pgd_index(addr) 56 + 57 + #define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 58 + #define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 59 + #define stage2_pud_table_empty(pudp) false 60 + 61 + #endif /* __ARM_S2_PGTABLE_H_ */
+1 -1
arch/arm/kvm/arm.c
··· 448 448 kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; 449 449 450 450 /* update vttbr to be used with the new vmid */ 451 - pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); 451 + pgd_phys = virt_to_phys(kvm->arch.pgd); 452 452 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 453 453 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); 454 454 kvm->arch.vttbr = pgd_phys | vmid;
+221 -187
arch/arm/kvm/mmu.c
··· 43 43 static unsigned long hyp_idmap_end; 44 44 static phys_addr_t hyp_idmap_vector; 45 45 46 + #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) 46 47 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) 47 - 48 - #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) 49 - #define kvm_pud_huge(_x) pud_huge(_x) 50 48 51 49 #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) 52 50 #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) ··· 67 69 68 70 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 69 71 { 70 - /* 71 - * This function also gets called when dealing with HYP page 72 - * tables. As HYP doesn't have an associated struct kvm (and 73 - * the HYP page tables are fairly static), we don't do 74 - * anything there. 75 - */ 76 - if (kvm) 77 - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 72 + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 78 73 } 79 74 80 75 /* ··· 106 115 */ 107 116 static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) 108 117 { 109 - if (!kvm_pmd_huge(*pmd)) 118 + if (!pmd_thp_or_huge(*pmd)) 110 119 return; 111 120 112 121 pmd_clear(pmd); ··· 146 155 return p; 147 156 } 148 157 149 - static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 158 + static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 150 159 { 151 - pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); 152 - pgd_clear(pgd); 160 + pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); 161 + stage2_pgd_clear(pgd); 153 162 kvm_tlb_flush_vmid_ipa(kvm, addr); 154 - pud_free(NULL, pud_table); 163 + stage2_pud_free(pud_table); 155 164 put_page(virt_to_page(pgd)); 156 165 } 157 166 158 - static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 167 + static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 159 168 { 160 - pmd_t *pmd_table = pmd_offset(pud, 0); 161 - VM_BUG_ON(pud_huge(*pud)); 162 - pud_clear(pud); 169 + pmd_t 
*pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); 170 + VM_BUG_ON(stage2_pud_huge(*pud)); 171 + stage2_pud_clear(pud); 163 172 kvm_tlb_flush_vmid_ipa(kvm, addr); 164 - pmd_free(NULL, pmd_table); 173 + stage2_pmd_free(pmd_table); 165 174 put_page(virt_to_page(pud)); 166 175 } 167 176 168 - static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 177 + static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 169 178 { 170 179 pte_t *pte_table = pte_offset_kernel(pmd, 0); 171 - VM_BUG_ON(kvm_pmd_huge(*pmd)); 180 + VM_BUG_ON(pmd_thp_or_huge(*pmd)); 172 181 pmd_clear(pmd); 173 182 kvm_tlb_flush_vmid_ipa(kvm, addr); 174 183 pte_free_kernel(NULL, pte_table); ··· 195 204 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure 196 205 * the IO subsystem will never hit in the cache. 197 206 */ 198 - static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, 207 + static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, 199 208 phys_addr_t addr, phys_addr_t end) 200 209 { 201 210 phys_addr_t start_addr = addr; ··· 217 226 } 218 227 } while (pte++, addr += PAGE_SIZE, addr != end); 219 228 220 - if (kvm_pte_table_empty(kvm, start_pte)) 221 - clear_pmd_entry(kvm, pmd, start_addr); 229 + if (stage2_pte_table_empty(start_pte)) 230 + clear_stage2_pmd_entry(kvm, pmd, start_addr); 222 231 } 223 232 224 - static void unmap_pmds(struct kvm *kvm, pud_t *pud, 233 + static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, 225 234 phys_addr_t addr, phys_addr_t end) 226 235 { 227 236 phys_addr_t next, start_addr = addr; 228 237 pmd_t *pmd, *start_pmd; 229 238 230 - start_pmd = pmd = pmd_offset(pud, addr); 239 + start_pmd = pmd = stage2_pmd_offset(pud, addr); 231 240 do { 232 - next = kvm_pmd_addr_end(addr, end); 241 + next = stage2_pmd_addr_end(addr, end); 233 242 if (!pmd_none(*pmd)) { 234 - if (kvm_pmd_huge(*pmd)) { 243 + if (pmd_thp_or_huge(*pmd)) { 235 244 pmd_t old_pmd = *pmd; 236 245 237 246 pmd_clear(pmd); ··· 241 250 242 251 
put_page(virt_to_page(pmd)); 243 252 } else { 244 - unmap_ptes(kvm, pmd, addr, next); 253 + unmap_stage2_ptes(kvm, pmd, addr, next); 245 254 } 246 255 } 247 256 } while (pmd++, addr = next, addr != end); 248 257 249 - if (kvm_pmd_table_empty(kvm, start_pmd)) 250 - clear_pud_entry(kvm, pud, start_addr); 258 + if (stage2_pmd_table_empty(start_pmd)) 259 + clear_stage2_pud_entry(kvm, pud, start_addr); 251 260 } 252 261 253 - static void unmap_puds(struct kvm *kvm, pgd_t *pgd, 262 + static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, 254 263 phys_addr_t addr, phys_addr_t end) 255 264 { 256 265 phys_addr_t next, start_addr = addr; 257 266 pud_t *pud, *start_pud; 258 267 259 - start_pud = pud = pud_offset(pgd, addr); 268 + start_pud = pud = stage2_pud_offset(pgd, addr); 260 269 do { 261 - next = kvm_pud_addr_end(addr, end); 262 - if (!pud_none(*pud)) { 263 - if (pud_huge(*pud)) { 270 + next = stage2_pud_addr_end(addr, end); 271 + if (!stage2_pud_none(*pud)) { 272 + if (stage2_pud_huge(*pud)) { 264 273 pud_t old_pud = *pud; 265 274 266 - pud_clear(pud); 275 + stage2_pud_clear(pud); 267 276 kvm_tlb_flush_vmid_ipa(kvm, addr); 268 - 269 277 kvm_flush_dcache_pud(old_pud); 270 - 271 278 put_page(virt_to_page(pud)); 272 279 } else { 273 - unmap_pmds(kvm, pud, addr, next); 280 + unmap_stage2_pmds(kvm, pud, addr, next); 274 281 } 275 282 } 276 283 } while (pud++, addr = next, addr != end); 277 284 278 - if (kvm_pud_table_empty(kvm, start_pud)) 279 - clear_pgd_entry(kvm, pgd, start_addr); 285 + if (stage2_pud_table_empty(start_pud)) 286 + clear_stage2_pgd_entry(kvm, pgd, start_addr); 280 287 } 281 288 282 - 283 - static void unmap_range(struct kvm *kvm, pgd_t *pgdp, 284 - phys_addr_t start, u64 size) 289 + /** 290 + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range 291 + * @kvm: The VM pointer 292 + * @start: The intermediate physical base address of the range to unmap 293 + * @size: The size of the area to unmap 294 + * 295 + * Clear a range of 
stage-2 mappings, lowering the various ref-counts. Must 296 + * be called while holding mmu_lock (unless for freeing the stage2 pgd before 297 + * destroying the VM), otherwise another faulting VCPU may come in and mess 298 + * with things behind our backs. 299 + */ 300 + static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) 285 301 { 286 302 pgd_t *pgd; 287 303 phys_addr_t addr = start, end = start + size; 288 304 phys_addr_t next; 289 305 290 - pgd = pgdp + kvm_pgd_index(addr); 306 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 291 307 do { 292 - next = kvm_pgd_addr_end(addr, end); 293 - if (!pgd_none(*pgd)) 294 - unmap_puds(kvm, pgd, addr, next); 308 + next = stage2_pgd_addr_end(addr, end); 309 + if (!stage2_pgd_none(*pgd)) 310 + unmap_stage2_puds(kvm, pgd, addr, next); 295 311 } while (pgd++, addr = next, addr != end); 296 312 } 297 313 ··· 320 322 pmd_t *pmd; 321 323 phys_addr_t next; 322 324 323 - pmd = pmd_offset(pud, addr); 325 + pmd = stage2_pmd_offset(pud, addr); 324 326 do { 325 - next = kvm_pmd_addr_end(addr, end); 327 + next = stage2_pmd_addr_end(addr, end); 326 328 if (!pmd_none(*pmd)) { 327 - if (kvm_pmd_huge(*pmd)) 329 + if (pmd_thp_or_huge(*pmd)) 328 330 kvm_flush_dcache_pmd(*pmd); 329 331 else 330 332 stage2_flush_ptes(kvm, pmd, addr, next); ··· 338 340 pud_t *pud; 339 341 phys_addr_t next; 340 342 341 - pud = pud_offset(pgd, addr); 343 + pud = stage2_pud_offset(pgd, addr); 342 344 do { 343 - next = kvm_pud_addr_end(addr, end); 344 - if (!pud_none(*pud)) { 345 - if (pud_huge(*pud)) 345 + next = stage2_pud_addr_end(addr, end); 346 + if (!stage2_pud_none(*pud)) { 347 + if (stage2_pud_huge(*pud)) 346 348 kvm_flush_dcache_pud(*pud); 347 349 else 348 350 stage2_flush_pmds(kvm, pud, addr, next); ··· 358 360 phys_addr_t next; 359 361 pgd_t *pgd; 360 362 361 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 363 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 362 364 do { 363 - next = kvm_pgd_addr_end(addr, end); 365 + next = 
stage2_pgd_addr_end(addr, end); 364 366 stage2_flush_puds(kvm, pgd, addr, next); 365 367 } while (pgd++, addr = next, addr != end); 366 368 } ··· 389 391 srcu_read_unlock(&kvm->srcu, idx); 390 392 } 391 393 394 + static void clear_hyp_pgd_entry(pgd_t *pgd) 395 + { 396 + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); 397 + pgd_clear(pgd); 398 + pud_free(NULL, pud_table); 399 + put_page(virt_to_page(pgd)); 400 + } 401 + 402 + static void clear_hyp_pud_entry(pud_t *pud) 403 + { 404 + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); 405 + VM_BUG_ON(pud_huge(*pud)); 406 + pud_clear(pud); 407 + pmd_free(NULL, pmd_table); 408 + put_page(virt_to_page(pud)); 409 + } 410 + 411 + static void clear_hyp_pmd_entry(pmd_t *pmd) 412 + { 413 + pte_t *pte_table = pte_offset_kernel(pmd, 0); 414 + VM_BUG_ON(pmd_thp_or_huge(*pmd)); 415 + pmd_clear(pmd); 416 + pte_free_kernel(NULL, pte_table); 417 + put_page(virt_to_page(pmd)); 418 + } 419 + 420 + static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) 421 + { 422 + pte_t *pte, *start_pte; 423 + 424 + start_pte = pte = pte_offset_kernel(pmd, addr); 425 + do { 426 + if (!pte_none(*pte)) { 427 + kvm_set_pte(pte, __pte(0)); 428 + put_page(virt_to_page(pte)); 429 + } 430 + } while (pte++, addr += PAGE_SIZE, addr != end); 431 + 432 + if (hyp_pte_table_empty(start_pte)) 433 + clear_hyp_pmd_entry(pmd); 434 + } 435 + 436 + static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) 437 + { 438 + phys_addr_t next; 439 + pmd_t *pmd, *start_pmd; 440 + 441 + start_pmd = pmd = pmd_offset(pud, addr); 442 + do { 443 + next = pmd_addr_end(addr, end); 444 + /* Hyp doesn't use huge pmds */ 445 + if (!pmd_none(*pmd)) 446 + unmap_hyp_ptes(pmd, addr, next); 447 + } while (pmd++, addr = next, addr != end); 448 + 449 + if (hyp_pmd_table_empty(start_pmd)) 450 + clear_hyp_pud_entry(pud); 451 + } 452 + 453 + static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) 454 + { 455 + phys_addr_t next; 456 + 
pud_t *pud, *start_pud; 457 + 458 + start_pud = pud = pud_offset(pgd, addr); 459 + do { 460 + next = pud_addr_end(addr, end); 461 + /* Hyp doesn't use huge puds */ 462 + if (!pud_none(*pud)) 463 + unmap_hyp_pmds(pud, addr, next); 464 + } while (pud++, addr = next, addr != end); 465 + 466 + if (hyp_pud_table_empty(start_pud)) 467 + clear_hyp_pgd_entry(pgd); 468 + } 469 + 470 + static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) 471 + { 472 + pgd_t *pgd; 473 + phys_addr_t addr = start, end = start + size; 474 + phys_addr_t next; 475 + 476 + /* 477 + * We don't unmap anything from HYP, except at the hyp tear down. 478 + * Hence, we don't have to invalidate the TLBs here. 479 + */ 480 + pgd = pgdp + pgd_index(addr); 481 + do { 482 + next = pgd_addr_end(addr, end); 483 + if (!pgd_none(*pgd)) 484 + unmap_hyp_puds(pgd, addr, next); 485 + } while (pgd++, addr = next, addr != end); 486 + } 487 + 392 488 /** 393 489 * free_boot_hyp_pgd - free HYP boot page tables 394 490 * ··· 493 401 mutex_lock(&kvm_hyp_pgd_mutex); 494 402 495 403 if (boot_hyp_pgd) { 496 - unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); 497 - unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 404 + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); 405 + unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 498 406 free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); 499 407 boot_hyp_pgd = NULL; 500 408 } 501 409 502 410 if (hyp_pgd) 503 - unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 411 + unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 504 412 505 413 mutex_unlock(&kvm_hyp_pgd_mutex); 506 414 } ··· 525 433 526 434 if (hyp_pgd) { 527 435 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) 528 - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 436 + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 529 437 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) 530 - unmap_range(NULL, 
hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 438 + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 531 439 532 440 free_pages((unsigned long)hyp_pgd, hyp_pgd_order); 533 441 hyp_pgd = NULL; ··· 737 645 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); 738 646 } 739 647 740 - /* Free the HW pgd, one page at a time */ 741 - static void kvm_free_hwpgd(void *hwpgd) 742 - { 743 - free_pages_exact(hwpgd, kvm_get_hwpgd_size()); 744 - } 745 - 746 - /* Allocate the HW PGD, making sure that each page gets its own refcount */ 747 - static void *kvm_alloc_hwpgd(void) 748 - { 749 - unsigned int size = kvm_get_hwpgd_size(); 750 - 751 - return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); 752 - } 753 - 754 648 /** 755 649 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. 756 650 * @kvm: The KVM struct pointer for the VM. ··· 751 673 int kvm_alloc_stage2_pgd(struct kvm *kvm) 752 674 { 753 675 pgd_t *pgd; 754 - void *hwpgd; 755 676 756 677 if (kvm->arch.pgd != NULL) { 757 678 kvm_err("kvm_arch already initialized?\n"); 758 679 return -EINVAL; 759 680 } 760 681 761 - hwpgd = kvm_alloc_hwpgd(); 762 - if (!hwpgd) 682 + /* Allocate the HW PGD, making sure that each page gets its own refcount */ 683 + pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); 684 + if (!pgd) 763 685 return -ENOMEM; 764 - 765 - /* When the kernel uses more levels of page tables than the 766 - * guest, we allocate a fake PGD and pre-populate it to point 767 - * to the next-level page table, which will be the real 768 - * initial page table pointed to by the VTTBR. 769 - * 770 - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for 771 - * the PMD and the kernel will use folded pud. 772 - * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD 773 - * pages. 774 - */ 775 - if (KVM_PREALLOC_LEVEL > 0) { 776 - int i; 777 - 778 - /* 779 - * Allocate fake pgd for the page table manipulation macros to 780 - * work. 
This is not used by the hardware and we have no 781 - * alignment requirement for this allocation. 782 - */ 783 - pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), 784 - GFP_KERNEL | __GFP_ZERO); 785 - 786 - if (!pgd) { 787 - kvm_free_hwpgd(hwpgd); 788 - return -ENOMEM; 789 - } 790 - 791 - /* Plug the HW PGD into the fake one. */ 792 - for (i = 0; i < PTRS_PER_S2_PGD; i++) { 793 - if (KVM_PREALLOC_LEVEL == 1) 794 - pgd_populate(NULL, pgd + i, 795 - (pud_t *)hwpgd + i * PTRS_PER_PUD); 796 - else if (KVM_PREALLOC_LEVEL == 2) 797 - pud_populate(NULL, pud_offset(pgd, 0) + i, 798 - (pmd_t *)hwpgd + i * PTRS_PER_PMD); 799 - } 800 - } else { 801 - /* 802 - * Allocate actual first-level Stage-2 page table used by the 803 - * hardware for Stage-2 page table walks. 804 - */ 805 - pgd = (pgd_t *)hwpgd; 806 - } 807 686 808 687 kvm_clean_pgd(pgd); 809 688 kvm->arch.pgd = pgd; 810 689 return 0; 811 - } 812 - 813 - /** 814 - * unmap_stage2_range -- Clear stage2 page table entries to unmap a range 815 - * @kvm: The VM pointer 816 - * @start: The intermediate physical base address of the range to unmap 817 - * @size: The size of the area to unmap 818 - * 819 - * Clear a range of stage-2 mappings, lowering the various ref-counts. Must 820 - * be called while holding mmu_lock (unless for freeing the stage2 pgd before 821 - * destroying the VM), otherwise another faulting VCPU may come in and mess 822 - * with things behind our backs. 
823 - */ 824 - static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) 825 - { 826 - unmap_range(kvm, kvm->arch.pgd, start, size); 827 690 } 828 691 829 692 static void stage2_unmap_memslot(struct kvm *kvm, ··· 849 830 return; 850 831 851 832 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); 852 - kvm_free_hwpgd(kvm_get_hwpgd(kvm)); 853 - if (KVM_PREALLOC_LEVEL > 0) 854 - kfree(kvm->arch.pgd); 855 - 833 + /* Free the HW pgd, one page at a time */ 834 + free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); 856 835 kvm->arch.pgd = NULL; 857 836 } 858 837 ··· 860 843 pgd_t *pgd; 861 844 pud_t *pud; 862 845 863 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 864 - if (WARN_ON(pgd_none(*pgd))) { 846 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 847 + if (WARN_ON(stage2_pgd_none(*pgd))) { 865 848 if (!cache) 866 849 return NULL; 867 850 pud = mmu_memory_cache_alloc(cache); 868 - pgd_populate(NULL, pgd, pud); 851 + stage2_pgd_populate(pgd, pud); 869 852 get_page(virt_to_page(pgd)); 870 853 } 871 854 872 - return pud_offset(pgd, addr); 855 + return stage2_pud_offset(pgd, addr); 873 856 } 874 857 875 858 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, ··· 879 862 pmd_t *pmd; 880 863 881 864 pud = stage2_get_pud(kvm, cache, addr); 882 - if (pud_none(*pud)) { 865 + if (stage2_pud_none(*pud)) { 883 866 if (!cache) 884 867 return NULL; 885 868 pmd = mmu_memory_cache_alloc(cache); 886 - pud_populate(NULL, pud, pmd); 869 + stage2_pud_populate(pud, pmd); 887 870 get_page(virt_to_page(pud)); 888 871 } 889 872 890 - return pmd_offset(pud, addr); 873 + return stage2_pmd_offset(pud, addr); 891 874 } 892 875 893 876 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache ··· 910 893 VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); 911 894 912 895 old_pmd = *pmd; 913 - kvm_set_pmd(pmd, *new_pmd); 914 - if (pmd_present(old_pmd)) 896 + if (pmd_present(old_pmd)) { 897 + pmd_clear(pmd); 915 898 
kvm_tlb_flush_vmid_ipa(kvm, addr); 916 - else 899 + } else { 917 900 get_page(virt_to_page(pmd)); 901 + } 902 + 903 + kvm_set_pmd(pmd, *new_pmd); 918 904 return 0; 919 905 } 920 906 ··· 966 946 967 947 /* Create 2nd stage page table mapping - Level 3 */ 968 948 old_pte = *pte; 969 - kvm_set_pte(pte, *new_pte); 970 - if (pte_present(old_pte)) 949 + if (pte_present(old_pte)) { 950 + kvm_set_pte(pte, __pte(0)); 971 951 kvm_tlb_flush_vmid_ipa(kvm, addr); 972 - else 952 + } else { 973 953 get_page(virt_to_page(pte)); 954 + } 974 955 956 + kvm_set_pte(pte, *new_pte); 975 957 return 0; 958 + } 959 + 960 + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 961 + static int stage2_ptep_test_and_clear_young(pte_t *pte) 962 + { 963 + if (pte_young(*pte)) { 964 + *pte = pte_mkold(*pte); 965 + return 1; 966 + } 967 + return 0; 968 + } 969 + #else 970 + static int stage2_ptep_test_and_clear_young(pte_t *pte) 971 + { 972 + return __ptep_test_and_clear_young(pte); 973 + } 974 + #endif 975 + 976 + static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) 977 + { 978 + return stage2_ptep_test_and_clear_young((pte_t *)pmd); 976 979 } 977 980 978 981 /** ··· 1021 978 pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); 1022 979 1023 980 if (writable) 1024 - kvm_set_s2pte_writable(&pte); 981 + pte = kvm_s2pte_mkwrite(pte); 1025 982 1026 983 ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, 1027 984 KVM_NR_MEM_OBJS); ··· 1121 1078 pmd_t *pmd; 1122 1079 phys_addr_t next; 1123 1080 1124 - pmd = pmd_offset(pud, addr); 1081 + pmd = stage2_pmd_offset(pud, addr); 1125 1082 1126 1083 do { 1127 - next = kvm_pmd_addr_end(addr, end); 1084 + next = stage2_pmd_addr_end(addr, end); 1128 1085 if (!pmd_none(*pmd)) { 1129 - if (kvm_pmd_huge(*pmd)) { 1086 + if (pmd_thp_or_huge(*pmd)) { 1130 1087 if (!kvm_s2pmd_readonly(pmd)) 1131 1088 kvm_set_s2pmd_readonly(pmd); 1132 1089 } else { ··· 1149 1106 pud_t *pud; 1150 1107 phys_addr_t next; 1151 1108 1152 - pud = pud_offset(pgd, addr); 1109 + pud = 
stage2_pud_offset(pgd, addr); 1153 1110 do { 1154 - next = kvm_pud_addr_end(addr, end); 1155 - if (!pud_none(*pud)) { 1111 + next = stage2_pud_addr_end(addr, end); 1112 + if (!stage2_pud_none(*pud)) { 1156 1113 /* TODO:PUD not supported, revisit later if supported */ 1157 - BUG_ON(kvm_pud_huge(*pud)); 1114 + BUG_ON(stage2_pud_huge(*pud)); 1158 1115 stage2_wp_pmds(pud, addr, next); 1159 1116 } 1160 1117 } while (pud++, addr = next, addr != end); ··· 1171 1128 pgd_t *pgd; 1172 1129 phys_addr_t next; 1173 1130 1174 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 1131 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 1175 1132 do { 1176 1133 /* 1177 1134 * Release kvm_mmu_lock periodically if the memory region is ··· 1183 1140 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) 1184 1141 cond_resched_lock(&kvm->mmu_lock); 1185 1142 1186 - next = kvm_pgd_addr_end(addr, end); 1187 - if (pgd_present(*pgd)) 1143 + next = stage2_pgd_addr_end(addr, end); 1144 + if (stage2_pgd_present(*pgd)) 1188 1145 stage2_wp_puds(pgd, addr, next); 1189 1146 } while (pgd++, addr = next, addr != end); 1190 1147 } ··· 1363 1320 pmd_t new_pmd = pfn_pmd(pfn, mem_type); 1364 1321 new_pmd = pmd_mkhuge(new_pmd); 1365 1322 if (writable) { 1366 - kvm_set_s2pmd_writable(&new_pmd); 1323 + new_pmd = kvm_s2pmd_mkwrite(new_pmd); 1367 1324 kvm_set_pfn_dirty(pfn); 1368 1325 } 1369 1326 coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); ··· 1372 1329 pte_t new_pte = pfn_pte(pfn, mem_type); 1373 1330 1374 1331 if (writable) { 1375 - kvm_set_s2pte_writable(&new_pte); 1332 + new_pte = kvm_s2pte_mkwrite(new_pte); 1376 1333 kvm_set_pfn_dirty(pfn); 1377 1334 mark_page_dirty(kvm, gfn); 1378 1335 } ··· 1391 1348 * Resolve the access fault by making the page young again. 1392 1349 * Note that because the faulting entry is guaranteed not to be 1393 1350 * cached in the TLB, we don't need to invalidate anything. 
1351 + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), 1352 + * so there is no need for atomic (pte|pmd)_mkyoung operations. 1394 1353 */ 1395 1354 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) 1396 1355 { ··· 1409 1364 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1410 1365 goto out; 1411 1366 1412 - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ 1367 + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ 1413 1368 *pmd = pmd_mkyoung(*pmd); 1414 1369 pfn = pmd_pfn(*pmd); 1415 1370 pfn_valid = true; ··· 1633 1588 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1634 1589 return 0; 1635 1590 1636 - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ 1637 - if (pmd_young(*pmd)) { 1638 - *pmd = pmd_mkold(*pmd); 1639 - return 1; 1640 - } 1641 - 1642 - return 0; 1643 - } 1591 + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ 1592 + return stage2_pmdp_test_and_clear_young(pmd); 1644 1593 1645 1594 pte = pte_offset_kernel(pmd, gpa); 1646 1595 if (pte_none(*pte)) 1647 1596 return 0; 1648 1597 1649 - if (pte_young(*pte)) { 1650 - *pte = pte_mkold(*pte); /* Just a page... */ 1651 - return 1; 1652 - } 1653 - 1654 - return 0; 1598 + return stage2_ptep_test_and_clear_young(pte); 1655 1599 } 1656 1600 1657 1601 static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) ··· 1652 1618 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1653 1619 return 0; 1654 1620 1655 - if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ 1621 + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ 1656 1622 return pmd_young(*pmd); 1657 1623 1658 1624 pte = pte_offset_kernel(pmd, gpa);
+50 -35
arch/arm64/include/asm/kvm_arm.h
··· 96 96 SCTLR_EL2_SA | SCTLR_EL2_I) 97 97 98 98 /* TCR_EL2 Registers bits */ 99 - #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) 100 - #define TCR_EL2_TBI (1 << 20) 101 - #define TCR_EL2_PS (7 << 16) 102 - #define TCR_EL2_PS_40B (2 << 16) 103 - #define TCR_EL2_TG0 (1 << 14) 104 - #define TCR_EL2_SH0 (3 << 12) 105 - #define TCR_EL2_ORGN0 (3 << 10) 106 - #define TCR_EL2_IRGN0 (3 << 8) 107 - #define TCR_EL2_T0SZ 0x3f 108 - #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ 109 - TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) 99 + #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) 100 + #define TCR_EL2_TBI (1 << 20) 101 + #define TCR_EL2_PS_SHIFT 16 102 + #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) 103 + #define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT) 104 + #define TCR_EL2_TG0_MASK TCR_TG0_MASK 105 + #define TCR_EL2_SH0_MASK TCR_SH0_MASK 106 + #define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK 107 + #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK 108 + #define TCR_EL2_T0SZ_MASK 0x3f 109 + #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ 110 + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) 110 111 111 112 /* VTCR_EL2 Registers bits */ 112 113 #define VTCR_EL2_RES1 (1 << 31) 113 - #define VTCR_EL2_PS_MASK (7 << 16) 114 - #define VTCR_EL2_TG0_MASK (1 << 14) 115 - #define VTCR_EL2_TG0_4K (0 << 14) 116 - #define VTCR_EL2_TG0_64K (1 << 14) 117 - #define VTCR_EL2_SH0_MASK (3 << 12) 118 - #define VTCR_EL2_SH0_INNER (3 << 12) 119 - #define VTCR_EL2_ORGN0_MASK (3 << 10) 120 - #define VTCR_EL2_ORGN0_WBWA (1 << 10) 121 - #define VTCR_EL2_IRGN0_MASK (3 << 8) 122 - #define VTCR_EL2_IRGN0_WBWA (1 << 8) 123 - #define VTCR_EL2_SL0_MASK (3 << 6) 124 - #define VTCR_EL2_SL0_LVL1 (1 << 6) 114 + #define VTCR_EL2_HD (1 << 22) 115 + #define VTCR_EL2_HA (1 << 21) 116 + #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK 117 + #define VTCR_EL2_TG0_MASK TCR_TG0_MASK 118 + #define VTCR_EL2_TG0_4K TCR_TG0_4K 119 + #define VTCR_EL2_TG0_16K TCR_TG0_16K 120 + #define VTCR_EL2_TG0_64K TCR_TG0_64K 121 + 
#define VTCR_EL2_SH0_MASK TCR_SH0_MASK 122 + #define VTCR_EL2_SH0_INNER TCR_SH0_INNER 123 + #define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK 124 + #define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA 125 + #define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK 126 + #define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA 127 + #define VTCR_EL2_SL0_SHIFT 6 128 + #define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) 129 + #define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT) 125 130 #define VTCR_EL2_T0SZ_MASK 0x3f 126 131 #define VTCR_EL2_T0SZ_40B 24 127 132 #define VTCR_EL2_VS_SHIFT 19 ··· 142 137 * (see hyp-init.S). 143 138 * 144 139 * Note that when using 4K pages, we concatenate two first level page tables 145 - * together. 140 + * together. With 16K pages, we concatenate 16 first level page tables. 146 141 * 147 142 * The magic numbers used for VTTBR_X in this patch can be found in Tables 148 143 * D4-23 and D4-25 in ARM DDI 0487A.b. 149 144 */ 145 + 146 + #define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B 147 + #define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ 148 + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) 149 + 150 150 #ifdef CONFIG_ARM64_64K_PAGES 151 151 /* 152 152 * Stage2 translation configuration: 153 - * 40bits input (T0SZ = 24) 154 153 * 64kB pages (TG0 = 1) 155 154 * 2 level page tables (SL = 1) 156 155 */ 157 - #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ 158 - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ 159 - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) 160 - #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) 161 - #else 156 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) 157 + #define VTTBR_X_TGRAN_MAGIC 38 158 + #elif defined(CONFIG_ARM64_16K_PAGES) 162 159 /* 163 160 * Stage2 translation configuration: 164 - * 40bits input (T0SZ = 24) 161 + * 16kB pages (TG0 = 2) 162 + * 2 level page tables (SL = 1) 163 + */ 164 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1) 165 + #define VTTBR_X_TGRAN_MAGIC 42 166 + #else /* 4K */ 167 + /* 168 + 
* Stage2 translation configuration: 165 169 * 4kB pages (TG0 = 0) 166 170 * 3 level page tables (SL = 1) 167 171 */ 168 - #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ 169 - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ 170 - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) 171 - #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) 172 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1) 173 + #define VTTBR_X_TGRAN_MAGIC 37 172 174 #endif 175 + 176 + #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) 177 + #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) 173 178 174 179 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) 175 180 #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+27 -84
arch/arm64/include/asm/kvm_mmu.h
··· 45 45 */ 46 46 #define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) 47 47 48 - /* 49 - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation 50 - * levels in addition to the PGD and potentially the PUD which are 51 - * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2 52 - * tables use one level of tables less than the kernel. 53 - */ 54 - #ifdef CONFIG_ARM64_64K_PAGES 55 - #define KVM_MMU_CACHE_MIN_PAGES 1 56 - #else 57 - #define KVM_MMU_CACHE_MIN_PAGES 2 58 - #endif 59 - 60 48 #ifdef __ASSEMBLY__ 61 49 62 50 #include <asm/alternative.h> ··· 79 91 #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) 80 92 #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) 81 93 94 + #include <asm/stage2_pgtable.h> 95 + 82 96 int create_hyp_mappings(void *from, void *to); 83 97 int create_hyp_io_mappings(void *from, void *to, phys_addr_t); 84 98 void free_boot_hyp_pgd(void); ··· 111 121 static inline void kvm_clean_pte(pte_t *pte) {} 112 122 static inline void kvm_clean_pte_entry(pte_t *pte) {} 113 123 114 - static inline void kvm_set_s2pte_writable(pte_t *pte) 124 + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) 115 125 { 116 - pte_val(*pte) |= PTE_S2_RDWR; 126 + pte_val(pte) |= PTE_S2_RDWR; 127 + return pte; 117 128 } 118 129 119 - static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 130 + static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) 120 131 { 121 - pmd_val(*pmd) |= PMD_S2_RDWR; 132 + pmd_val(pmd) |= PMD_S2_RDWR; 133 + return pmd; 122 134 } 123 135 124 136 static inline void kvm_set_s2pte_readonly(pte_t *pte) 125 137 { 126 - pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; 138 + pteval_t pteval; 139 + unsigned long tmp; 140 + 141 + asm volatile("// kvm_set_s2pte_readonly\n" 142 + " prfm pstl1strm, %2\n" 143 + "1: ldxr %0, %2\n" 144 + " and %0, %0, %3 // clear PTE_S2_RDWR\n" 145 + " orr %0, %0, %4 // set PTE_S2_RDONLY\n" 146 + " stxr %w1, %0, %2\n" 147 + " cbnz %w1, 1b\n" 148 + : "=&r" (pteval), "=&r" (tmp), "+Q" 
(pte_val(*pte)) 149 + : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); 127 150 } 128 151 129 152 static inline bool kvm_s2pte_readonly(pte_t *pte) ··· 146 143 147 144 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 148 145 { 149 - pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; 146 + kvm_set_s2pte_readonly((pte_t *)pmd); 150 147 } 151 148 152 149 static inline bool kvm_s2pmd_readonly(pmd_t *pmd) 153 150 { 154 - return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; 155 - } 156 - 157 - 158 - #define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) 159 - #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) 160 - #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) 161 - 162 - /* 163 - * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address 164 - * the entire IPA input range with a single pgd entry, and we would only need 165 - * one pgd entry. Note that in this case, the pgd is actually not used by 166 - * the MMU for Stage-2 translations, but is merely a fake pgd used as a data 167 - * structure for the kernel pgtable macros to work. 168 - */ 169 - #if PGDIR_SHIFT > KVM_PHYS_SHIFT 170 - #define PTRS_PER_S2_PGD_SHIFT 0 171 - #else 172 - #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) 173 - #endif 174 - #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) 175 - 176 - #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 177 - 178 - /* 179 - * If we are concatenating first level stage-2 page tables, we would have less 180 - * than or equal to 16 pointers in the fake PGD, because that's what the 181 - * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) 182 - * represents the first level for the host, and we add 1 to go to the next 183 - * level (which uses contatenation) for the stage-2 tables. 
184 - */ 185 - #if PTRS_PER_S2_PGD <= 16 186 - #define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) 187 - #else 188 - #define KVM_PREALLOC_LEVEL (0) 189 - #endif 190 - 191 - static inline void *kvm_get_hwpgd(struct kvm *kvm) 192 - { 193 - pgd_t *pgd = kvm->arch.pgd; 194 - pud_t *pud; 195 - 196 - if (KVM_PREALLOC_LEVEL == 0) 197 - return pgd; 198 - 199 - pud = pud_offset(pgd, 0); 200 - if (KVM_PREALLOC_LEVEL == 1) 201 - return pud; 202 - 203 - BUG_ON(KVM_PREALLOC_LEVEL != 2); 204 - return pmd_offset(pud, 0); 205 - } 206 - 207 - static inline unsigned int kvm_get_hwpgd_size(void) 208 - { 209 - if (KVM_PREALLOC_LEVEL > 0) 210 - return PTRS_PER_S2_PGD * PAGE_SIZE; 211 - return PTRS_PER_S2_PGD * sizeof(pgd_t); 151 + return kvm_s2pte_readonly((pte_t *)pmd); 212 152 } 213 153 214 154 static inline bool kvm_page_empty(void *ptr) ··· 160 214 return page_count(ptr_page) == 1; 161 215 } 162 216 163 - #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) 217 + #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) 164 218 165 219 #ifdef __PAGETABLE_PMD_FOLDED 166 - #define kvm_pmd_table_empty(kvm, pmdp) (0) 220 + #define hyp_pmd_table_empty(pmdp) (0) 167 221 #else 168 - #define kvm_pmd_table_empty(kvm, pmdp) \ 169 - (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2)) 222 + #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 170 223 #endif 171 224 172 225 #ifdef __PAGETABLE_PUD_FOLDED 173 - #define kvm_pud_table_empty(kvm, pudp) (0) 226 + #define hyp_pud_table_empty(pudp) (0) 174 227 #else 175 - #define kvm_pud_table_empty(kvm, pudp) \ 176 - (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1)) 228 + #define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) 177 229 #endif 178 - 179 230 180 231 struct kvm; 181 232
+63 -17
arch/arm64/include/asm/pgtable-hwdef.h
··· 208 208 #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) 209 209 #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) 210 210 #define TCR_TxSZ_WIDTH 6 211 - #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) 212 - #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) 213 - #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) 214 - #define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) 215 - #define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) 216 - #define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) 217 - #define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) 218 - #define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) 219 - #define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) 220 - #define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) 221 - #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) 222 - #define TCR_TG0_4K (UL(0) << 14) 223 - #define TCR_TG0_64K (UL(1) << 14) 224 - #define TCR_TG0_16K (UL(2) << 14) 225 - #define TCR_TG1_16K (UL(1) << 30) 226 - #define TCR_TG1_4K (UL(2) << 30) 227 - #define TCR_TG1_64K (UL(3) << 30) 211 + 212 + #define TCR_IRGN0_SHIFT 8 213 + #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) 214 + #define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) 215 + #define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) 216 + #define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) 217 + #define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) 218 + 219 + #define TCR_IRGN1_SHIFT 24 220 + #define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) 221 + #define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) 222 + #define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT) 223 + #define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT) 224 + #define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT) 225 + 226 + #define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC) 227 + #define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) 228 + #define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT) 229 + #define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA) 230 + #define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK) 231 + 232 + 233 + 
#define TCR_ORGN0_SHIFT 10 234 + #define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) 235 + #define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) 236 + #define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) 237 + #define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) 238 + #define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) 239 + 240 + #define TCR_ORGN1_SHIFT 26 241 + #define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT) 242 + #define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT) 243 + #define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT) 244 + #define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT) 245 + #define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT) 246 + 247 + #define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC) 248 + #define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA) 249 + #define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT) 250 + #define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA) 251 + #define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK) 252 + 253 + #define TCR_SH0_SHIFT 12 254 + #define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) 255 + #define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) 256 + 257 + #define TCR_SH1_SHIFT 28 258 + #define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT) 259 + #define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT) 260 + #define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER) 261 + 262 + #define TCR_TG0_SHIFT 14 263 + #define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) 264 + #define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) 265 + #define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) 266 + #define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) 267 + 268 + #define TCR_TG1_SHIFT 30 269 + #define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT) 270 + #define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT) 271 + #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) 272 + #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) 273 + 228 274 #define TCR_ASID16 (UL(1) << 36) 229 275 #define TCR_TBI0 (UL(1) << 37) 230 276 #define TCR_HA (UL(1) << 39)
+11 -4
arch/arm64/include/asm/pgtable.h
··· 290 290 #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) 291 291 #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) 292 292 293 + #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) 294 + 293 295 #define __HAVE_ARCH_PMD_WRITE 294 296 #define pmd_write(pmd) pte_write(pmd_pte(pmd)) 295 297 ··· 532 530 * Atomic pte/pmd modifications. 533 531 */ 534 532 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 535 - static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 536 - unsigned long address, 537 - pte_t *ptep) 533 + static inline int __ptep_test_and_clear_young(pte_t *ptep) 538 534 { 539 535 pteval_t pteval; 540 536 unsigned int tmp, res; 541 537 542 - asm volatile("// ptep_test_and_clear_young\n" 538 + asm volatile("// __ptep_test_and_clear_young\n" 543 539 " prfm pstl1strm, %2\n" 544 540 "1: ldxr %0, %2\n" 545 541 " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" ··· 548 548 : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); 549 549 550 550 return res; 551 + } 552 + 553 + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 554 + unsigned long address, 555 + pte_t *ptep) 556 + { 557 + return __ptep_test_and_clear_young(ptep); 551 558 } 552 559 553 560 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+42
arch/arm64/include/asm/stage2_pgtable-nopmd.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #ifndef __ARM64_S2_PGTABLE_NOPMD_H_ 18 + #define __ARM64_S2_PGTABLE_NOPMD_H_ 19 + 20 + #include <asm/stage2_pgtable-nopud.h> 21 + 22 + #define __S2_PGTABLE_PMD_FOLDED 23 + 24 + #define S2_PMD_SHIFT S2_PUD_SHIFT 25 + #define S2_PTRS_PER_PMD 1 26 + #define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) 27 + #define S2_PMD_MASK (~(S2_PMD_SIZE-1)) 28 + 29 + #define stage2_pud_none(pud) (0) 30 + #define stage2_pud_present(pud) (1) 31 + #define stage2_pud_clear(pud) do { } while (0) 32 + #define stage2_pud_populate(pud, pmd) do { } while (0) 33 + #define stage2_pmd_offset(pud, address) ((pmd_t *)(pud)) 34 + 35 + #define stage2_pmd_free(pmd) do { } while (0) 36 + 37 + #define stage2_pmd_addr_end(addr, end) (end) 38 + 39 + #define stage2_pud_huge(pud) (0) 40 + #define stage2_pmd_table_empty(pmdp) (0) 41 + 42 + #endif
+39
arch/arm64/include/asm/stage2_pgtable-nopud.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #ifndef __ARM64_S2_PGTABLE_NOPUD_H_ 18 + #define __ARM64_S2_PGTABLE_NOPUD_H_ 19 + 20 + #define __S2_PGTABLE_PUD_FOLDED 21 + 22 + #define S2_PUD_SHIFT S2_PGDIR_SHIFT 23 + #define S2_PTRS_PER_PUD 1 24 + #define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) 25 + #define S2_PUD_MASK (~(S2_PUD_SIZE-1)) 26 + 27 + #define stage2_pgd_none(pgd) (0) 28 + #define stage2_pgd_present(pgd) (1) 29 + #define stage2_pgd_clear(pgd) do { } while (0) 30 + #define stage2_pgd_populate(pgd, pud) do { } while (0) 31 + 32 + #define stage2_pud_offset(pgd, address) ((pud_t *)(pgd)) 33 + 34 + #define stage2_pud_free(x) do { } while (0) 35 + 36 + #define stage2_pud_addr_end(addr, end) (end) 37 + #define stage2_pud_table_empty(pmdp) (0) 38 + 39 + #endif
+142
arch/arm64/include/asm/stage2_pgtable.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * stage2 page table helpers 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, 11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 + * GNU General Public License for more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __ARM64_S2_PGTABLE_H_ 20 + #define __ARM64_S2_PGTABLE_H_ 21 + 22 + #include <asm/pgtable.h> 23 + 24 + /* 25 + * The hardware supports concatenation of up to 16 tables at stage2 entry level 26 + * and we use the feature whenever possible. 27 + * 28 + * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3). 29 + * On arm64, the smallest PAGE_SIZE supported is 4k, which means 30 + * (PAGE_SHIFT - 3) > 4 holds for all page sizes. 31 + * This implies, the total number of page table levels at stage2 expected 32 + * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4) 33 + * in normal translations(e.g, stage1), since we cannot have another level in 34 + * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4). 35 + */ 36 + #define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4) 37 + 38 + /* 39 + * With all the supported VA_BITs and 40bit guest IPA, the following condition 40 + * is always true: 41 + * 42 + * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS 43 + * 44 + * We base our stage-2 page table walker helpers on this assumption and 45 + * fall back to using the host version of the helper wherever possible. 
46 + * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back 47 + * to using the host version, since it is guaranteed it is not folded at host. 48 + * 49 + * If the condition breaks in the future, we can rearrange the host level 50 + * definitions and reuse them for stage2. Till then... 51 + */ 52 + #if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS 53 + #error "Unsupported combination of guest IPA and host VA_BITS." 54 + #endif 55 + 56 + /* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */ 57 + #define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS) 58 + #define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT) 59 + #define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1)) 60 + 61 + /* 62 + * The number of PTRS across all concatenated stage2 tables given by the 63 + * number of bits resolved at the initial level. 64 + */ 65 + #define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT)) 66 + 67 + /* 68 + * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation 69 + * levels in addition to the PGD. 70 + */ 71 + #define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1) 72 + 73 + 74 + #if STAGE2_PGTABLE_LEVELS > 3 75 + 76 + #define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) 77 + #define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) 78 + #define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) 79 + 80 + #define stage2_pgd_none(pgd) pgd_none(pgd) 81 + #define stage2_pgd_clear(pgd) pgd_clear(pgd) 82 + #define stage2_pgd_present(pgd) pgd_present(pgd) 83 + #define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 84 + #define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 85 + #define stage2_pud_free(pud) pud_free(NULL, pud) 86 + 87 + #define stage2_pud_table_empty(pudp) kvm_page_empty(pudp) 88 + 89 + static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) 90 + { 91 + phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; 92 + 93 + return (boundary - 1 < end - 1) ? 
boundary : end; 94 + } 95 + 96 + #endif /* STAGE2_PGTABLE_LEVELS > 3 */ 97 + 98 + 99 + #if STAGE2_PGTABLE_LEVELS > 2 100 + 101 + #define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) 102 + #define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT) 103 + #define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) 104 + 105 + #define stage2_pud_none(pud) pud_none(pud) 106 + #define stage2_pud_clear(pud) pud_clear(pud) 107 + #define stage2_pud_present(pud) pud_present(pud) 108 + #define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 109 + #define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 110 + #define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 111 + 112 + #define stage2_pud_huge(pud) pud_huge(pud) 113 + #define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 114 + 115 + static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 116 + { 117 + phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; 118 + 119 + return (boundary - 1 < end - 1) ? boundary : end; 120 + } 121 + 122 + #endif /* STAGE2_PGTABLE_LEVELS > 2 */ 123 + 124 + #define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 125 + 126 + #if STAGE2_PGTABLE_LEVELS == 2 127 + #include <asm/stage2_pgtable-nopmd.h> 128 + #elif STAGE2_PGTABLE_LEVELS == 3 129 + #include <asm/stage2_pgtable-nopud.h> 130 + #endif 131 + 132 + 133 + #define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 134 + 135 + static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 136 + { 137 + phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK; 138 + 139 + return (boundary - 1 < end - 1) ? boundary : end; 140 + } 141 + 142 + #endif /* __ARM64_S2_PGTABLE_H_ */
-1
arch/arm64/kvm/Kconfig
··· 22 22 config KVM 23 23 bool "Kernel-based Virtual Machine (KVM) support" 24 24 depends on OF 25 - depends on !ARM64_16K_PAGES 26 25 select MMU_NOTIFIER 27 26 select PREEMPT_NOTIFIERS 28 27 select ANON_INODES
+8
arch/arm64/kvm/hyp/s2-setup.c
··· 66 66 val |= 64 - (parange > 40 ? 40 : parange); 67 67 68 68 /* 69 + * Check the availability of Hardware Access Flag / Dirty Bit 70 + * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. 71 + */ 72 + tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; 73 + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) 74 + val |= VTCR_EL2_HA; 75 + 76 + /* 69 77 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS 70 78 * bit in VTCR_EL2. 71 79 */
+7 -4
drivers/clocksource/arm_arch_timer.c
··· 468 468 .mask = CLOCKSOURCE_MASK(56), 469 469 }; 470 470 471 - static struct timecounter timecounter; 471 + static struct arch_timer_kvm_info arch_timer_kvm_info; 472 472 473 - struct timecounter *arch_timer_get_timecounter(void) 473 + struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) 474 474 { 475 - return &timecounter; 475 + return &arch_timer_kvm_info; 476 476 } 477 477 478 478 static void __init arch_counter_register(unsigned type) ··· 500 500 clocksource_register_hz(&clocksource_counter, arch_timer_rate); 501 501 cyclecounter.mult = clocksource_counter.mult; 502 502 cyclecounter.shift = clocksource_counter.shift; 503 - timecounter_init(&timecounter, &cyclecounter, start_count); 503 + timecounter_init(&arch_timer_kvm_info.timecounter, 504 + &cyclecounter, start_count); 504 505 505 506 /* 56 bits minimum, so we assume worst case rollover */ 506 507 sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); ··· 745 744 746 745 arch_timer_register(); 747 746 arch_timer_common_init(); 747 + 748 + arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI]; 748 749 } 749 750 750 751 static void __init arch_timer_of_init(struct device_node *np)
+13
drivers/irqchip/irq-gic-common.c
··· 21 21 22 22 #include "irq-gic-common.h" 23 23 24 + static const struct gic_kvm_info *gic_kvm_info; 25 + 26 + const struct gic_kvm_info *gic_get_kvm_info(void) 27 + { 28 + return gic_kvm_info; 29 + } 30 + 31 + void gic_set_kvm_info(const struct gic_kvm_info *info) 32 + { 33 + BUG_ON(gic_kvm_info != NULL); 34 + gic_kvm_info = info; 35 + } 36 + 24 37 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 25 38 void *data) 26 39 {
+3
drivers/irqchip/irq-gic-common.h
··· 19 19 20 20 #include <linux/of.h> 21 21 #include <linux/irqdomain.h> 22 + #include <linux/irqchip/arm-gic-common.h> 22 23 23 24 struct gic_quirk { 24 25 const char *desc; ··· 35 34 void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); 36 35 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 37 36 void *data); 37 + 38 + void gic_set_kvm_info(const struct gic_kvm_info *info); 38 39 39 40 #endif /* _IRQ_GIC_COMMON_H */
+148 -28
drivers/irqchip/irq-gic-v3.c
··· 15 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 16 */ 17 17 18 + #define pr_fmt(fmt) "GICv3: " fmt 19 + 18 20 #include <linux/acpi.h> 19 21 #include <linux/cpu.h> 20 22 #include <linux/cpu_pm.h> ··· 30 28 #include <linux/slab.h> 31 29 32 30 #include <linux/irqchip.h> 31 + #include <linux/irqchip/arm-gic-common.h> 33 32 #include <linux/irqchip/arm-gic-v3.h> 34 33 35 34 #include <asm/cputype.h> ··· 58 55 59 56 static struct gic_chip_data gic_data __read_mostly; 60 57 static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE; 58 + 59 + static struct gic_kvm_info gic_v3_kvm_info; 61 60 62 61 #define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) 63 62 #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) ··· 906 901 return 0; 907 902 } 908 903 904 + static void __init gic_of_setup_kvm_info(struct device_node *node) 905 + { 906 + int ret; 907 + struct resource r; 908 + u32 gicv_idx; 909 + 910 + gic_v3_kvm_info.type = GIC_V3; 911 + 912 + gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); 913 + if (!gic_v3_kvm_info.maint_irq) 914 + return; 915 + 916 + if (of_property_read_u32(node, "#redistributor-regions", 917 + &gicv_idx)) 918 + gicv_idx = 1; 919 + 920 + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ 921 + ret = of_address_to_resource(node, gicv_idx, &r); 922 + if (!ret) 923 + gic_v3_kvm_info.vcpu = r; 924 + 925 + gic_set_kvm_info(&gic_v3_kvm_info); 926 + } 927 + 909 928 static int __init gic_of_init(struct device_node *node, struct device_node *parent) 910 929 { 911 930 void __iomem *dist_base; ··· 981 952 982 953 err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, 983 954 redist_stride, &node->fwnode); 984 - if (!err) 955 + if (!err) { 956 + gic_of_setup_kvm_info(node); 985 957 return 0; 958 + } 986 959 987 960 out_unmap_rdist: 988 961 for (i = 0; i < nr_redist_regions; i++) ··· 999 968 IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); 1000 969 1001 970 #ifdef CONFIG_ACPI 1002 - static 
void __iomem *dist_base; 1003 - static struct redist_region *redist_regs __initdata; 1004 - static u32 nr_redist_regions __initdata; 1005 - static bool single_redist; 971 + static struct 972 + { 973 + void __iomem *dist_base; 974 + struct redist_region *redist_regs; 975 + u32 nr_redist_regions; 976 + bool single_redist; 977 + u32 maint_irq; 978 + int maint_irq_mode; 979 + phys_addr_t vcpu_base; 980 + } acpi_data __initdata; 1006 981 1007 982 static void __init 1008 983 gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) 1009 984 { 1010 985 static int count = 0; 1011 986 1012 - redist_regs[count].phys_base = phys_base; 1013 - redist_regs[count].redist_base = redist_base; 1014 - redist_regs[count].single_redist = single_redist; 987 + acpi_data.redist_regs[count].phys_base = phys_base; 988 + acpi_data.redist_regs[count].redist_base = redist_base; 989 + acpi_data.redist_regs[count].single_redist = acpi_data.single_redist; 1015 990 count++; 1016 991 } 1017 992 ··· 1045 1008 { 1046 1009 struct acpi_madt_generic_interrupt *gicc = 1047 1010 (struct acpi_madt_generic_interrupt *)header; 1048 - u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; 1011 + u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; 1049 1012 u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? 
SZ_64K * 4 : SZ_64K * 2; 1050 1013 void __iomem *redist_base; 1051 1014 ··· 1062 1025 acpi_tbl_entry_handler redist_parser; 1063 1026 enum acpi_madt_type type; 1064 1027 1065 - if (single_redist) { 1028 + if (acpi_data.single_redist) { 1066 1029 type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; 1067 1030 redist_parser = gic_acpi_parse_madt_gicc; 1068 1031 } else { ··· 1113 1076 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, 1114 1077 gic_acpi_match_gicr, 0); 1115 1078 if (count > 0) { 1116 - single_redist = false; 1079 + acpi_data.single_redist = false; 1117 1080 return count; 1118 1081 } 1119 1082 1120 1083 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, 1121 1084 gic_acpi_match_gicc, 0); 1122 1085 if (count > 0) 1123 - single_redist = true; 1086 + acpi_data.single_redist = true; 1124 1087 1125 1088 return count; 1126 1089 } ··· 1140 1103 if (count <= 0) 1141 1104 return false; 1142 1105 1143 - nr_redist_regions = count; 1106 + acpi_data.nr_redist_regions = count; 1144 1107 return true; 1145 1108 } 1146 1109 1110 + static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header, 1111 + const unsigned long end) 1112 + { 1113 + struct acpi_madt_generic_interrupt *gicc = 1114 + (struct acpi_madt_generic_interrupt *)header; 1115 + int maint_irq_mode; 1116 + static int first_madt = true; 1117 + 1118 + /* Skip unusable CPUs */ 1119 + if (!(gicc->flags & ACPI_MADT_ENABLED)) 1120 + return 0; 1121 + 1122 + maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 
1123 + ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; 1124 + 1125 + if (first_madt) { 1126 + first_madt = false; 1127 + 1128 + acpi_data.maint_irq = gicc->vgic_interrupt; 1129 + acpi_data.maint_irq_mode = maint_irq_mode; 1130 + acpi_data.vcpu_base = gicc->gicv_base_address; 1131 + 1132 + return 0; 1133 + } 1134 + 1135 + /* 1136 + * The maintenance interrupt and GICV should be the same for every CPU 1137 + */ 1138 + if ((acpi_data.maint_irq != gicc->vgic_interrupt) || 1139 + (acpi_data.maint_irq_mode != maint_irq_mode) || 1140 + (acpi_data.vcpu_base != gicc->gicv_base_address)) 1141 + return -EINVAL; 1142 + 1143 + return 0; 1144 + } 1145 + 1146 + static bool __init gic_acpi_collect_virt_info(void) 1147 + { 1148 + int count; 1149 + 1150 + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, 1151 + gic_acpi_parse_virt_madt_gicc, 0); 1152 + 1153 + return (count > 0); 1154 + } 1155 + 1147 1156 #define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) 1157 + #define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) 1158 + #define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) 1159 + 1160 + static void __init gic_acpi_setup_kvm_info(void) 1161 + { 1162 + int irq; 1163 + 1164 + if (!gic_acpi_collect_virt_info()) { 1165 + pr_warn("Unable to get hardware information used for virtualization\n"); 1166 + return; 1167 + } 1168 + 1169 + gic_v3_kvm_info.type = GIC_V3; 1170 + 1171 + irq = acpi_register_gsi(NULL, acpi_data.maint_irq, 1172 + acpi_data.maint_irq_mode, 1173 + ACPI_ACTIVE_HIGH); 1174 + if (irq <= 0) 1175 + return; 1176 + 1177 + gic_v3_kvm_info.maint_irq = irq; 1178 + 1179 + if (acpi_data.vcpu_base) { 1180 + struct resource *vcpu = &gic_v3_kvm_info.vcpu; 1181 + 1182 + vcpu->flags = IORESOURCE_MEM; 1183 + vcpu->start = acpi_data.vcpu_base; 1184 + vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; 1185 + } 1186 + 1187 + gic_set_kvm_info(&gic_v3_kvm_info); 1188 + } 1148 1189 1149 1190 static int __init 1150 1191 gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) 1151 1192 { 1152 
1193 struct acpi_madt_generic_distributor *dist; 1153 1194 struct fwnode_handle *domain_handle; 1195 + size_t size; 1154 1196 int i, err; 1155 1197 1156 1198 /* Get distributor base address */ 1157 1199 dist = (struct acpi_madt_generic_distributor *)header; 1158 - dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE); 1159 - if (!dist_base) { 1200 + acpi_data.dist_base = ioremap(dist->base_address, 1201 + ACPI_GICV3_DIST_MEM_SIZE); 1202 + if (!acpi_data.dist_base) { 1160 1203 pr_err("Unable to map GICD registers\n"); 1161 1204 return -ENOMEM; 1162 1205 } 1163 1206 1164 - err = gic_validate_dist_version(dist_base); 1207 + err = gic_validate_dist_version(acpi_data.dist_base); 1165 1208 if (err) { 1166 - pr_err("No distributor detected at @%p, giving up", dist_base); 1209 + pr_err("No distributor detected at @%p, giving up", 1210 + acpi_data.dist_base); 1167 1211 goto out_dist_unmap; 1168 1212 } 1169 1213 1170 - redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions, 1171 - GFP_KERNEL); 1172 - if (!redist_regs) { 1214 + size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions; 1215 + acpi_data.redist_regs = kzalloc(size, GFP_KERNEL); 1216 + if (!acpi_data.redist_regs) { 1173 1217 err = -ENOMEM; 1174 1218 goto out_dist_unmap; 1175 1219 } ··· 1259 1141 if (err) 1260 1142 goto out_redist_unmap; 1261 1143 1262 - domain_handle = irq_domain_alloc_fwnode(dist_base); 1144 + domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base); 1263 1145 if (!domain_handle) { 1264 1146 err = -ENOMEM; 1265 1147 goto out_redist_unmap; 1266 1148 } 1267 1149 1268 - err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0, 1269 - domain_handle); 1150 + err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs, 1151 + acpi_data.nr_redist_regions, 0, domain_handle); 1270 1152 if (err) 1271 1153 goto out_fwhandle_free; 1272 1154 1273 1155 acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); 1156 + gic_acpi_setup_kvm_info(); 1157 + 1274 1158 
return 0; 1275 1159 1276 1160 out_fwhandle_free: 1277 1161 irq_domain_free_fwnode(domain_handle); 1278 1162 out_redist_unmap: 1279 - for (i = 0; i < nr_redist_regions; i++) 1280 - if (redist_regs[i].redist_base) 1281 - iounmap(redist_regs[i].redist_base); 1282 - kfree(redist_regs); 1163 + for (i = 0; i < acpi_data.nr_redist_regions; i++) 1164 + if (acpi_data.redist_regs[i].redist_base) 1165 + iounmap(acpi_data.redist_regs[i].redist_base); 1166 + kfree(acpi_data.redist_regs); 1283 1167 out_dist_unmap: 1284 - iounmap(dist_base); 1168 + iounmap(acpi_data.dist_base); 1285 1169 return err; 1286 1170 } 1287 1171 IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+82 -5
drivers/irqchip/irq-gic.c
··· 102 102 103 103 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; 104 104 105 + static struct gic_kvm_info gic_v2_kvm_info; 106 + 105 107 #ifdef CONFIG_GIC_NON_BANKED 106 108 static void __iomem *gic_get_percpu_base(union gic_base *base) 107 109 { ··· 1191 1189 return true; 1192 1190 } 1193 1191 1192 + static void __init gic_of_setup_kvm_info(struct device_node *node) 1193 + { 1194 + int ret; 1195 + struct resource *vctrl_res = &gic_v2_kvm_info.vctrl; 1196 + struct resource *vcpu_res = &gic_v2_kvm_info.vcpu; 1197 + 1198 + gic_v2_kvm_info.type = GIC_V2; 1199 + 1200 + gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); 1201 + if (!gic_v2_kvm_info.maint_irq) 1202 + return; 1203 + 1204 + ret = of_address_to_resource(node, 2, vctrl_res); 1205 + if (ret) 1206 + return; 1207 + 1208 + ret = of_address_to_resource(node, 3, vcpu_res); 1209 + if (ret) 1210 + return; 1211 + 1212 + gic_set_kvm_info(&gic_v2_kvm_info); 1213 + } 1214 + 1194 1215 int __init 1195 1216 gic_of_init(struct device_node *node, struct device_node *parent) 1196 1217 { ··· 1243 1218 1244 1219 __gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, 1245 1220 &node->fwnode); 1246 - if (!gic_cnt) 1221 + if (!gic_cnt) { 1247 1222 gic_init_physaddr(node); 1223 + gic_of_setup_kvm_info(node); 1224 + } 1248 1225 1249 1226 if (parent) { 1250 1227 irq = irq_of_parse_and_map(node, 0); ··· 1272 1245 #endif 1273 1246 1274 1247 #ifdef CONFIG_ACPI 1275 - static phys_addr_t cpu_phy_base __initdata; 1248 + static struct 1249 + { 1250 + phys_addr_t cpu_phys_base; 1251 + u32 maint_irq; 1252 + int maint_irq_mode; 1253 + phys_addr_t vctrl_base; 1254 + phys_addr_t vcpu_base; 1255 + } acpi_data __initdata; 1276 1256 1277 1257 static int __init 1278 1258 gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, ··· 1299 1265 * All CPU interface addresses have to be the same. 
1300 1266 */ 1301 1267 gic_cpu_base = processor->base_address; 1302 - if (cpu_base_assigned && gic_cpu_base != cpu_phy_base) 1268 + if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base) 1303 1269 return -EINVAL; 1304 1270 1305 - cpu_phy_base = gic_cpu_base; 1271 + acpi_data.cpu_phys_base = gic_cpu_base; 1272 + acpi_data.maint_irq = processor->vgic_interrupt; 1273 + acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 1274 + ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; 1275 + acpi_data.vctrl_base = processor->gich_base_address; 1276 + acpi_data.vcpu_base = processor->gicv_base_address; 1277 + 1306 1278 cpu_base_assigned = 1; 1307 1279 return 0; 1308 1280 } ··· 1339 1299 1340 1300 #define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) 1341 1301 #define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) 1302 + #define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) 1303 + #define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) 1304 + 1305 + static void __init gic_acpi_setup_kvm_info(void) 1306 + { 1307 + int irq; 1308 + struct resource *vctrl_res = &gic_v2_kvm_info.vctrl; 1309 + struct resource *vcpu_res = &gic_v2_kvm_info.vcpu; 1310 + 1311 + gic_v2_kvm_info.type = GIC_V2; 1312 + 1313 + if (!acpi_data.vctrl_base) 1314 + return; 1315 + 1316 + vctrl_res->flags = IORESOURCE_MEM; 1317 + vctrl_res->start = acpi_data.vctrl_base; 1318 + vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1; 1319 + 1320 + if (!acpi_data.vcpu_base) 1321 + return; 1322 + 1323 + vcpu_res->flags = IORESOURCE_MEM; 1324 + vcpu_res->start = acpi_data.vcpu_base; 1325 + vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; 1326 + 1327 + irq = acpi_register_gsi(NULL, acpi_data.maint_irq, 1328 + acpi_data.maint_irq_mode, 1329 + ACPI_ACTIVE_HIGH); 1330 + if (irq <= 0) 1331 + return; 1332 + 1333 + gic_v2_kvm_info.maint_irq = irq; 1334 + 1335 + gic_set_kvm_info(&gic_v2_kvm_info); 1336 + } 1342 1337 1343 1338 static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, 1344 1339 const unsigned long end) 
··· 1391 1316 return -EINVAL; 1392 1317 } 1393 1318 1394 - cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE); 1319 + cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE); 1395 1320 if (!cpu_base) { 1396 1321 pr_err("Unable to map GICC registers\n"); 1397 1322 return -ENOMEM; ··· 1430 1355 1431 1356 if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) 1432 1357 gicv2m_init(NULL, gic_data[0].domain); 1358 + 1359 + gic_acpi_setup_kvm_info(); 1433 1360 1434 1361 return 0; 1435 1362 }
+6 -6
include/clocksource/arm_arch_timer.h
··· 49 49 50 50 #define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */ 51 51 52 + struct arch_timer_kvm_info { 53 + struct timecounter timecounter; 54 + int virtual_irq; 55 + }; 56 + 52 57 #ifdef CONFIG_ARM_ARCH_TIMER 53 58 54 59 extern u32 arch_timer_get_rate(void); 55 60 extern u64 (*arch_timer_read_counter)(void); 56 - extern struct timecounter *arch_timer_get_timecounter(void); 61 + extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void); 57 62 58 63 #else 59 64 ··· 70 65 static inline u64 arch_timer_read_counter(void) 71 66 { 72 67 return 0; 73 - } 74 - 75 - static inline struct timecounter *arch_timer_get_timecounter(void) 76 - { 77 - return NULL; 78 68 } 79 69 80 70 #endif
+4 -3
include/kvm/arm_vgic.h
··· 25 25 #include <linux/spinlock.h> 26 26 #include <linux/types.h> 27 27 #include <kvm/iodev.h> 28 + #include <linux/irqchip/arm-gic-common.h> 28 29 29 30 #define VGIC_NR_IRQS_LEGACY 256 30 31 #define VGIC_NR_SGIS 16 ··· 354 353 #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 355 354 #define vgic_ready(k) ((k)->arch.vgic.ready) 356 355 357 - int vgic_v2_probe(struct device_node *vgic_node, 356 + int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, 358 357 const struct vgic_ops **ops, 359 358 const struct vgic_params **params); 360 359 #ifdef CONFIG_KVM_ARM_VGIC_V3 361 - int vgic_v3_probe(struct device_node *vgic_node, 360 + int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 362 361 const struct vgic_ops **ops, 363 362 const struct vgic_params **params); 364 363 #else 365 - static inline int vgic_v3_probe(struct device_node *vgic_node, 364 + static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 366 365 const struct vgic_ops **ops, 367 366 const struct vgic_params **params) 368 367 {
+34
include/linux/irqchip/arm-gic-common.h
··· 1 + /* 2 + * include/linux/irqchip/arm-gic-common.h 3 + * 4 + * Copyright (C) 2016 ARM Limited, All Rights Reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H 11 + #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H 12 + 13 + #include <linux/types.h> 14 + #include <linux/ioport.h> 15 + 16 + enum gic_type { 17 + GIC_V2, 18 + GIC_V3, 19 + }; 20 + 21 + struct gic_kvm_info { 22 + /* GIC type */ 23 + enum gic_type type; 24 + /* Virtual CPU interface */ 25 + struct resource vcpu; 26 + /* Interrupt number */ 27 + unsigned int maint_irq; 28 + /* Virtual control interface */ 29 + struct resource vctrl; 30 + }; 31 + 32 + const struct gic_kvm_info *gic_get_kvm_info(void); 33 + 34 + #endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
+11 -29
virt/kvm/arm/arch_timer.c
··· 17 17 */ 18 18 19 19 #include <linux/cpu.h> 20 - #include <linux/of_irq.h> 21 20 #include <linux/kvm.h> 22 21 #include <linux/kvm_host.h> 23 22 #include <linux/interrupt.h> ··· 437 438 .notifier_call = kvm_timer_cpu_notify, 438 439 }; 439 440 440 - static const struct of_device_id arch_timer_of_match[] = { 441 - { .compatible = "arm,armv7-timer", }, 442 - { .compatible = "arm,armv8-timer", }, 443 - {}, 444 - }; 445 - 446 441 int kvm_timer_hyp_init(void) 447 442 { 448 - struct device_node *np; 449 - unsigned int ppi; 443 + struct arch_timer_kvm_info *info; 450 444 int err; 451 445 452 - timecounter = arch_timer_get_timecounter(); 453 - if (!timecounter) 454 - return -ENODEV; 446 + info = arch_timer_get_kvm_info(); 447 + timecounter = &info->timecounter; 455 448 456 - np = of_find_matching_node(NULL, arch_timer_of_match); 457 - if (!np) { 458 - kvm_err("kvm_arch_timer: can't find DT node\n"); 449 + if (info->virtual_irq <= 0) { 450 + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 451 + info->virtual_irq); 459 452 return -ENODEV; 460 453 } 454 + host_vtimer_irq = info->virtual_irq; 461 455 462 - ppi = irq_of_parse_and_map(np, 2); 463 - if (!ppi) { 464 - kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); 465 - err = -EINVAL; 466 - goto out; 467 - } 468 - 469 - err = request_percpu_irq(ppi, kvm_arch_timer_handler, 456 + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 470 457 "kvm guest timer", kvm_get_running_vcpus()); 471 458 if (err) { 472 459 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 473 - ppi, err); 460 + host_vtimer_irq, err); 474 461 goto out; 475 462 } 476 - 477 - host_vtimer_irq = ppi; 478 463 479 464 err = __register_cpu_notifier(&kvm_timer_cpu_nb); 480 465 if (err) { ··· 472 489 goto out_free; 473 490 } 474 491 475 - kvm_info("%s IRQ%d\n", np->name, ppi); 492 + kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); 476 493 on_each_cpu(kvm_timer_init_interrupt, NULL, 1); 477 494 478 495 goto out; 479 496 
out_free: 480 - free_percpu_irq(ppi, kvm_get_running_vcpus()); 497 + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); 481 498 out: 482 - of_node_put(np); 483 499 return err; 484 500 } 485 501
+29 -38
virt/kvm/arm/vgic-v2.c
··· 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/interrupt.h> 22 22 #include <linux/io.h> 23 - #include <linux/of.h> 24 - #include <linux/of_address.h> 25 - #include <linux/of_irq.h> 26 23 27 24 #include <linux/irqchip/arm-gic.h> 28 25 ··· 183 186 } 184 187 185 188 /** 186 - * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT 187 - * @node: pointer to the DT node 188 - * @ops: address of a pointer to the GICv2 operations 189 - * @params: address of a pointer to HW-specific parameters 189 + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller 190 + * @gic_kvm_info: pointer to the GIC description 191 + * @ops: address of a pointer to the GICv2 operations 192 + * @params: address of a pointer to HW-specific parameters 190 193 * 191 194 * Returns 0 if a GICv2 has been found, with the low level operations 192 195 * in *ops and the HW parameters in *params. Returns an error code 193 196 * otherwise. 194 197 */ 195 - int vgic_v2_probe(struct device_node *vgic_node, 196 - const struct vgic_ops **ops, 197 - const struct vgic_params **params) 198 + int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, 199 + const struct vgic_ops **ops, 200 + const struct vgic_params **params) 198 201 { 199 202 int ret; 200 - struct resource vctrl_res; 201 - struct resource vcpu_res; 202 203 struct vgic_params *vgic = &vgic_v2_params; 204 + const struct resource *vctrl_res = &gic_kvm_info->vctrl; 205 + const struct resource *vcpu_res = &gic_kvm_info->vcpu; 203 206 204 - vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 205 - if (!vgic->maint_irq) { 206 - kvm_err("error getting vgic maintenance irq from DT\n"); 207 + if (!gic_kvm_info->maint_irq) { 208 + kvm_err("error getting vgic maintenance irq\n"); 209 + ret = -ENXIO; 210 + goto out; 211 + } 212 + vgic->maint_irq = gic_kvm_info->maint_irq; 213 + 214 + if (!gic_kvm_info->vctrl.start) { 215 + kvm_err("GICH not present in the firmware table\n"); 207 216 ret = -ENXIO; 208 217 goto out; 
209 218 } 210 219 211 - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); 212 - if (ret) { 213 - kvm_err("Cannot obtain GICH resource\n"); 214 - goto out; 215 - } 216 - 217 - vgic->vctrl_base = of_iomap(vgic_node, 2); 220 + vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start, 221 + resource_size(&gic_kvm_info->vctrl)); 218 222 if (!vgic->vctrl_base) { 219 223 kvm_err("Cannot ioremap GICH\n"); 220 224 ret = -ENOMEM; ··· 226 228 vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; 227 229 228 230 ret = create_hyp_io_mappings(vgic->vctrl_base, 229 - vgic->vctrl_base + resource_size(&vctrl_res), 230 - vctrl_res.start); 231 + vgic->vctrl_base + resource_size(vctrl_res), 232 + vctrl_res->start); 231 233 if (ret) { 232 234 kvm_err("Cannot map VCTRL into hyp\n"); 233 235 goto out_unmap; 234 236 } 235 237 236 - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { 237 - kvm_err("Cannot obtain GICV resource\n"); 238 - ret = -ENXIO; 239 - goto out_unmap; 240 - } 241 - 242 - if (!PAGE_ALIGNED(vcpu_res.start)) { 238 + if (!PAGE_ALIGNED(vcpu_res->start)) { 243 239 kvm_err("GICV physical address 0x%llx not page aligned\n", 244 - (unsigned long long)vcpu_res.start); 240 + (unsigned long long)vcpu_res->start); 245 241 ret = -ENXIO; 246 242 goto out_unmap; 247 243 } 248 244 249 - if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 245 + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { 250 246 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", 251 - (unsigned long long)resource_size(&vcpu_res), 247 + (unsigned long long)resource_size(vcpu_res), 252 248 PAGE_SIZE); 253 249 ret = -ENXIO; 254 250 goto out_unmap; ··· 251 259 vgic->can_emulate_gicv2 = true; 252 260 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); 253 261 254 - vgic->vcpu_base = vcpu_res.start; 262 + vgic->vcpu_base = vcpu_res->start; 255 263 256 - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 257 - vctrl_res.start, vgic->maint_irq); 264 + kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n", 265 
+ gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq); 258 266 259 267 vgic->type = VGIC_V2; 260 268 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; ··· 268 276 out_unmap: 269 277 iounmap(vgic->vctrl_base); 270 278 out: 271 - of_node_put(vgic_node); 272 279 return ret; 273 280 }
+16 -31
virt/kvm/arm/vgic-v3.c
··· 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/interrupt.h> 22 22 #include <linux/io.h> 23 - #include <linux/of.h> 24 - #include <linux/of_address.h> 25 - #include <linux/of_irq.h> 26 23 27 24 #include <linux/irqchip/arm-gic-v3.h> 25 + #include <linux/irqchip/arm-gic-common.h> 28 26 29 27 #include <asm/kvm_emulate.h> 30 28 #include <asm/kvm_arm.h> ··· 220 222 } 221 223 222 224 /** 223 - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT 224 - * @node: pointer to the DT node 225 - * @ops: address of a pointer to the GICv3 operations 226 - * @params: address of a pointer to HW-specific parameters 225 + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller 226 + * @gic_kvm_info: pointer to the GIC description 227 + * @ops: address of a pointer to the GICv3 operations 228 + * @params: address of a pointer to HW-specific parameters 227 229 * 228 230 * Returns 0 if a GICv3 has been found, with the low level operations 229 231 * in *ops and the HW parameters in *params. Returns an error code 230 232 * otherwise. 
231 233 */ 232 - int vgic_v3_probe(struct device_node *vgic_node, 234 + int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 233 235 const struct vgic_ops **ops, 234 236 const struct vgic_params **params) 235 237 { 236 238 int ret = 0; 237 - u32 gicv_idx; 238 - struct resource vcpu_res; 239 239 struct vgic_params *vgic = &vgic_v3_params; 240 + const struct resource *vcpu_res = &gic_kvm_info->vcpu; 240 241 241 - vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 242 - if (!vgic->maint_irq) { 243 - kvm_err("error getting vgic maintenance irq from DT\n"); 244 - ret = -ENXIO; 245 - goto out; 246 - } 242 + vgic->maint_irq = gic_kvm_info->maint_irq; 247 243 248 244 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); 249 245 ··· 248 256 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; 249 257 vgic->can_emulate_gicv2 = false; 250 258 251 - if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) 252 - gicv_idx = 1; 253 - 254 - gicv_idx += 3; /* Also skip GICD, GICC, GICH */ 255 - if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { 259 + if (!vcpu_res->start) { 256 260 kvm_info("GICv3: no GICV resource entry\n"); 257 261 vgic->vcpu_base = 0; 258 - } else if (!PAGE_ALIGNED(vcpu_res.start)) { 262 + } else if (!PAGE_ALIGNED(vcpu_res->start)) { 259 263 pr_warn("GICV physical address 0x%llx not page aligned\n", 260 - (unsigned long long)vcpu_res.start); 264 + (unsigned long long)vcpu_res->start); 261 265 vgic->vcpu_base = 0; 262 - } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 266 + } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) { 263 267 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", 264 - (unsigned long long)resource_size(&vcpu_res), 268 + (unsigned long long)resource_size(vcpu_res), 265 269 PAGE_SIZE); 266 - vgic->vcpu_base = 0; 267 270 } else { 268 - vgic->vcpu_base = vcpu_res.start; 271 + vgic->vcpu_base = vcpu_res->start; 269 272 vgic->can_emulate_gicv2 = true; 270 273 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, 
271 274 KVM_DEV_TYPE_ARM_VGIC_V2); ··· 273 286 vgic->type = VGIC_V3; 274 287 vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; 275 288 276 - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 277 - vcpu_res.start, vgic->maint_irq); 289 + kvm_info("GICV base=0x%llx, IRQ=%d\n", 290 + vgic->vcpu_base, vgic->maint_irq); 278 291 279 292 on_each_cpu(vgic_cpu_init_lrs, vgic, 1); 280 293 281 294 *ops = &vgic_v3_ops; 282 295 *params = vgic; 283 296 284 - out: 285 - of_node_put(vgic_node); 286 297 return ret; 287 298 }
+28 -24
virt/kvm/arm/vgic.c
··· 21 21 #include <linux/kvm_host.h> 22 22 #include <linux/interrupt.h> 23 23 #include <linux/io.h> 24 - #include <linux/of.h> 25 - #include <linux/of_address.h> 26 - #include <linux/of_irq.h> 24 + #include <linux/irq.h> 27 25 #include <linux/rculist.h> 28 26 #include <linux/uaccess.h> 29 27 ··· 31 33 #include <trace/events/kvm.h> 32 34 #include <asm/kvm.h> 33 35 #include <kvm/iodev.h> 36 + #include <linux/irqchip/arm-gic-common.h> 34 37 35 38 #define CREATE_TRACE_POINTS 36 39 #include "trace.h" ··· 2388 2389 .notifier_call = vgic_cpu_notify, 2389 2390 }; 2390 2391 2391 - static const struct of_device_id vgic_ids[] = { 2392 - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, 2393 - { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, 2394 - { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, 2395 - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, 2396 - {}, 2397 - }; 2392 + static int kvm_vgic_probe(void) 2393 + { 2394 + const struct gic_kvm_info *gic_kvm_info; 2395 + int ret; 2396 + 2397 + gic_kvm_info = gic_get_kvm_info(); 2398 + if (!gic_kvm_info) 2399 + return -ENODEV; 2400 + 2401 + switch (gic_kvm_info->type) { 2402 + case GIC_V2: 2403 + ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic); 2404 + break; 2405 + case GIC_V3: 2406 + ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic); 2407 + break; 2408 + default: 2409 + ret = -ENODEV; 2410 + } 2411 + 2412 + return ret; 2413 + } 2398 2414 2399 2415 int kvm_vgic_hyp_init(void) 2400 2416 { 2401 - const struct of_device_id *matched_id; 2402 - const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, 2403 - const struct vgic_params **); 2404 - struct device_node *vgic_node; 2405 2417 int ret; 2406 2418 2407 - vgic_node = of_find_matching_node_and_match(NULL, 2408 - vgic_ids, &matched_id); 2409 - if (!vgic_node) { 2410 - kvm_err("error: no compatible GIC node found\n"); 2411 - return -ENODEV; 2412 - } 2413 - 2414 - vgic_probe = matched_id->data; 2415 - ret = 
vgic_probe(vgic_node, &vgic_ops, &vgic); 2416 - if (ret) 2419 + ret = kvm_vgic_probe(); 2420 + if (ret) { 2421 + kvm_err("error: KVM vGIC probing failed\n"); 2417 2422 return ret; 2423 + } 2418 2424 2419 2425 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, 2420 2426 "vgic", kvm_get_running_vcpus());