Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'kvm-arm-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/ARM Changes for Linux v4.7

Reworks our stage 2 page table handling to have page table manipulation
macros separate from those of the host systems as the underlying
hardware page tables can be configured to be noticeably different in
layout from the stage 1 page tables used by the host.

Adds 16K page size support based on the above.

Adds a generic firmware probing layer for the timer and GIC so that KVM
initializes using the same logic based on both ACPI and FDT.

Finally adds support for hardware updating of the access flag.

+1057 -529
+11 -32
arch/arm/include/asm/kvm_mmu.h
··· 47 47 #include <linux/highmem.h> 48 48 #include <asm/cacheflush.h> 49 49 #include <asm/pgalloc.h> 50 + #include <asm/stage2_pgtable.h> 50 51 51 52 int create_hyp_mappings(void *from, void *to); 52 53 int create_hyp_io_mappings(void *from, void *to, phys_addr_t); ··· 106 105 clean_pte_table(pte); 107 106 } 108 107 109 - static inline void kvm_set_s2pte_writable(pte_t *pte) 108 + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) 110 109 { 111 - pte_val(*pte) |= L_PTE_S2_RDWR; 110 + pte_val(pte) |= L_PTE_S2_RDWR; 111 + return pte; 112 112 } 113 113 114 - static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 114 + static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) 115 115 { 116 - pmd_val(*pmd) |= L_PMD_S2_RDWR; 116 + pmd_val(pmd) |= L_PMD_S2_RDWR; 117 + return pmd; 117 118 } 118 119 119 120 static inline void kvm_set_s2pte_readonly(pte_t *pte) ··· 138 135 return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; 139 136 } 140 137 141 - 142 - /* Open coded p*d_addr_end that can deal with 64bit addresses */ 143 - #define kvm_pgd_addr_end(addr, end) \ 144 - ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ 145 - (__boundary - 1 < (end) - 1)? __boundary: (end); \ 146 - }) 147 - 148 - #define kvm_pud_addr_end(addr,end) (end) 149 - 150 - #define kvm_pmd_addr_end(addr, end) \ 151 - ({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ 152 - (__boundary - 1 < (end) - 1)? 
__boundary: (end); \ 153 - }) 154 - 155 - #define kvm_pgd_index(addr) pgd_index(addr) 156 - 157 138 static inline bool kvm_page_empty(void *ptr) 158 139 { 159 140 struct page *ptr_page = virt_to_page(ptr); ··· 146 159 147 160 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) 148 161 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) 149 - #define kvm_pud_table_empty(kvm, pudp) (0) 162 + #define kvm_pud_table_empty(kvm, pudp) false 150 163 151 - #define KVM_PREALLOC_LEVEL 0 152 - 153 - static inline void *kvm_get_hwpgd(struct kvm *kvm) 154 - { 155 - return kvm->arch.pgd; 156 - } 157 - 158 - static inline unsigned int kvm_get_hwpgd_size(void) 159 - { 160 - return PTRS_PER_S2_PGD * sizeof(pgd_t); 161 - } 164 + #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) 165 + #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 166 + #define hyp_pud_table_empty(pudp) false 162 167 163 168 struct kvm; 164 169
+61
arch/arm/include/asm/stage2_pgtable.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * stage2 page table helpers 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, 11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 + * GNU General Public License for more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __ARM_S2_PGTABLE_H_ 20 + #define __ARM_S2_PGTABLE_H_ 21 + 22 + #define stage2_pgd_none(pgd) pgd_none(pgd) 23 + #define stage2_pgd_clear(pgd) pgd_clear(pgd) 24 + #define stage2_pgd_present(pgd) pgd_present(pgd) 25 + #define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 26 + #define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 27 + #define stage2_pud_free(pud) pud_free(NULL, pud) 28 + 29 + #define stage2_pud_none(pud) pud_none(pud) 30 + #define stage2_pud_clear(pud) pud_clear(pud) 31 + #define stage2_pud_present(pud) pud_present(pud) 32 + #define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 33 + #define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 34 + #define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 35 + 36 + #define stage2_pud_huge(pud) pud_huge(pud) 37 + 38 + /* Open coded p*d_addr_end that can deal with 64bit addresses */ 39 + static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 40 + { 41 + phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK; 42 + 43 + return (boundary - 1 < end - 1) ? 
boundary : end; 44 + } 45 + 46 + #define stage2_pud_addr_end(addr, end) (end) 47 + 48 + static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 49 + { 50 + phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK; 51 + 52 + return (boundary - 1 < end - 1) ? boundary : end; 53 + } 54 + 55 + #define stage2_pgd_index(addr) pgd_index(addr) 56 + 57 + #define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 58 + #define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 59 + #define stage2_pud_table_empty(pudp) false 60 + 61 + #endif /* __ARM_S2_PGTABLE_H_ */
+1 -1
arch/arm/kvm/arm.c
··· 448 448 kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; 449 449 450 450 /* update vttbr to be used with the new vmid */ 451 - pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); 451 + pgd_phys = virt_to_phys(kvm->arch.pgd); 452 452 BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); 453 453 vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); 454 454 kvm->arch.vttbr = pgd_phys | vmid;
+221 -187
arch/arm/kvm/mmu.c
··· 43 43 static unsigned long hyp_idmap_end; 44 44 static phys_addr_t hyp_idmap_vector; 45 45 46 + #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) 46 47 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) 47 - 48 - #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) 49 - #define kvm_pud_huge(_x) pud_huge(_x) 50 48 51 49 #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) 52 50 #define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) ··· 67 69 68 70 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) 69 71 { 70 - /* 71 - * This function also gets called when dealing with HYP page 72 - * tables. As HYP doesn't have an associated struct kvm (and 73 - * the HYP page tables are fairly static), we don't do 74 - * anything there. 75 - */ 76 - if (kvm) 77 - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 72 + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); 78 73 } 79 74 80 75 /* ··· 106 115 */ 107 116 static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) 108 117 { 109 - if (!kvm_pmd_huge(*pmd)) 118 + if (!pmd_thp_or_huge(*pmd)) 110 119 return; 111 120 112 121 pmd_clear(pmd); ··· 146 155 return p; 147 156 } 148 157 149 - static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 158 + static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) 150 159 { 151 - pud_t *pud_table __maybe_unused = pud_offset(pgd, 0); 152 - pgd_clear(pgd); 160 + pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); 161 + stage2_pgd_clear(pgd); 153 162 kvm_tlb_flush_vmid_ipa(kvm, addr); 154 - pud_free(NULL, pud_table); 163 + stage2_pud_free(pud_table); 155 164 put_page(virt_to_page(pgd)); 156 165 } 157 166 158 - static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 167 + static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) 159 168 { 160 - pmd_t *pmd_table = pmd_offset(pud, 0); 161 - VM_BUG_ON(pud_huge(*pud)); 162 - pud_clear(pud); 169 + pmd_t 
*pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); 170 + VM_BUG_ON(stage2_pud_huge(*pud)); 171 + stage2_pud_clear(pud); 163 172 kvm_tlb_flush_vmid_ipa(kvm, addr); 164 - pmd_free(NULL, pmd_table); 173 + stage2_pmd_free(pmd_table); 165 174 put_page(virt_to_page(pud)); 166 175 } 167 176 168 - static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 177 + static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) 169 178 { 170 179 pte_t *pte_table = pte_offset_kernel(pmd, 0); 171 - VM_BUG_ON(kvm_pmd_huge(*pmd)); 180 + VM_BUG_ON(pmd_thp_or_huge(*pmd)); 172 181 pmd_clear(pmd); 173 182 kvm_tlb_flush_vmid_ipa(kvm, addr); 174 183 pte_free_kernel(NULL, pte_table); ··· 195 204 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure 196 205 * the IO subsystem will never hit in the cache. 197 206 */ 198 - static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, 207 + static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, 199 208 phys_addr_t addr, phys_addr_t end) 200 209 { 201 210 phys_addr_t start_addr = addr; ··· 217 226 } 218 227 } while (pte++, addr += PAGE_SIZE, addr != end); 219 228 220 - if (kvm_pte_table_empty(kvm, start_pte)) 221 - clear_pmd_entry(kvm, pmd, start_addr); 229 + if (stage2_pte_table_empty(start_pte)) 230 + clear_stage2_pmd_entry(kvm, pmd, start_addr); 222 231 } 223 232 224 - static void unmap_pmds(struct kvm *kvm, pud_t *pud, 233 + static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, 225 234 phys_addr_t addr, phys_addr_t end) 226 235 { 227 236 phys_addr_t next, start_addr = addr; 228 237 pmd_t *pmd, *start_pmd; 229 238 230 - start_pmd = pmd = pmd_offset(pud, addr); 239 + start_pmd = pmd = stage2_pmd_offset(pud, addr); 231 240 do { 232 - next = kvm_pmd_addr_end(addr, end); 241 + next = stage2_pmd_addr_end(addr, end); 233 242 if (!pmd_none(*pmd)) { 234 - if (kvm_pmd_huge(*pmd)) { 243 + if (pmd_thp_or_huge(*pmd)) { 235 244 pmd_t old_pmd = *pmd; 236 245 237 246 pmd_clear(pmd); ··· 241 250 242 251 
put_page(virt_to_page(pmd)); 243 252 } else { 244 - unmap_ptes(kvm, pmd, addr, next); 253 + unmap_stage2_ptes(kvm, pmd, addr, next); 245 254 } 246 255 } 247 256 } while (pmd++, addr = next, addr != end); 248 257 249 - if (kvm_pmd_table_empty(kvm, start_pmd)) 250 - clear_pud_entry(kvm, pud, start_addr); 258 + if (stage2_pmd_table_empty(start_pmd)) 259 + clear_stage2_pud_entry(kvm, pud, start_addr); 251 260 } 252 261 253 - static void unmap_puds(struct kvm *kvm, pgd_t *pgd, 262 + static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, 254 263 phys_addr_t addr, phys_addr_t end) 255 264 { 256 265 phys_addr_t next, start_addr = addr; 257 266 pud_t *pud, *start_pud; 258 267 259 - start_pud = pud = pud_offset(pgd, addr); 268 + start_pud = pud = stage2_pud_offset(pgd, addr); 260 269 do { 261 - next = kvm_pud_addr_end(addr, end); 262 - if (!pud_none(*pud)) { 263 - if (pud_huge(*pud)) { 270 + next = stage2_pud_addr_end(addr, end); 271 + if (!stage2_pud_none(*pud)) { 272 + if (stage2_pud_huge(*pud)) { 264 273 pud_t old_pud = *pud; 265 274 266 - pud_clear(pud); 275 + stage2_pud_clear(pud); 267 276 kvm_tlb_flush_vmid_ipa(kvm, addr); 268 - 269 277 kvm_flush_dcache_pud(old_pud); 270 - 271 278 put_page(virt_to_page(pud)); 272 279 } else { 273 - unmap_pmds(kvm, pud, addr, next); 280 + unmap_stage2_pmds(kvm, pud, addr, next); 274 281 } 275 282 } 276 283 } while (pud++, addr = next, addr != end); 277 284 278 - if (kvm_pud_table_empty(kvm, start_pud)) 279 - clear_pgd_entry(kvm, pgd, start_addr); 285 + if (stage2_pud_table_empty(start_pud)) 286 + clear_stage2_pgd_entry(kvm, pgd, start_addr); 280 287 } 281 288 282 - 283 - static void unmap_range(struct kvm *kvm, pgd_t *pgdp, 284 - phys_addr_t start, u64 size) 289 + /** 290 + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range 291 + * @kvm: The VM pointer 292 + * @start: The intermediate physical base address of the range to unmap 293 + * @size: The size of the area to unmap 294 + * 295 + * Clear a range of 
stage-2 mappings, lowering the various ref-counts. Must 296 + * be called while holding mmu_lock (unless for freeing the stage2 pgd before 297 + * destroying the VM), otherwise another faulting VCPU may come in and mess 298 + * with things behind our backs. 299 + */ 300 + static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) 285 301 { 286 302 pgd_t *pgd; 287 303 phys_addr_t addr = start, end = start + size; 288 304 phys_addr_t next; 289 305 290 - pgd = pgdp + kvm_pgd_index(addr); 306 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 291 307 do { 292 - next = kvm_pgd_addr_end(addr, end); 293 - if (!pgd_none(*pgd)) 294 - unmap_puds(kvm, pgd, addr, next); 308 + next = stage2_pgd_addr_end(addr, end); 309 + if (!stage2_pgd_none(*pgd)) 310 + unmap_stage2_puds(kvm, pgd, addr, next); 295 311 } while (pgd++, addr = next, addr != end); 296 312 } 297 313 ··· 320 322 pmd_t *pmd; 321 323 phys_addr_t next; 322 324 323 - pmd = pmd_offset(pud, addr); 325 + pmd = stage2_pmd_offset(pud, addr); 324 326 do { 325 - next = kvm_pmd_addr_end(addr, end); 327 + next = stage2_pmd_addr_end(addr, end); 326 328 if (!pmd_none(*pmd)) { 327 - if (kvm_pmd_huge(*pmd)) 329 + if (pmd_thp_or_huge(*pmd)) 328 330 kvm_flush_dcache_pmd(*pmd); 329 331 else 330 332 stage2_flush_ptes(kvm, pmd, addr, next); ··· 338 340 pud_t *pud; 339 341 phys_addr_t next; 340 342 341 - pud = pud_offset(pgd, addr); 343 + pud = stage2_pud_offset(pgd, addr); 342 344 do { 343 - next = kvm_pud_addr_end(addr, end); 344 - if (!pud_none(*pud)) { 345 - if (pud_huge(*pud)) 345 + next = stage2_pud_addr_end(addr, end); 346 + if (!stage2_pud_none(*pud)) { 347 + if (stage2_pud_huge(*pud)) 346 348 kvm_flush_dcache_pud(*pud); 347 349 else 348 350 stage2_flush_pmds(kvm, pud, addr, next); ··· 358 360 phys_addr_t next; 359 361 pgd_t *pgd; 360 362 361 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 363 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 362 364 do { 363 - next = kvm_pgd_addr_end(addr, end); 365 + next = 
stage2_pgd_addr_end(addr, end); 364 366 stage2_flush_puds(kvm, pgd, addr, next); 365 367 } while (pgd++, addr = next, addr != end); 366 368 } ··· 389 391 srcu_read_unlock(&kvm->srcu, idx); 390 392 } 391 393 394 + static void clear_hyp_pgd_entry(pgd_t *pgd) 395 + { 396 + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); 397 + pgd_clear(pgd); 398 + pud_free(NULL, pud_table); 399 + put_page(virt_to_page(pgd)); 400 + } 401 + 402 + static void clear_hyp_pud_entry(pud_t *pud) 403 + { 404 + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); 405 + VM_BUG_ON(pud_huge(*pud)); 406 + pud_clear(pud); 407 + pmd_free(NULL, pmd_table); 408 + put_page(virt_to_page(pud)); 409 + } 410 + 411 + static void clear_hyp_pmd_entry(pmd_t *pmd) 412 + { 413 + pte_t *pte_table = pte_offset_kernel(pmd, 0); 414 + VM_BUG_ON(pmd_thp_or_huge(*pmd)); 415 + pmd_clear(pmd); 416 + pte_free_kernel(NULL, pte_table); 417 + put_page(virt_to_page(pmd)); 418 + } 419 + 420 + static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) 421 + { 422 + pte_t *pte, *start_pte; 423 + 424 + start_pte = pte = pte_offset_kernel(pmd, addr); 425 + do { 426 + if (!pte_none(*pte)) { 427 + kvm_set_pte(pte, __pte(0)); 428 + put_page(virt_to_page(pte)); 429 + } 430 + } while (pte++, addr += PAGE_SIZE, addr != end); 431 + 432 + if (hyp_pte_table_empty(start_pte)) 433 + clear_hyp_pmd_entry(pmd); 434 + } 435 + 436 + static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) 437 + { 438 + phys_addr_t next; 439 + pmd_t *pmd, *start_pmd; 440 + 441 + start_pmd = pmd = pmd_offset(pud, addr); 442 + do { 443 + next = pmd_addr_end(addr, end); 444 + /* Hyp doesn't use huge pmds */ 445 + if (!pmd_none(*pmd)) 446 + unmap_hyp_ptes(pmd, addr, next); 447 + } while (pmd++, addr = next, addr != end); 448 + 449 + if (hyp_pmd_table_empty(start_pmd)) 450 + clear_hyp_pud_entry(pud); 451 + } 452 + 453 + static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) 454 + { 455 + phys_addr_t next; 456 + 
pud_t *pud, *start_pud; 457 + 458 + start_pud = pud = pud_offset(pgd, addr); 459 + do { 460 + next = pud_addr_end(addr, end); 461 + /* Hyp doesn't use huge puds */ 462 + if (!pud_none(*pud)) 463 + unmap_hyp_pmds(pud, addr, next); 464 + } while (pud++, addr = next, addr != end); 465 + 466 + if (hyp_pud_table_empty(start_pud)) 467 + clear_hyp_pgd_entry(pgd); 468 + } 469 + 470 + static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) 471 + { 472 + pgd_t *pgd; 473 + phys_addr_t addr = start, end = start + size; 474 + phys_addr_t next; 475 + 476 + /* 477 + * We don't unmap anything from HYP, except at the hyp tear down. 478 + * Hence, we don't have to invalidate the TLBs here. 479 + */ 480 + pgd = pgdp + pgd_index(addr); 481 + do { 482 + next = pgd_addr_end(addr, end); 483 + if (!pgd_none(*pgd)) 484 + unmap_hyp_puds(pgd, addr, next); 485 + } while (pgd++, addr = next, addr != end); 486 + } 487 + 392 488 /** 393 489 * free_boot_hyp_pgd - free HYP boot page tables 394 490 * ··· 493 401 mutex_lock(&kvm_hyp_pgd_mutex); 494 402 495 403 if (boot_hyp_pgd) { 496 - unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); 497 - unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 404 + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); 405 + unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 498 406 free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); 499 407 boot_hyp_pgd = NULL; 500 408 } 501 409 502 410 if (hyp_pgd) 503 - unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 411 + unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); 504 412 505 413 mutex_unlock(&kvm_hyp_pgd_mutex); 506 414 } ··· 525 433 526 434 if (hyp_pgd) { 527 435 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) 528 - unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 436 + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 529 437 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) 530 - unmap_range(NULL, 
hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 438 + unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); 531 439 532 440 free_pages((unsigned long)hyp_pgd, hyp_pgd_order); 533 441 hyp_pgd = NULL; ··· 737 645 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); 738 646 } 739 647 740 - /* Free the HW pgd, one page at a time */ 741 - static void kvm_free_hwpgd(void *hwpgd) 742 - { 743 - free_pages_exact(hwpgd, kvm_get_hwpgd_size()); 744 - } 745 - 746 - /* Allocate the HW PGD, making sure that each page gets its own refcount */ 747 - static void *kvm_alloc_hwpgd(void) 748 - { 749 - unsigned int size = kvm_get_hwpgd_size(); 750 - 751 - return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); 752 - } 753 - 754 648 /** 755 649 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. 756 650 * @kvm: The KVM struct pointer for the VM. ··· 751 673 int kvm_alloc_stage2_pgd(struct kvm *kvm) 752 674 { 753 675 pgd_t *pgd; 754 - void *hwpgd; 755 676 756 677 if (kvm->arch.pgd != NULL) { 757 678 kvm_err("kvm_arch already initialized?\n"); 758 679 return -EINVAL; 759 680 } 760 681 761 - hwpgd = kvm_alloc_hwpgd(); 762 - if (!hwpgd) 682 + /* Allocate the HW PGD, making sure that each page gets its own refcount */ 683 + pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); 684 + if (!pgd) 763 685 return -ENOMEM; 764 - 765 - /* When the kernel uses more levels of page tables than the 766 - * guest, we allocate a fake PGD and pre-populate it to point 767 - * to the next-level page table, which will be the real 768 - * initial page table pointed to by the VTTBR. 769 - * 770 - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for 771 - * the PMD and the kernel will use folded pud. 772 - * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD 773 - * pages. 774 - */ 775 - if (KVM_PREALLOC_LEVEL > 0) { 776 - int i; 777 - 778 - /* 779 - * Allocate fake pgd for the page table manipulation macros to 780 - * work. 
This is not used by the hardware and we have no 781 - * alignment requirement for this allocation. 782 - */ 783 - pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), 784 - GFP_KERNEL | __GFP_ZERO); 785 - 786 - if (!pgd) { 787 - kvm_free_hwpgd(hwpgd); 788 - return -ENOMEM; 789 - } 790 - 791 - /* Plug the HW PGD into the fake one. */ 792 - for (i = 0; i < PTRS_PER_S2_PGD; i++) { 793 - if (KVM_PREALLOC_LEVEL == 1) 794 - pgd_populate(NULL, pgd + i, 795 - (pud_t *)hwpgd + i * PTRS_PER_PUD); 796 - else if (KVM_PREALLOC_LEVEL == 2) 797 - pud_populate(NULL, pud_offset(pgd, 0) + i, 798 - (pmd_t *)hwpgd + i * PTRS_PER_PMD); 799 - } 800 - } else { 801 - /* 802 - * Allocate actual first-level Stage-2 page table used by the 803 - * hardware for Stage-2 page table walks. 804 - */ 805 - pgd = (pgd_t *)hwpgd; 806 - } 807 686 808 687 kvm_clean_pgd(pgd); 809 688 kvm->arch.pgd = pgd; 810 689 return 0; 811 - } 812 - 813 - /** 814 - * unmap_stage2_range -- Clear stage2 page table entries to unmap a range 815 - * @kvm: The VM pointer 816 - * @start: The intermediate physical base address of the range to unmap 817 - * @size: The size of the area to unmap 818 - * 819 - * Clear a range of stage-2 mappings, lowering the various ref-counts. Must 820 - * be called while holding mmu_lock (unless for freeing the stage2 pgd before 821 - * destroying the VM), otherwise another faulting VCPU may come in and mess 822 - * with things behind our backs. 
823 - */ 824 - static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) 825 - { 826 - unmap_range(kvm, kvm->arch.pgd, start, size); 827 690 } 828 691 829 692 static void stage2_unmap_memslot(struct kvm *kvm, ··· 849 830 return; 850 831 851 832 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); 852 - kvm_free_hwpgd(kvm_get_hwpgd(kvm)); 853 - if (KVM_PREALLOC_LEVEL > 0) 854 - kfree(kvm->arch.pgd); 855 - 833 + /* Free the HW pgd, one page at a time */ 834 + free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); 856 835 kvm->arch.pgd = NULL; 857 836 } 858 837 ··· 860 843 pgd_t *pgd; 861 844 pud_t *pud; 862 845 863 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 864 - if (WARN_ON(pgd_none(*pgd))) { 846 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 847 + if (WARN_ON(stage2_pgd_none(*pgd))) { 865 848 if (!cache) 866 849 return NULL; 867 850 pud = mmu_memory_cache_alloc(cache); 868 - pgd_populate(NULL, pgd, pud); 851 + stage2_pgd_populate(pgd, pud); 869 852 get_page(virt_to_page(pgd)); 870 853 } 871 854 872 - return pud_offset(pgd, addr); 855 + return stage2_pud_offset(pgd, addr); 873 856 } 874 857 875 858 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, ··· 879 862 pmd_t *pmd; 880 863 881 864 pud = stage2_get_pud(kvm, cache, addr); 882 - if (pud_none(*pud)) { 865 + if (stage2_pud_none(*pud)) { 883 866 if (!cache) 884 867 return NULL; 885 868 pmd = mmu_memory_cache_alloc(cache); 886 - pud_populate(NULL, pud, pmd); 869 + stage2_pud_populate(pud, pmd); 887 870 get_page(virt_to_page(pud)); 888 871 } 889 872 890 - return pmd_offset(pud, addr); 873 + return stage2_pmd_offset(pud, addr); 891 874 } 892 875 893 876 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache ··· 910 893 VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd)); 911 894 912 895 old_pmd = *pmd; 913 - kvm_set_pmd(pmd, *new_pmd); 914 - if (pmd_present(old_pmd)) 896 + if (pmd_present(old_pmd)) { 897 + pmd_clear(pmd); 915 898 
kvm_tlb_flush_vmid_ipa(kvm, addr); 916 - else 899 + } else { 917 900 get_page(virt_to_page(pmd)); 901 + } 902 + 903 + kvm_set_pmd(pmd, *new_pmd); 918 904 return 0; 919 905 } 920 906 ··· 966 946 967 947 /* Create 2nd stage page table mapping - Level 3 */ 968 948 old_pte = *pte; 969 - kvm_set_pte(pte, *new_pte); 970 - if (pte_present(old_pte)) 949 + if (pte_present(old_pte)) { 950 + kvm_set_pte(pte, __pte(0)); 971 951 kvm_tlb_flush_vmid_ipa(kvm, addr); 972 - else 952 + } else { 973 953 get_page(virt_to_page(pte)); 954 + } 974 955 956 + kvm_set_pte(pte, *new_pte); 975 957 return 0; 958 + } 959 + 960 + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 961 + static int stage2_ptep_test_and_clear_young(pte_t *pte) 962 + { 963 + if (pte_young(*pte)) { 964 + *pte = pte_mkold(*pte); 965 + return 1; 966 + } 967 + return 0; 968 + } 969 + #else 970 + static int stage2_ptep_test_and_clear_young(pte_t *pte) 971 + { 972 + return __ptep_test_and_clear_young(pte); 973 + } 974 + #endif 975 + 976 + static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) 977 + { 978 + return stage2_ptep_test_and_clear_young((pte_t *)pmd); 976 979 } 977 980 978 981 /** ··· 1021 978 pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); 1022 979 1023 980 if (writable) 1024 - kvm_set_s2pte_writable(&pte); 981 + pte = kvm_s2pte_mkwrite(pte); 1025 982 1026 983 ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, 1027 984 KVM_NR_MEM_OBJS); ··· 1121 1078 pmd_t *pmd; 1122 1079 phys_addr_t next; 1123 1080 1124 - pmd = pmd_offset(pud, addr); 1081 + pmd = stage2_pmd_offset(pud, addr); 1125 1082 1126 1083 do { 1127 - next = kvm_pmd_addr_end(addr, end); 1084 + next = stage2_pmd_addr_end(addr, end); 1128 1085 if (!pmd_none(*pmd)) { 1129 - if (kvm_pmd_huge(*pmd)) { 1086 + if (pmd_thp_or_huge(*pmd)) { 1130 1087 if (!kvm_s2pmd_readonly(pmd)) 1131 1088 kvm_set_s2pmd_readonly(pmd); 1132 1089 } else { ··· 1149 1106 pud_t *pud; 1150 1107 phys_addr_t next; 1151 1108 1152 - pud = pud_offset(pgd, addr); 1109 + pud = 
stage2_pud_offset(pgd, addr); 1153 1110 do { 1154 - next = kvm_pud_addr_end(addr, end); 1155 - if (!pud_none(*pud)) { 1111 + next = stage2_pud_addr_end(addr, end); 1112 + if (!stage2_pud_none(*pud)) { 1156 1113 /* TODO:PUD not supported, revisit later if supported */ 1157 - BUG_ON(kvm_pud_huge(*pud)); 1114 + BUG_ON(stage2_pud_huge(*pud)); 1158 1115 stage2_wp_pmds(pud, addr, next); 1159 1116 } 1160 1117 } while (pud++, addr = next, addr != end); ··· 1171 1128 pgd_t *pgd; 1172 1129 phys_addr_t next; 1173 1130 1174 - pgd = kvm->arch.pgd + kvm_pgd_index(addr); 1131 + pgd = kvm->arch.pgd + stage2_pgd_index(addr); 1175 1132 do { 1176 1133 /* 1177 1134 * Release kvm_mmu_lock periodically if the memory region is ··· 1183 1140 if (need_resched() || spin_needbreak(&kvm->mmu_lock)) 1184 1141 cond_resched_lock(&kvm->mmu_lock); 1185 1142 1186 - next = kvm_pgd_addr_end(addr, end); 1187 - if (pgd_present(*pgd)) 1143 + next = stage2_pgd_addr_end(addr, end); 1144 + if (stage2_pgd_present(*pgd)) 1188 1145 stage2_wp_puds(pgd, addr, next); 1189 1146 } while (pgd++, addr = next, addr != end); 1190 1147 } ··· 1363 1320 pmd_t new_pmd = pfn_pmd(pfn, mem_type); 1364 1321 new_pmd = pmd_mkhuge(new_pmd); 1365 1322 if (writable) { 1366 - kvm_set_s2pmd_writable(&new_pmd); 1323 + new_pmd = kvm_s2pmd_mkwrite(new_pmd); 1367 1324 kvm_set_pfn_dirty(pfn); 1368 1325 } 1369 1326 coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); ··· 1372 1329 pte_t new_pte = pfn_pte(pfn, mem_type); 1373 1330 1374 1331 if (writable) { 1375 - kvm_set_s2pte_writable(&new_pte); 1332 + new_pte = kvm_s2pte_mkwrite(new_pte); 1376 1333 kvm_set_pfn_dirty(pfn); 1377 1334 mark_page_dirty(kvm, gfn); 1378 1335 } ··· 1391 1348 * Resolve the access fault by making the page young again. 1392 1349 * Note that because the faulting entry is guaranteed not to be 1393 1350 * cached in the TLB, we don't need to invalidate anything. 
1351 + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), 1352 + * so there is no need for atomic (pte|pmd)_mkyoung operations. 1394 1353 */ 1395 1354 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) 1396 1355 { ··· 1409 1364 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1410 1365 goto out; 1411 1366 1412 - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ 1367 + if (pmd_thp_or_huge(*pmd)) { /* THP, HugeTLB */ 1413 1368 *pmd = pmd_mkyoung(*pmd); 1414 1369 pfn = pmd_pfn(*pmd); 1415 1370 pfn_valid = true; ··· 1633 1588 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1634 1589 return 0; 1635 1590 1636 - if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ 1637 - if (pmd_young(*pmd)) { 1638 - *pmd = pmd_mkold(*pmd); 1639 - return 1; 1640 - } 1641 - 1642 - return 0; 1643 - } 1591 + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ 1592 + return stage2_pmdp_test_and_clear_young(pmd); 1644 1593 1645 1594 pte = pte_offset_kernel(pmd, gpa); 1646 1595 if (pte_none(*pte)) 1647 1596 return 0; 1648 1597 1649 - if (pte_young(*pte)) { 1650 - *pte = pte_mkold(*pte); /* Just a page... */ 1651 - return 1; 1652 - } 1653 - 1654 - return 0; 1598 + return stage2_ptep_test_and_clear_young(pte); 1655 1599 } 1656 1600 1657 1601 static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) ··· 1652 1618 if (!pmd || pmd_none(*pmd)) /* Nothing there */ 1653 1619 return 0; 1654 1620 1655 - if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ 1621 + if (pmd_thp_or_huge(*pmd)) /* THP, HugeTLB */ 1656 1622 return pmd_young(*pmd); 1657 1623 1658 1624 pte = pte_offset_kernel(pmd, gpa);
+50 -35
arch/arm64/include/asm/kvm_arm.h
··· 96 96 SCTLR_EL2_SA | SCTLR_EL2_I) 97 97 98 98 /* TCR_EL2 Registers bits */ 99 - #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) 100 - #define TCR_EL2_TBI (1 << 20) 101 - #define TCR_EL2_PS (7 << 16) 102 - #define TCR_EL2_PS_40B (2 << 16) 103 - #define TCR_EL2_TG0 (1 << 14) 104 - #define TCR_EL2_SH0 (3 << 12) 105 - #define TCR_EL2_ORGN0 (3 << 10) 106 - #define TCR_EL2_IRGN0 (3 << 8) 107 - #define TCR_EL2_T0SZ 0x3f 108 - #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ 109 - TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) 99 + #define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) 100 + #define TCR_EL2_TBI (1 << 20) 101 + #define TCR_EL2_PS_SHIFT 16 102 + #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) 103 + #define TCR_EL2_PS_40B (2 << TCR_EL2_PS_SHIFT) 104 + #define TCR_EL2_TG0_MASK TCR_TG0_MASK 105 + #define TCR_EL2_SH0_MASK TCR_SH0_MASK 106 + #define TCR_EL2_ORGN0_MASK TCR_ORGN0_MASK 107 + #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK 108 + #define TCR_EL2_T0SZ_MASK 0x3f 109 + #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ 110 + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) 110 111 111 112 /* VTCR_EL2 Registers bits */ 112 113 #define VTCR_EL2_RES1 (1 << 31) 113 - #define VTCR_EL2_PS_MASK (7 << 16) 114 - #define VTCR_EL2_TG0_MASK (1 << 14) 115 - #define VTCR_EL2_TG0_4K (0 << 14) 116 - #define VTCR_EL2_TG0_64K (1 << 14) 117 - #define VTCR_EL2_SH0_MASK (3 << 12) 118 - #define VTCR_EL2_SH0_INNER (3 << 12) 119 - #define VTCR_EL2_ORGN0_MASK (3 << 10) 120 - #define VTCR_EL2_ORGN0_WBWA (1 << 10) 121 - #define VTCR_EL2_IRGN0_MASK (3 << 8) 122 - #define VTCR_EL2_IRGN0_WBWA (1 << 8) 123 - #define VTCR_EL2_SL0_MASK (3 << 6) 124 - #define VTCR_EL2_SL0_LVL1 (1 << 6) 114 + #define VTCR_EL2_HD (1 << 22) 115 + #define VTCR_EL2_HA (1 << 21) 116 + #define VTCR_EL2_PS_MASK TCR_EL2_PS_MASK 117 + #define VTCR_EL2_TG0_MASK TCR_TG0_MASK 118 + #define VTCR_EL2_TG0_4K TCR_TG0_4K 119 + #define VTCR_EL2_TG0_16K TCR_TG0_16K 120 + #define VTCR_EL2_TG0_64K TCR_TG0_64K 121 + 
#define VTCR_EL2_SH0_MASK TCR_SH0_MASK 122 + #define VTCR_EL2_SH0_INNER TCR_SH0_INNER 123 + #define VTCR_EL2_ORGN0_MASK TCR_ORGN0_MASK 124 + #define VTCR_EL2_ORGN0_WBWA TCR_ORGN0_WBWA 125 + #define VTCR_EL2_IRGN0_MASK TCR_IRGN0_MASK 126 + #define VTCR_EL2_IRGN0_WBWA TCR_IRGN0_WBWA 127 + #define VTCR_EL2_SL0_SHIFT 6 128 + #define VTCR_EL2_SL0_MASK (3 << VTCR_EL2_SL0_SHIFT) 129 + #define VTCR_EL2_SL0_LVL1 (1 << VTCR_EL2_SL0_SHIFT) 125 130 #define VTCR_EL2_T0SZ_MASK 0x3f 126 131 #define VTCR_EL2_T0SZ_40B 24 127 132 #define VTCR_EL2_VS_SHIFT 19 ··· 142 137 * (see hyp-init.S). 143 138 * 144 139 * Note that when using 4K pages, we concatenate two first level page tables 145 - * together. 140 + * together. With 16K pages, we concatenate 16 first level page tables. 146 141 * 147 142 * The magic numbers used for VTTBR_X in this patch can be found in Tables 148 143 * D4-23 and D4-25 in ARM DDI 0487A.b. 149 144 */ 145 + 146 + #define VTCR_EL2_T0SZ_IPA VTCR_EL2_T0SZ_40B 147 + #define VTCR_EL2_COMMON_BITS (VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ 148 + VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1) 149 + 150 150 #ifdef CONFIG_ARM64_64K_PAGES 151 151 /* 152 152 * Stage2 translation configuration: 153 - * 40bits input (T0SZ = 24) 154 153 * 64kB pages (TG0 = 1) 155 154 * 2 level page tables (SL = 1) 156 155 */ 157 - #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ 158 - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ 159 - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) 160 - #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) 161 - #else 156 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1) 157 + #define VTTBR_X_TGRAN_MAGIC 38 158 + #elif defined(CONFIG_ARM64_16K_PAGES) 162 159 /* 163 160 * Stage2 translation configuration: 164 - * 40bits input (T0SZ = 24) 161 + * 16kB pages (TG0 = 2) 162 + * 2 level page tables (SL = 1) 163 + */ 164 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1) 165 + #define VTTBR_X_TGRAN_MAGIC 42 166 + #else /* 4K */ 167 + /* 168 + 
* Stage2 translation configuration: 165 169 * 4kB pages (TG0 = 0) 166 170 * 3 level page tables (SL = 1) 167 171 */ 168 - #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ 169 - VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ 170 - VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1) 171 - #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) 172 + #define VTCR_EL2_TGRAN_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1) 173 + #define VTTBR_X_TGRAN_MAGIC 37 172 174 #endif 175 + 176 + #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS) 177 + #define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA) 173 178 174 179 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) 175 180 #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+27 -84
arch/arm64/include/asm/kvm_mmu.h
··· 45 45 */ 46 46 #define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) 47 47 48 - /* 49 - * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation 50 - * levels in addition to the PGD and potentially the PUD which are 51 - * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2 52 - * tables use one level of tables less than the kernel. 53 - */ 54 - #ifdef CONFIG_ARM64_64K_PAGES 55 - #define KVM_MMU_CACHE_MIN_PAGES 1 56 - #else 57 - #define KVM_MMU_CACHE_MIN_PAGES 2 58 - #endif 59 - 60 48 #ifdef __ASSEMBLY__ 61 49 62 50 #include <asm/alternative.h> ··· 79 91 #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) 80 92 #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) 81 93 94 + #include <asm/stage2_pgtable.h> 95 + 82 96 int create_hyp_mappings(void *from, void *to); 83 97 int create_hyp_io_mappings(void *from, void *to, phys_addr_t); 84 98 void free_boot_hyp_pgd(void); ··· 111 121 static inline void kvm_clean_pte(pte_t *pte) {} 112 122 static inline void kvm_clean_pte_entry(pte_t *pte) {} 113 123 114 - static inline void kvm_set_s2pte_writable(pte_t *pte) 124 + static inline pte_t kvm_s2pte_mkwrite(pte_t pte) 115 125 { 116 - pte_val(*pte) |= PTE_S2_RDWR; 126 + pte_val(pte) |= PTE_S2_RDWR; 127 + return pte; 117 128 } 118 129 119 - static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 130 + static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) 120 131 { 121 - pmd_val(*pmd) |= PMD_S2_RDWR; 132 + pmd_val(pmd) |= PMD_S2_RDWR; 133 + return pmd; 122 134 } 123 135 124 136 static inline void kvm_set_s2pte_readonly(pte_t *pte) 125 137 { 126 - pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; 138 + pteval_t pteval; 139 + unsigned long tmp; 140 + 141 + asm volatile("// kvm_set_s2pte_readonly\n" 142 + " prfm pstl1strm, %2\n" 143 + "1: ldxr %0, %2\n" 144 + " and %0, %0, %3 // clear PTE_S2_RDWR\n" 145 + " orr %0, %0, %4 // set PTE_S2_RDONLY\n" 146 + " stxr %w1, %0, %2\n" 147 + " cbnz %w1, 1b\n" 148 + : "=&r" (pteval), "=&r" (tmp), "+Q" 
(pte_val(*pte)) 149 + : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY)); 127 150 } 128 151 129 152 static inline bool kvm_s2pte_readonly(pte_t *pte) ··· 146 143 147 144 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 148 145 { 149 - pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; 146 + kvm_set_s2pte_readonly((pte_t *)pmd); 150 147 } 151 148 152 149 static inline bool kvm_s2pmd_readonly(pmd_t *pmd) 153 150 { 154 - return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; 155 - } 156 - 157 - 158 - #define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end) 159 - #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) 160 - #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) 161 - 162 - /* 163 - * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address 164 - * the entire IPA input range with a single pgd entry, and we would only need 165 - * one pgd entry. Note that in this case, the pgd is actually not used by 166 - * the MMU for Stage-2 translations, but is merely a fake pgd used as a data 167 - * structure for the kernel pgtable macros to work. 168 - */ 169 - #if PGDIR_SHIFT > KVM_PHYS_SHIFT 170 - #define PTRS_PER_S2_PGD_SHIFT 0 171 - #else 172 - #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) 173 - #endif 174 - #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) 175 - 176 - #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 177 - 178 - /* 179 - * If we are concatenating first level stage-2 page tables, we would have less 180 - * than or equal to 16 pointers in the fake PGD, because that's what the 181 - * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS) 182 - * represents the first level for the host, and we add 1 to go to the next 183 - * level (which uses contatenation) for the stage-2 tables. 
184 - */ 185 - #if PTRS_PER_S2_PGD <= 16 186 - #define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1) 187 - #else 188 - #define KVM_PREALLOC_LEVEL (0) 189 - #endif 190 - 191 - static inline void *kvm_get_hwpgd(struct kvm *kvm) 192 - { 193 - pgd_t *pgd = kvm->arch.pgd; 194 - pud_t *pud; 195 - 196 - if (KVM_PREALLOC_LEVEL == 0) 197 - return pgd; 198 - 199 - pud = pud_offset(pgd, 0); 200 - if (KVM_PREALLOC_LEVEL == 1) 201 - return pud; 202 - 203 - BUG_ON(KVM_PREALLOC_LEVEL != 2); 204 - return pmd_offset(pud, 0); 205 - } 206 - 207 - static inline unsigned int kvm_get_hwpgd_size(void) 208 - { 209 - if (KVM_PREALLOC_LEVEL > 0) 210 - return PTRS_PER_S2_PGD * PAGE_SIZE; 211 - return PTRS_PER_S2_PGD * sizeof(pgd_t); 151 + return kvm_s2pte_readonly((pte_t *)pmd); 212 152 } 213 153 214 154 static inline bool kvm_page_empty(void *ptr) ··· 160 214 return page_count(ptr_page) == 1; 161 215 } 162 216 163 - #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) 217 + #define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) 164 218 165 219 #ifdef __PAGETABLE_PMD_FOLDED 166 - #define kvm_pmd_table_empty(kvm, pmdp) (0) 220 + #define hyp_pmd_table_empty(pmdp) (0) 167 221 #else 168 - #define kvm_pmd_table_empty(kvm, pmdp) \ 169 - (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2)) 222 + #define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 170 223 #endif 171 224 172 225 #ifdef __PAGETABLE_PUD_FOLDED 173 - #define kvm_pud_table_empty(kvm, pudp) (0) 226 + #define hyp_pud_table_empty(pudp) (0) 174 227 #else 175 - #define kvm_pud_table_empty(kvm, pudp) \ 176 - (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1)) 228 + #define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) 177 229 #endif 178 - 179 230 180 231 struct kvm; 181 232
+63 -17
arch/arm64/include/asm/pgtable-hwdef.h
··· 208 208 #define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) 209 209 #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) 210 210 #define TCR_TxSZ_WIDTH 6 211 - #define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) 212 - #define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) 213 - #define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) 214 - #define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) 215 - #define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) 216 - #define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) 217 - #define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) 218 - #define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) 219 - #define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) 220 - #define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) 221 - #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) 222 - #define TCR_TG0_4K (UL(0) << 14) 223 - #define TCR_TG0_64K (UL(1) << 14) 224 - #define TCR_TG0_16K (UL(2) << 14) 225 - #define TCR_TG1_16K (UL(1) << 30) 226 - #define TCR_TG1_4K (UL(2) << 30) 227 - #define TCR_TG1_64K (UL(3) << 30) 211 + 212 + #define TCR_IRGN0_SHIFT 8 213 + #define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) 214 + #define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) 215 + #define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) 216 + #define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) 217 + #define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) 218 + 219 + #define TCR_IRGN1_SHIFT 24 220 + #define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT) 221 + #define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT) 222 + #define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT) 223 + #define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT) 224 + #define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT) 225 + 226 + #define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC) 227 + #define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) 228 + #define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT) 229 + #define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA) 230 + #define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK) 231 + 232 + 233 + 
#define TCR_ORGN0_SHIFT 10 234 + #define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) 235 + #define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) 236 + #define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) 237 + #define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) 238 + #define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) 239 + 240 + #define TCR_ORGN1_SHIFT 26 241 + #define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT) 242 + #define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT) 243 + #define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT) 244 + #define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT) 245 + #define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT) 246 + 247 + #define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC) 248 + #define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA) 249 + #define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT) 250 + #define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA) 251 + #define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK) 252 + 253 + #define TCR_SH0_SHIFT 12 254 + #define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) 255 + #define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) 256 + 257 + #define TCR_SH1_SHIFT 28 258 + #define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT) 259 + #define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT) 260 + #define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER) 261 + 262 + #define TCR_TG0_SHIFT 14 263 + #define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) 264 + #define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) 265 + #define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) 266 + #define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) 267 + 268 + #define TCR_TG1_SHIFT 30 269 + #define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT) 270 + #define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT) 271 + #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) 272 + #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) 273 + 228 274 #define TCR_ASID16 (UL(1) << 36) 229 275 #define TCR_TBI0 (UL(1) << 37) 230 276 #define TCR_HA (UL(1) << 39)
+11 -4
arch/arm64/include/asm/pgtable.h
··· 290 290 #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) 291 291 #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) 292 292 293 + #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) 294 + 293 295 #define __HAVE_ARCH_PMD_WRITE 294 296 #define pmd_write(pmd) pte_write(pmd_pte(pmd)) 295 297 ··· 532 530 * Atomic pte/pmd modifications. 533 531 */ 534 532 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 535 - static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 536 - unsigned long address, 537 - pte_t *ptep) 533 + static inline int __ptep_test_and_clear_young(pte_t *ptep) 538 534 { 539 535 pteval_t pteval; 540 536 unsigned int tmp, res; 541 537 542 - asm volatile("// ptep_test_and_clear_young\n" 538 + asm volatile("// __ptep_test_and_clear_young\n" 543 539 " prfm pstl1strm, %2\n" 544 540 "1: ldxr %0, %2\n" 545 541 " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" ··· 548 548 : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); 549 549 550 550 return res; 551 + } 552 + 553 + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 554 + unsigned long address, 555 + pte_t *ptep) 556 + { 557 + return __ptep_test_and_clear_young(ptep); 551 558 } 552 559 553 560 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+42
arch/arm64/include/asm/stage2_pgtable-nopmd.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #ifndef __ARM64_S2_PGTABLE_NOPMD_H_ 18 + #define __ARM64_S2_PGTABLE_NOPMD_H_ 19 + 20 + #include <asm/stage2_pgtable-nopud.h> 21 + 22 + #define __S2_PGTABLE_PMD_FOLDED 23 + 24 + #define S2_PMD_SHIFT S2_PUD_SHIFT 25 + #define S2_PTRS_PER_PMD 1 26 + #define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) 27 + #define S2_PMD_MASK (~(S2_PMD_SIZE-1)) 28 + 29 + #define stage2_pud_none(pud) (0) 30 + #define stage2_pud_present(pud) (1) 31 + #define stage2_pud_clear(pud) do { } while (0) 32 + #define stage2_pud_populate(pud, pmd) do { } while (0) 33 + #define stage2_pmd_offset(pud, address) ((pmd_t *)(pud)) 34 + 35 + #define stage2_pmd_free(pmd) do { } while (0) 36 + 37 + #define stage2_pmd_addr_end(addr, end) (end) 38 + 39 + #define stage2_pud_huge(pud) (0) 40 + #define stage2_pmd_table_empty(pmdp) (0) 41 + 42 + #endif
+39
arch/arm64/include/asm/stage2_pgtable-nopud.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 + */ 16 + 17 + #ifndef __ARM64_S2_PGTABLE_NOPUD_H_ 18 + #define __ARM64_S2_PGTABLE_NOPUD_H_ 19 + 20 + #define __S2_PGTABLE_PUD_FOLDED 21 + 22 + #define S2_PUD_SHIFT S2_PGDIR_SHIFT 23 + #define S2_PTRS_PER_PUD 1 24 + #define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) 25 + #define S2_PUD_MASK (~(S2_PUD_SIZE-1)) 26 + 27 + #define stage2_pgd_none(pgd) (0) 28 + #define stage2_pgd_present(pgd) (1) 29 + #define stage2_pgd_clear(pgd) do { } while (0) 30 + #define stage2_pgd_populate(pgd, pud) do { } while (0) 31 + 32 + #define stage2_pud_offset(pgd, address) ((pud_t *)(pgd)) 33 + 34 + #define stage2_pud_free(x) do { } while (0) 35 + 36 + #define stage2_pud_addr_end(addr, end) (end) 37 + #define stage2_pud_table_empty(pmdp) (0) 38 + 39 + #endif
+142
arch/arm64/include/asm/stage2_pgtable.h
··· 1 + /* 2 + * Copyright (C) 2016 - ARM Ltd 3 + * 4 + * stage2 page table helpers 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, 11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 + * GNU General Public License for more details. 14 + * 15 + * You should have received a copy of the GNU General Public License 16 + * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 + */ 18 + 19 + #ifndef __ARM64_S2_PGTABLE_H_ 20 + #define __ARM64_S2_PGTABLE_H_ 21 + 22 + #include <asm/pgtable.h> 23 + 24 + /* 25 + * The hardware supports concatenation of up to 16 tables at stage2 entry level 26 + * and we use the feature whenever possible. 27 + * 28 + * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3). 29 + * On arm64, the smallest PAGE_SIZE supported is 4k, which means 30 + * (PAGE_SHIFT - 3) > 4 holds for all page sizes. 31 + * This implies, the total number of page table levels at stage2 expected 32 + * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4) 33 + * in normal translations(e.g, stage1), since we cannot have another level in 34 + * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4). 35 + */ 36 + #define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4) 37 + 38 + /* 39 + * With all the supported VA_BITs and 40bit guest IPA, the following condition 40 + * is always true: 41 + * 42 + * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS 43 + * 44 + * We base our stage-2 page table walker helpers on this assumption and 45 + * fall back to using the host version of the helper wherever possible. 
46 + * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall back 47 + * to using the host version, since it is guaranteed it is not folded at host. 48 + * 49 + * If the condition breaks in the future, we can rearrange the host level 50 + * definitions and reuse them for stage2. Till then... 51 + */ 52 + #if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS 53 + #error "Unsupported combination of guest IPA and host VA_BITS." 54 + #endif 55 + 56 + /* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */ 57 + #define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS) 58 + #define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT) 59 + #define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1)) 60 + 61 + /* 62 + * The number of PTRS across all concatenated stage2 tables given by the 63 + * number of bits resolved at the initial level. 64 + */ 65 + #define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT)) 66 + 67 + /* 68 + * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation 69 + * levels in addition to the PGD. 70 + */ 71 + #define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1) 72 + 73 + 74 + #if STAGE2_PGTABLE_LEVELS > 3 75 + 76 + #define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) 77 + #define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT) 78 + #define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) 79 + 80 + #define stage2_pgd_none(pgd) pgd_none(pgd) 81 + #define stage2_pgd_clear(pgd) pgd_clear(pgd) 82 + #define stage2_pgd_present(pgd) pgd_present(pgd) 83 + #define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud) 84 + #define stage2_pud_offset(pgd, address) pud_offset(pgd, address) 85 + #define stage2_pud_free(pud) pud_free(NULL, pud) 86 + 87 + #define stage2_pud_table_empty(pudp) kvm_page_empty(pudp) 88 + 89 + static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end) 90 + { 91 + phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; 92 + 93 + return (boundary - 1 < end - 1) ? 
boundary : end; 94 + } 95 + 96 + #endif /* STAGE2_PGTABLE_LEVELS > 3 */ 97 + 98 + 99 + #if STAGE2_PGTABLE_LEVELS > 2 100 + 101 + #define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) 102 + #define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT) 103 + #define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) 104 + 105 + #define stage2_pud_none(pud) pud_none(pud) 106 + #define stage2_pud_clear(pud) pud_clear(pud) 107 + #define stage2_pud_present(pud) pud_present(pud) 108 + #define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd) 109 + #define stage2_pmd_offset(pud, address) pmd_offset(pud, address) 110 + #define stage2_pmd_free(pmd) pmd_free(NULL, pmd) 111 + 112 + #define stage2_pud_huge(pud) pud_huge(pud) 113 + #define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp) 114 + 115 + static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end) 116 + { 117 + phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; 118 + 119 + return (boundary - 1 < end - 1) ? boundary : end; 120 + } 121 + 122 + #endif /* STAGE2_PGTABLE_LEVELS > 2 */ 123 + 124 + #define stage2_pte_table_empty(ptep) kvm_page_empty(ptep) 125 + 126 + #if STAGE2_PGTABLE_LEVELS == 2 127 + #include <asm/stage2_pgtable-nopmd.h> 128 + #elif STAGE2_PGTABLE_LEVELS == 3 129 + #include <asm/stage2_pgtable-nopud.h> 130 + #endif 131 + 132 + 133 + #define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) 134 + 135 + static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end) 136 + { 137 + phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK; 138 + 139 + return (boundary - 1 < end - 1) ? boundary : end; 140 + } 141 + 142 + #endif /* __ARM64_S2_PGTABLE_H_ */
-1
arch/arm64/kvm/Kconfig
··· 22 22 config KVM 23 23 bool "Kernel-based Virtual Machine (KVM) support" 24 24 depends on OF 25 - depends on !ARM64_16K_PAGES 26 25 select MMU_NOTIFIER 27 26 select PREEMPT_NOTIFIERS 28 27 select ANON_INODES
+8
arch/arm64/kvm/hyp/s2-setup.c
··· 66 66 val |= 64 - (parange > 40 ? 40 : parange); 67 67 68 68 /* 69 + * Check the availability of Hardware Access Flag / Dirty Bit 70 + * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2. 71 + */ 72 + tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf; 73 + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp) 74 + val |= VTCR_EL2_HA; 75 + 76 + /* 69 77 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS 70 78 * bit in VTCR_EL2. 71 79 */
+7 -4
drivers/clocksource/arm_arch_timer.c
··· 468 468 .mask = CLOCKSOURCE_MASK(56), 469 469 }; 470 470 471 - static struct timecounter timecounter; 471 + static struct arch_timer_kvm_info arch_timer_kvm_info; 472 472 473 - struct timecounter *arch_timer_get_timecounter(void) 473 + struct arch_timer_kvm_info *arch_timer_get_kvm_info(void) 474 474 { 475 - return &timecounter; 475 + return &arch_timer_kvm_info; 476 476 } 477 477 478 478 static void __init arch_counter_register(unsigned type) ··· 500 500 clocksource_register_hz(&clocksource_counter, arch_timer_rate); 501 501 cyclecounter.mult = clocksource_counter.mult; 502 502 cyclecounter.shift = clocksource_counter.shift; 503 - timecounter_init(&timecounter, &cyclecounter, start_count); 503 + timecounter_init(&arch_timer_kvm_info.timecounter, 504 + &cyclecounter, start_count); 504 505 505 506 /* 56 bits minimum, so we assume worst case rollover */ 506 507 sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); ··· 745 744 746 745 arch_timer_register(); 747 746 arch_timer_common_init(); 747 + 748 + arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI]; 748 749 } 749 750 750 751 static void __init arch_timer_of_init(struct device_node *np)
+13
drivers/irqchip/irq-gic-common.c
··· 21 21 22 22 #include "irq-gic-common.h" 23 23 24 + static const struct gic_kvm_info *gic_kvm_info; 25 + 26 + const struct gic_kvm_info *gic_get_kvm_info(void) 27 + { 28 + return gic_kvm_info; 29 + } 30 + 31 + void gic_set_kvm_info(const struct gic_kvm_info *info) 32 + { 33 + BUG_ON(gic_kvm_info != NULL); 34 + gic_kvm_info = info; 35 + } 36 + 24 37 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 25 38 void *data) 26 39 {
+3
drivers/irqchip/irq-gic-common.h
··· 19 19 20 20 #include <linux/of.h> 21 21 #include <linux/irqdomain.h> 22 + #include <linux/irqchip/arm-gic-common.h> 22 23 23 24 struct gic_quirk { 24 25 const char *desc; ··· 35 34 void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); 36 35 void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 37 36 void *data); 37 + 38 + void gic_set_kvm_info(const struct gic_kvm_info *info); 38 39 39 40 #endif /* _IRQ_GIC_COMMON_H */
+148 -28
drivers/irqchip/irq-gic-v3.c
··· 15 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 16 */ 17 17 18 + #define pr_fmt(fmt) "GICv3: " fmt 19 + 18 20 #include <linux/acpi.h> 19 21 #include <linux/cpu.h> 20 22 #include <linux/cpu_pm.h> ··· 30 28 #include <linux/slab.h> 31 29 32 30 #include <linux/irqchip.h> 31 + #include <linux/irqchip/arm-gic-common.h> 33 32 #include <linux/irqchip/arm-gic-v3.h> 34 33 35 34 #include <asm/cputype.h> ··· 58 55 59 56 static struct gic_chip_data gic_data __read_mostly; 60 57 static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE; 58 + 59 + static struct gic_kvm_info gic_v3_kvm_info; 61 60 62 61 #define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) 63 62 #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) ··· 906 901 return 0; 907 902 } 908 903 904 + static void __init gic_of_setup_kvm_info(struct device_node *node) 905 + { 906 + int ret; 907 + struct resource r; 908 + u32 gicv_idx; 909 + 910 + gic_v3_kvm_info.type = GIC_V3; 911 + 912 + gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); 913 + if (!gic_v3_kvm_info.maint_irq) 914 + return; 915 + 916 + if (of_property_read_u32(node, "#redistributor-regions", 917 + &gicv_idx)) 918 + gicv_idx = 1; 919 + 920 + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ 921 + ret = of_address_to_resource(node, gicv_idx, &r); 922 + if (!ret) 923 + gic_v3_kvm_info.vcpu = r; 924 + 925 + gic_set_kvm_info(&gic_v3_kvm_info); 926 + } 927 + 909 928 static int __init gic_of_init(struct device_node *node, struct device_node *parent) 910 929 { 911 930 void __iomem *dist_base; ··· 981 952 982 953 err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, 983 954 redist_stride, &node->fwnode); 984 - if (!err) 955 + if (!err) { 956 + gic_of_setup_kvm_info(node); 985 957 return 0; 958 + } 986 959 987 960 out_unmap_rdist: 988 961 for (i = 0; i < nr_redist_regions; i++) ··· 999 968 IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); 1000 969 1001 970 #ifdef CONFIG_ACPI 1002 - static 
void __iomem *dist_base; 1003 - static struct redist_region *redist_regs __initdata; 1004 - static u32 nr_redist_regions __initdata; 1005 - static bool single_redist; 971 + static struct 972 + { 973 + void __iomem *dist_base; 974 + struct redist_region *redist_regs; 975 + u32 nr_redist_regions; 976 + bool single_redist; 977 + u32 maint_irq; 978 + int maint_irq_mode; 979 + phys_addr_t vcpu_base; 980 + } acpi_data __initdata; 1006 981 1007 982 static void __init 1008 983 gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) 1009 984 { 1010 985 static int count = 0; 1011 986 1012 - redist_regs[count].phys_base = phys_base; 1013 - redist_regs[count].redist_base = redist_base; 1014 - redist_regs[count].single_redist = single_redist; 987 + acpi_data.redist_regs[count].phys_base = phys_base; 988 + acpi_data.redist_regs[count].redist_base = redist_base; 989 + acpi_data.redist_regs[count].single_redist = acpi_data.single_redist; 1015 990 count++; 1016 991 } 1017 992 ··· 1045 1008 { 1046 1009 struct acpi_madt_generic_interrupt *gicc = 1047 1010 (struct acpi_madt_generic_interrupt *)header; 1048 - u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; 1011 + u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; 1049 1012 u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? 
SZ_64K * 4 : SZ_64K * 2; 1050 1013 void __iomem *redist_base; 1051 1014 ··· 1062 1025 acpi_tbl_entry_handler redist_parser; 1063 1026 enum acpi_madt_type type; 1064 1027 1065 - if (single_redist) { 1028 + if (acpi_data.single_redist) { 1066 1029 type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; 1067 1030 redist_parser = gic_acpi_parse_madt_gicc; 1068 1031 } else { ··· 1113 1076 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, 1114 1077 gic_acpi_match_gicr, 0); 1115 1078 if (count > 0) { 1116 - single_redist = false; 1079 + acpi_data.single_redist = false; 1117 1080 return count; 1118 1081 } 1119 1082 1120 1083 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, 1121 1084 gic_acpi_match_gicc, 0); 1122 1085 if (count > 0) 1123 - single_redist = true; 1086 + acpi_data.single_redist = true; 1124 1087 1125 1088 return count; 1126 1089 } ··· 1140 1103 if (count <= 0) 1141 1104 return false; 1142 1105 1143 - nr_redist_regions = count; 1106 + acpi_data.nr_redist_regions = count; 1144 1107 return true; 1145 1108 } 1146 1109 1110 + static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header, 1111 + const unsigned long end) 1112 + { 1113 + struct acpi_madt_generic_interrupt *gicc = 1114 + (struct acpi_madt_generic_interrupt *)header; 1115 + int maint_irq_mode; 1116 + static int first_madt = true; 1117 + 1118 + /* Skip unusable CPUs */ 1119 + if (!(gicc->flags & ACPI_MADT_ENABLED)) 1120 + return 0; 1121 + 1122 + maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 
1123 + ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; 1124 + 1125 + if (first_madt) { 1126 + first_madt = false; 1127 + 1128 + acpi_data.maint_irq = gicc->vgic_interrupt; 1129 + acpi_data.maint_irq_mode = maint_irq_mode; 1130 + acpi_data.vcpu_base = gicc->gicv_base_address; 1131 + 1132 + return 0; 1133 + } 1134 + 1135 + /* 1136 + * The maintenance interrupt and GICV should be the same for every CPU 1137 + */ 1138 + if ((acpi_data.maint_irq != gicc->vgic_interrupt) || 1139 + (acpi_data.maint_irq_mode != maint_irq_mode) || 1140 + (acpi_data.vcpu_base != gicc->gicv_base_address)) 1141 + return -EINVAL; 1142 + 1143 + return 0; 1144 + } 1145 + 1146 + static bool __init gic_acpi_collect_virt_info(void) 1147 + { 1148 + int count; 1149 + 1150 + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, 1151 + gic_acpi_parse_virt_madt_gicc, 0); 1152 + 1153 + return (count > 0); 1154 + } 1155 + 1147 1156 #define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) 1157 + #define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) 1158 + #define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) 1159 + 1160 + static void __init gic_acpi_setup_kvm_info(void) 1161 + { 1162 + int irq; 1163 + 1164 + if (!gic_acpi_collect_virt_info()) { 1165 + pr_warn("Unable to get hardware information used for virtualization\n"); 1166 + return; 1167 + } 1168 + 1169 + gic_v3_kvm_info.type = GIC_V3; 1170 + 1171 + irq = acpi_register_gsi(NULL, acpi_data.maint_irq, 1172 + acpi_data.maint_irq_mode, 1173 + ACPI_ACTIVE_HIGH); 1174 + if (irq <= 0) 1175 + return; 1176 + 1177 + gic_v3_kvm_info.maint_irq = irq; 1178 + 1179 + if (acpi_data.vcpu_base) { 1180 + struct resource *vcpu = &gic_v3_kvm_info.vcpu; 1181 + 1182 + vcpu->flags = IORESOURCE_MEM; 1183 + vcpu->start = acpi_data.vcpu_base; 1184 + vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; 1185 + } 1186 + 1187 + gic_set_kvm_info(&gic_v3_kvm_info); 1188 + } 1148 1189 1149 1190 static int __init 1150 1191 gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) 1151 1192 { 1152 
1193 struct acpi_madt_generic_distributor *dist; 1153 1194 struct fwnode_handle *domain_handle; 1195 + size_t size; 1154 1196 int i, err; 1155 1197 1156 1198 /* Get distributor base address */ 1157 1199 dist = (struct acpi_madt_generic_distributor *)header; 1158 - dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE); 1159 - if (!dist_base) { 1200 + acpi_data.dist_base = ioremap(dist->base_address, 1201 + ACPI_GICV3_DIST_MEM_SIZE); 1202 + if (!acpi_data.dist_base) { 1160 1203 pr_err("Unable to map GICD registers\n"); 1161 1204 return -ENOMEM; 1162 1205 } 1163 1206 1164 - err = gic_validate_dist_version(dist_base); 1207 + err = gic_validate_dist_version(acpi_data.dist_base); 1165 1208 if (err) { 1166 - pr_err("No distributor detected at @%p, giving up", dist_base); 1209 + pr_err("No distributor detected at @%p, giving up", 1210 + acpi_data.dist_base); 1167 1211 goto out_dist_unmap; 1168 1212 } 1169 1213 1170 - redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions, 1171 - GFP_KERNEL); 1172 - if (!redist_regs) { 1214 + size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions; 1215 + acpi_data.redist_regs = kzalloc(size, GFP_KERNEL); 1216 + if (!acpi_data.redist_regs) { 1173 1217 err = -ENOMEM; 1174 1218 goto out_dist_unmap; 1175 1219 } ··· 1259 1141 if (err) 1260 1142 goto out_redist_unmap; 1261 1143 1262 - domain_handle = irq_domain_alloc_fwnode(dist_base); 1144 + domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base); 1263 1145 if (!domain_handle) { 1264 1146 err = -ENOMEM; 1265 1147 goto out_redist_unmap; 1266 1148 } 1267 1149 1268 - err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0, 1269 - domain_handle); 1150 + err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs, 1151 + acpi_data.nr_redist_regions, 0, domain_handle); 1270 1152 if (err) 1271 1153 goto out_fwhandle_free; 1272 1154 1273 1155 acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); 1156 + gic_acpi_setup_kvm_info(); 1157 + 1274 1158 
return 0; 1275 1159 1276 1160 out_fwhandle_free: 1277 1161 irq_domain_free_fwnode(domain_handle); 1278 1162 out_redist_unmap: 1279 - for (i = 0; i < nr_redist_regions; i++) 1280 - if (redist_regs[i].redist_base) 1281 - iounmap(redist_regs[i].redist_base); 1282 - kfree(redist_regs); 1163 + for (i = 0; i < acpi_data.nr_redist_regions; i++) 1164 + if (acpi_data.redist_regs[i].redist_base) 1165 + iounmap(acpi_data.redist_regs[i].redist_base); 1166 + kfree(acpi_data.redist_regs); 1283 1167 out_dist_unmap: 1284 - iounmap(dist_base); 1168 + iounmap(acpi_data.dist_base); 1285 1169 return err; 1286 1170 } 1287 1171 IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
+82 -5
drivers/irqchip/irq-gic.c
··· 102 102 103 103 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; 104 104 105 + static struct gic_kvm_info gic_v2_kvm_info; 106 + 105 107 #ifdef CONFIG_GIC_NON_BANKED 106 108 static void __iomem *gic_get_percpu_base(union gic_base *base) 107 109 { ··· 1191 1189 return true; 1192 1190 } 1193 1191 1192 + static void __init gic_of_setup_kvm_info(struct device_node *node) 1193 + { 1194 + int ret; 1195 + struct resource *vctrl_res = &gic_v2_kvm_info.vctrl; 1196 + struct resource *vcpu_res = &gic_v2_kvm_info.vcpu; 1197 + 1198 + gic_v2_kvm_info.type = GIC_V2; 1199 + 1200 + gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); 1201 + if (!gic_v2_kvm_info.maint_irq) 1202 + return; 1203 + 1204 + ret = of_address_to_resource(node, 2, vctrl_res); 1205 + if (ret) 1206 + return; 1207 + 1208 + ret = of_address_to_resource(node, 3, vcpu_res); 1209 + if (ret) 1210 + return; 1211 + 1212 + gic_set_kvm_info(&gic_v2_kvm_info); 1213 + } 1214 + 1194 1215 int __init 1195 1216 gic_of_init(struct device_node *node, struct device_node *parent) 1196 1217 { ··· 1243 1218 1244 1219 __gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset, 1245 1220 &node->fwnode); 1246 - if (!gic_cnt) 1221 + if (!gic_cnt) { 1247 1222 gic_init_physaddr(node); 1223 + gic_of_setup_kvm_info(node); 1224 + } 1248 1225 1249 1226 if (parent) { 1250 1227 irq = irq_of_parse_and_map(node, 0); ··· 1272 1245 #endif 1273 1246 1274 1247 #ifdef CONFIG_ACPI 1275 - static phys_addr_t cpu_phy_base __initdata; 1248 + static struct 1249 + { 1250 + phys_addr_t cpu_phys_base; 1251 + u32 maint_irq; 1252 + int maint_irq_mode; 1253 + phys_addr_t vctrl_base; 1254 + phys_addr_t vcpu_base; 1255 + } acpi_data __initdata; 1276 1256 1277 1257 static int __init 1278 1258 gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, ··· 1299 1265 * All CPU interface addresses have to be the same. 
1300 1266 */ 1301 1267 gic_cpu_base = processor->base_address; 1302 - if (cpu_base_assigned && gic_cpu_base != cpu_phy_base) 1268 + if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base) 1303 1269 return -EINVAL; 1304 1270 1305 - cpu_phy_base = gic_cpu_base; 1271 + acpi_data.cpu_phys_base = gic_cpu_base; 1272 + acpi_data.maint_irq = processor->vgic_interrupt; 1273 + acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ? 1274 + ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; 1275 + acpi_data.vctrl_base = processor->gich_base_address; 1276 + acpi_data.vcpu_base = processor->gicv_base_address; 1277 + 1306 1278 cpu_base_assigned = 1; 1307 1279 return 0; 1308 1280 } ··· 1339 1299 1340 1300 #define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) 1341 1301 #define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) 1302 + #define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) 1303 + #define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) 1304 + 1305 + static void __init gic_acpi_setup_kvm_info(void) 1306 + { 1307 + int irq; 1308 + struct resource *vctrl_res = &gic_v2_kvm_info.vctrl; 1309 + struct resource *vcpu_res = &gic_v2_kvm_info.vcpu; 1310 + 1311 + gic_v2_kvm_info.type = GIC_V2; 1312 + 1313 + if (!acpi_data.vctrl_base) 1314 + return; 1315 + 1316 + vctrl_res->flags = IORESOURCE_MEM; 1317 + vctrl_res->start = acpi_data.vctrl_base; 1318 + vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1; 1319 + 1320 + if (!acpi_data.vcpu_base) 1321 + return; 1322 + 1323 + vcpu_res->flags = IORESOURCE_MEM; 1324 + vcpu_res->start = acpi_data.vcpu_base; 1325 + vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; 1326 + 1327 + irq = acpi_register_gsi(NULL, acpi_data.maint_irq, 1328 + acpi_data.maint_irq_mode, 1329 + ACPI_ACTIVE_HIGH); 1330 + if (irq <= 0) 1331 + return; 1332 + 1333 + gic_v2_kvm_info.maint_irq = irq; 1334 + 1335 + gic_set_kvm_info(&gic_v2_kvm_info); 1336 + } 1342 1337 1343 1338 static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, 1344 1339 const unsigned long end) 
··· 1391 1316 return -EINVAL; 1392 1317 } 1393 1318 1394 - cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE); 1319 + cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE); 1395 1320 if (!cpu_base) { 1396 1321 pr_err("Unable to map GICC registers\n"); 1397 1322 return -ENOMEM; ··· 1430 1355 1431 1356 if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) 1432 1357 gicv2m_init(NULL, gic_data[0].domain); 1358 + 1359 + gic_acpi_setup_kvm_info(); 1433 1360 1434 1361 return 0; 1435 1362 }
+6 -6
include/clocksource/arm_arch_timer.h
··· 49 49 50 50 #define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */ 51 51 52 + struct arch_timer_kvm_info { 53 + struct timecounter timecounter; 54 + int virtual_irq; 55 + }; 56 + 52 57 #ifdef CONFIG_ARM_ARCH_TIMER 53 58 54 59 extern u32 arch_timer_get_rate(void); 55 60 extern u64 (*arch_timer_read_counter)(void); 56 - extern struct timecounter *arch_timer_get_timecounter(void); 61 + extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void); 57 62 58 63 #else 59 64 ··· 70 65 static inline u64 arch_timer_read_counter(void) 71 66 { 72 67 return 0; 73 - } 74 - 75 - static inline struct timecounter *arch_timer_get_timecounter(void) 76 - { 77 - return NULL; 78 68 } 79 69 80 70 #endif
+4 -3
include/kvm/arm_vgic.h
··· 25 25 #include <linux/spinlock.h> 26 26 #include <linux/types.h> 27 27 #include <kvm/iodev.h> 28 + #include <linux/irqchip/arm-gic-common.h> 28 29 29 30 #define VGIC_NR_IRQS_LEGACY 256 30 31 #define VGIC_NR_SGIS 16 ··· 354 353 #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 355 354 #define vgic_ready(k) ((k)->arch.vgic.ready) 356 355 357 - int vgic_v2_probe(struct device_node *vgic_node, 356 + int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, 358 357 const struct vgic_ops **ops, 359 358 const struct vgic_params **params); 360 359 #ifdef CONFIG_KVM_ARM_VGIC_V3 361 - int vgic_v3_probe(struct device_node *vgic_node, 360 + int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 362 361 const struct vgic_ops **ops, 363 362 const struct vgic_params **params); 364 363 #else 365 - static inline int vgic_v3_probe(struct device_node *vgic_node, 364 + static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 366 365 const struct vgic_ops **ops, 367 366 const struct vgic_params **params) 368 367 {
+34
include/linux/irqchip/arm-gic-common.h
··· 1 + /* 2 + * include/linux/irqchip/arm-gic-common.h 3 + * 4 + * Copyright (C) 2016 ARM Limited, All Rights Reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License version 2 as 8 + * published by the Free Software Foundation. 9 + */ 10 + #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H 11 + #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H 12 + 13 + #include <linux/types.h> 14 + #include <linux/ioport.h> 15 + 16 + enum gic_type { 17 + GIC_V2, 18 + GIC_V3, 19 + }; 20 + 21 + struct gic_kvm_info { 22 + /* GIC type */ 23 + enum gic_type type; 24 + /* Virtual CPU interface */ 25 + struct resource vcpu; 26 + /* Interrupt number */ 27 + unsigned int maint_irq; 28 + /* Virtual control interface */ 29 + struct resource vctrl; 30 + }; 31 + 32 + const struct gic_kvm_info *gic_get_kvm_info(void); 33 + 34 + #endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
+11 -29
virt/kvm/arm/arch_timer.c
··· 17 17 */ 18 18 19 19 #include <linux/cpu.h> 20 - #include <linux/of_irq.h> 21 20 #include <linux/kvm.h> 22 21 #include <linux/kvm_host.h> 23 22 #include <linux/interrupt.h> ··· 437 438 .notifier_call = kvm_timer_cpu_notify, 438 439 }; 439 440 440 - static const struct of_device_id arch_timer_of_match[] = { 441 - { .compatible = "arm,armv7-timer", }, 442 - { .compatible = "arm,armv8-timer", }, 443 - {}, 444 - }; 445 - 446 441 int kvm_timer_hyp_init(void) 447 442 { 448 - struct device_node *np; 449 - unsigned int ppi; 443 + struct arch_timer_kvm_info *info; 450 444 int err; 451 445 452 - timecounter = arch_timer_get_timecounter(); 453 - if (!timecounter) 454 - return -ENODEV; 446 + info = arch_timer_get_kvm_info(); 447 + timecounter = &info->timecounter; 455 448 456 - np = of_find_matching_node(NULL, arch_timer_of_match); 457 - if (!np) { 458 - kvm_err("kvm_arch_timer: can't find DT node\n"); 449 + if (info->virtual_irq <= 0) { 450 + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", 451 + info->virtual_irq); 459 452 return -ENODEV; 460 453 } 454 + host_vtimer_irq = info->virtual_irq; 461 455 462 - ppi = irq_of_parse_and_map(np, 2); 463 - if (!ppi) { 464 - kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); 465 - err = -EINVAL; 466 - goto out; 467 - } 468 - 469 - err = request_percpu_irq(ppi, kvm_arch_timer_handler, 456 + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, 470 457 "kvm guest timer", kvm_get_running_vcpus()); 471 458 if (err) { 472 459 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 473 - ppi, err); 460 + host_vtimer_irq, err); 474 461 goto out; 475 462 } 476 - 477 - host_vtimer_irq = ppi; 478 463 479 464 err = __register_cpu_notifier(&kvm_timer_cpu_nb); 480 465 if (err) { ··· 472 489 goto out_free; 473 490 } 474 491 475 - kvm_info("%s IRQ%d\n", np->name, ppi); 492 + kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); 476 493 on_each_cpu(kvm_timer_init_interrupt, NULL, 1); 477 494 478 495 goto out; 479 496 
out_free: 480 - free_percpu_irq(ppi, kvm_get_running_vcpus()); 497 + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); 481 498 out: 482 - of_node_put(np); 483 499 return err; 484 500 } 485 501
+29 -38
virt/kvm/arm/vgic-v2.c
··· 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/interrupt.h> 22 22 #include <linux/io.h> 23 - #include <linux/of.h> 24 - #include <linux/of_address.h> 25 - #include <linux/of_irq.h> 26 23 27 24 #include <linux/irqchip/arm-gic.h> 28 25 ··· 183 186 } 184 187 185 188 /** 186 - * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT 187 - * @node: pointer to the DT node 188 - * @ops: address of a pointer to the GICv2 operations 189 - * @params: address of a pointer to HW-specific parameters 189 + * vgic_v2_probe - probe for a GICv2 compatible interrupt controller 190 + * @gic_kvm_info: pointer to the GIC description 191 + * @ops: address of a pointer to the GICv2 operations 192 + * @params: address of a pointer to HW-specific parameters 190 193 * 191 194 * Returns 0 if a GICv2 has been found, with the low level operations 192 195 * in *ops and the HW parameters in *params. Returns an error code 193 196 * otherwise. 194 197 */ 195 - int vgic_v2_probe(struct device_node *vgic_node, 196 - const struct vgic_ops **ops, 197 - const struct vgic_params **params) 198 + int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, 199 + const struct vgic_ops **ops, 200 + const struct vgic_params **params) 198 201 { 199 202 int ret; 200 - struct resource vctrl_res; 201 - struct resource vcpu_res; 202 203 struct vgic_params *vgic = &vgic_v2_params; 204 + const struct resource *vctrl_res = &gic_kvm_info->vctrl; 205 + const struct resource *vcpu_res = &gic_kvm_info->vcpu; 203 206 204 - vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 205 - if (!vgic->maint_irq) { 206 - kvm_err("error getting vgic maintenance irq from DT\n"); 207 + if (!gic_kvm_info->maint_irq) { 208 + kvm_err("error getting vgic maintenance irq\n"); 209 + ret = -ENXIO; 210 + goto out; 211 + } 212 + vgic->maint_irq = gic_kvm_info->maint_irq; 213 + 214 + if (!gic_kvm_info->vctrl.start) { 215 + kvm_err("GICH not present in the firmware table\n"); 207 216 ret = -ENXIO; 208 217 goto out; 
209 218 } 210 219 211 - ret = of_address_to_resource(vgic_node, 2, &vctrl_res); 212 - if (ret) { 213 - kvm_err("Cannot obtain GICH resource\n"); 214 - goto out; 215 - } 216 - 217 - vgic->vctrl_base = of_iomap(vgic_node, 2); 220 + vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start, 221 + resource_size(&gic_kvm_info->vctrl)); 218 222 if (!vgic->vctrl_base) { 219 223 kvm_err("Cannot ioremap GICH\n"); 220 224 ret = -ENOMEM; ··· 226 228 vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; 227 229 228 230 ret = create_hyp_io_mappings(vgic->vctrl_base, 229 - vgic->vctrl_base + resource_size(&vctrl_res), 230 - vctrl_res.start); 231 + vgic->vctrl_base + resource_size(vctrl_res), 232 + vctrl_res->start); 231 233 if (ret) { 232 234 kvm_err("Cannot map VCTRL into hyp\n"); 233 235 goto out_unmap; 234 236 } 235 237 236 - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { 237 - kvm_err("Cannot obtain GICV resource\n"); 238 - ret = -ENXIO; 239 - goto out_unmap; 240 - } 241 - 242 - if (!PAGE_ALIGNED(vcpu_res.start)) { 238 + if (!PAGE_ALIGNED(vcpu_res->start)) { 243 239 kvm_err("GICV physical address 0x%llx not page aligned\n", 244 - (unsigned long long)vcpu_res.start); 240 + (unsigned long long)vcpu_res->start); 245 241 ret = -ENXIO; 246 242 goto out_unmap; 247 243 } 248 244 249 - if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 245 + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { 250 246 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", 251 - (unsigned long long)resource_size(&vcpu_res), 247 + (unsigned long long)resource_size(vcpu_res), 252 248 PAGE_SIZE); 253 249 ret = -ENXIO; 254 250 goto out_unmap; ··· 251 259 vgic->can_emulate_gicv2 = true; 252 260 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); 253 261 254 - vgic->vcpu_base = vcpu_res.start; 262 + vgic->vcpu_base = vcpu_res->start; 255 263 256 - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 257 - vctrl_res.start, vgic->maint_irq); 264 + kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n", 265 
+ gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq); 258 266 259 267 vgic->type = VGIC_V2; 260 268 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; ··· 268 276 out_unmap: 269 277 iounmap(vgic->vctrl_base); 270 278 out: 271 - of_node_put(vgic_node); 272 279 return ret; 273 280 }
+16 -31
virt/kvm/arm/vgic-v3.c
··· 20 20 #include <linux/kvm_host.h> 21 21 #include <linux/interrupt.h> 22 22 #include <linux/io.h> 23 - #include <linux/of.h> 24 - #include <linux/of_address.h> 25 - #include <linux/of_irq.h> 26 23 27 24 #include <linux/irqchip/arm-gic-v3.h> 25 + #include <linux/irqchip/arm-gic-common.h> 28 26 29 27 #include <asm/kvm_emulate.h> 30 28 #include <asm/kvm_arm.h> ··· 220 222 } 221 223 222 224 /** 223 - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT 224 - * @node: pointer to the DT node 225 - * @ops: address of a pointer to the GICv3 operations 226 - * @params: address of a pointer to HW-specific parameters 225 + * vgic_v3_probe - probe for a GICv3 compatible interrupt controller 226 + * @gic_kvm_info: pointer to the GIC description 227 + * @ops: address of a pointer to the GICv3 operations 228 + * @params: address of a pointer to HW-specific parameters 227 229 * 228 230 * Returns 0 if a GICv3 has been found, with the low level operations 229 231 * in *ops and the HW parameters in *params. Returns an error code 230 232 * otherwise. 
231 233 */ 232 - int vgic_v3_probe(struct device_node *vgic_node, 234 + int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, 233 235 const struct vgic_ops **ops, 234 236 const struct vgic_params **params) 235 237 { 236 238 int ret = 0; 237 - u32 gicv_idx; 238 - struct resource vcpu_res; 239 239 struct vgic_params *vgic = &vgic_v3_params; 240 + const struct resource *vcpu_res = &gic_kvm_info->vcpu; 240 241 241 - vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 242 - if (!vgic->maint_irq) { 243 - kvm_err("error getting vgic maintenance irq from DT\n"); 244 - ret = -ENXIO; 245 - goto out; 246 - } 242 + vgic->maint_irq = gic_kvm_info->maint_irq; 247 243 248 244 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); 249 245 ··· 248 256 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; 249 257 vgic->can_emulate_gicv2 = false; 250 258 251 - if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) 252 - gicv_idx = 1; 253 - 254 - gicv_idx += 3; /* Also skip GICD, GICC, GICH */ 255 - if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) { 259 + if (!vcpu_res->start) { 256 260 kvm_info("GICv3: no GICV resource entry\n"); 257 261 vgic->vcpu_base = 0; 258 - } else if (!PAGE_ALIGNED(vcpu_res.start)) { 262 + } else if (!PAGE_ALIGNED(vcpu_res->start)) { 259 263 pr_warn("GICV physical address 0x%llx not page aligned\n", 260 - (unsigned long long)vcpu_res.start); 264 + (unsigned long long)vcpu_res->start); 261 265 vgic->vcpu_base = 0; 262 - } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 266 + } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) { 263 267 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", 264 - (unsigned long long)resource_size(&vcpu_res), 268 + (unsigned long long)resource_size(vcpu_res), 265 269 PAGE_SIZE); 266 - vgic->vcpu_base = 0; 267 270 } else { 268 - vgic->vcpu_base = vcpu_res.start; 271 + vgic->vcpu_base = vcpu_res->start; 269 272 vgic->can_emulate_gicv2 = true; 270 273 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, 
271 274 KVM_DEV_TYPE_ARM_VGIC_V2); ··· 273 286 vgic->type = VGIC_V3; 274 287 vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; 275 288 276 - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 277 - vcpu_res.start, vgic->maint_irq); 289 + kvm_info("GICV base=0x%llx, IRQ=%d\n", 290 + vgic->vcpu_base, vgic->maint_irq); 278 291 279 292 on_each_cpu(vgic_cpu_init_lrs, vgic, 1); 280 293 281 294 *ops = &vgic_v3_ops; 282 295 *params = vgic; 283 296 284 - out: 285 - of_node_put(vgic_node); 286 297 return ret; 287 298 }
+28 -24
virt/kvm/arm/vgic.c
··· 21 21 #include <linux/kvm_host.h> 22 22 #include <linux/interrupt.h> 23 23 #include <linux/io.h> 24 - #include <linux/of.h> 25 - #include <linux/of_address.h> 26 - #include <linux/of_irq.h> 24 + #include <linux/irq.h> 27 25 #include <linux/rculist.h> 28 26 #include <linux/uaccess.h> 29 27 ··· 31 33 #include <trace/events/kvm.h> 32 34 #include <asm/kvm.h> 33 35 #include <kvm/iodev.h> 36 + #include <linux/irqchip/arm-gic-common.h> 34 37 35 38 #define CREATE_TRACE_POINTS 36 39 #include "trace.h" ··· 2388 2389 .notifier_call = vgic_cpu_notify, 2389 2390 }; 2390 2391 2391 - static const struct of_device_id vgic_ids[] = { 2392 - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, 2393 - { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, 2394 - { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, 2395 - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, 2396 - {}, 2397 - }; 2392 + static int kvm_vgic_probe(void) 2393 + { 2394 + const struct gic_kvm_info *gic_kvm_info; 2395 + int ret; 2396 + 2397 + gic_kvm_info = gic_get_kvm_info(); 2398 + if (!gic_kvm_info) 2399 + return -ENODEV; 2400 + 2401 + switch (gic_kvm_info->type) { 2402 + case GIC_V2: 2403 + ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic); 2404 + break; 2405 + case GIC_V3: 2406 + ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic); 2407 + break; 2408 + default: 2409 + ret = -ENODEV; 2410 + } 2411 + 2412 + return ret; 2413 + } 2398 2414 2399 2415 int kvm_vgic_hyp_init(void) 2400 2416 { 2401 - const struct of_device_id *matched_id; 2402 - const int (*vgic_probe)(struct device_node *,const struct vgic_ops **, 2403 - const struct vgic_params **); 2404 - struct device_node *vgic_node; 2405 2417 int ret; 2406 2418 2407 - vgic_node = of_find_matching_node_and_match(NULL, 2408 - vgic_ids, &matched_id); 2409 - if (!vgic_node) { 2410 - kvm_err("error: no compatible GIC node found\n"); 2411 - return -ENODEV; 2412 - } 2413 - 2414 - vgic_probe = matched_id->data; 2415 - ret = 
vgic_probe(vgic_node, &vgic_ops, &vgic); 2416 - if (ret) 2419 + ret = kvm_vgic_probe(); 2420 + if (ret) { 2421 + kvm_err("error: KVM vGIC probing failed\n"); 2417 2422 return ret; 2423 + } 2418 2424 2419 2425 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, 2420 2426 "vgic", kvm_get_running_vcpus());