Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: arm64: Split kvm_pgtable_stage2_destroy()

Split kvm_pgtable_stage2_destroy() into two:
- kvm_pgtable_stage2_destroy_range(), which performs the
page-table walk and frees the entries over a range of addresses.
- kvm_pgtable_stage2_destroy_pgd(), which frees the PGD.

This refactoring enables subsequent patches to free large page-tables
in chunks, calling cond_resched() between each chunk, to yield the
CPU as necessary.

Existing callers of kvm_pgtable_stage2_destroy() that probably cannot
take advantage of this (such as nVHE) will continue to function as is.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Suggested-by: Oliver Upton <oupton@kernel.org>
Link: https://msgid.link/20251113052452.975081-3-rananta@google.com
Signed-off-by: Oliver Upton <oupton@kernel.org>

authored by

Raghavendra Rao Ananta and committed by
Oliver Upton
d68d66e5 156f70af

+73 -9
+30
arch/arm64/include/asm/kvm_pgtable.h
··· 355 355 return pteref; 356 356 } 357 357 358 + static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) 359 + { 360 + return pteref; 361 + } 362 + 358 363 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) 359 364 { 360 365 /* ··· 387 382 kvm_pteref_t pteref) 388 383 { 389 384 return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); 385 + } 386 + 387 + static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref) 388 + { 389 + return rcu_dereference_raw(pteref); 390 390 } 391 391 392 392 static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) ··· 560 550 * to freeing and therefore no TLB invalidation is performed. 561 551 */ 562 552 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 553 + 554 + /** 555 + * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses. 556 + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 557 + * @addr: Intermediate physical address at which to place the mapping. 558 + * @size: Size of the mapping. 559 + * 560 + * The page-table is assumed to be unreachable by any hardware walkers prior 561 + * to freeing and therefore no TLB invalidation is performed. 562 + */ 563 + void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 564 + u64 addr, u64 size); 565 + 566 + /** 567 + * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table. 568 + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). 569 + * 570 + * It is assumed that the rest of the page-table is freed before this operation. 571 + */ 572 + void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); 563 573 564 574 /** 565 575 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
+3 -1
arch/arm64/include/asm/kvm_pkvm.h
··· 180 180 181 181 int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, 182 182 struct kvm_pgtable_mm_ops *mm_ops); 183 - void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); 183 + void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 184 + u64 addr, u64 size); 185 + void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt); 184 186 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, 185 187 enum kvm_pgtable_prot prot, void *mc, 186 188 enum kvm_pgtable_walk_flags flags);
+21 -4
arch/arm64/kvm/hyp/pgtable.c
··· 1577 1577 } 1578 1578 } 1579 1579 1580 - void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 1580 + void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 1581 + u64 addr, u64 size) 1581 1582 { 1582 - size_t pgd_sz; 1583 1583 struct kvm_pgtable_walker walker = { 1584 1584 .cb = stage2_free_walker, 1585 1585 .flags = KVM_PGTABLE_WALK_LEAF | 1586 1586 KVM_PGTABLE_WALK_TABLE_POST, 1587 1587 }; 1588 1588 1589 - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); 1589 + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); 1590 + } 1591 + 1592 + void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) 1593 + { 1594 + size_t pgd_sz; 1595 + 1590 1596 pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; 1591 - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); 1597 + 1598 + /* 1599 + * Since the pgtable is unlinked at this point, and not shared with 1600 + * other walkers, safely deference pgd with kvm_dereference_pteref_raw() 1601 + */ 1602 + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); 1592 1603 pgt->pgd = NULL; 1604 + } 1605 + 1606 + void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 1607 + { 1608 + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); 1609 + kvm_pgtable_stage2_destroy_pgd(pgt); 1593 1610 } 1594 1611 1595 1612 void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+10 -2
arch/arm64/kvm/mmu.c
··· 904 904 return 0; 905 905 } 906 906 907 + static void kvm_stage2_destroy(struct kvm_pgtable *pgt) 908 + { 909 + unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr); 910 + 911 + KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, 0, BIT(ia_bits)); 912 + KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt); 913 + } 914 + 907 915 /** 908 916 * kvm_init_stage2_mmu - Initialise a S2 MMU structure 909 917 * @kvm: The pointer to the KVM structure ··· 988 980 return 0; 989 981 990 982 out_destroy_pgtable: 991 - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 983 + kvm_stage2_destroy(pgt); 992 984 out_free_pgtable: 993 985 kfree(pgt); 994 986 return err; ··· 1089 1081 write_unlock(&kvm->mmu_lock); 1090 1082 1091 1083 if (pgt) { 1092 - KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt); 1084 + kvm_stage2_destroy(pgt); 1093 1085 kfree(pgt); 1094 1086 } 1095 1087 }
+9 -2
arch/arm64/kvm/pkvm.c
··· 344 344 return 0; 345 345 } 346 346 347 - void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) 347 + void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, 348 + u64 addr, u64 size) 348 349 { 349 - __pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL)); 350 + __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size); 351 + } 352 + 353 + void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) 354 + { 355 + /* Expected to be called after all pKVM mappings have been released. */ 356 + WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root)); 350 357 } 351 358 352 359 int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,