Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: hugetlb: Refine tlb maintenance scope

When operating on contiguous blocks of ptes (or pmds) for some hugetlb
sizes, we must honour break-before-make requirements and clear down the
block to invalid state in the pgtable then invalidate the relevant tlb
entries before making the pgtable entries valid again.

However, the tlb maintenance is currently always done assuming the worst
case stride (PAGE_SIZE), last_level (false) and tlb_level
(TLBI_TTL_UNKNOWN). We can do much better with the hinting. In reality,
we know the stride from the huge_pte pgsize, we are always operating
only on the last level, and we always know the tlb_level, again based on
pgsize. So let's start providing these hints.

Additionally, avoid tlb maintenance in set_huge_pte_at().
Break-before-make is only required if we are transitioning the
contiguous pte block from valid -> valid. So let's elide the
clear-and-flush ("break") if the pte range was previously invalid.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-3-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Ryan Roberts and committed by
Will Deacon
5b3f8917 29cb8051

+31 -19
+25 -16
arch/arm64/include/asm/hugetlb.h
··· 69 69 70 70 #include <asm-generic/hugetlb.h> 71 71 72 + static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma, 73 + unsigned long start, 74 + unsigned long end, 75 + unsigned long stride, 76 + bool last_level) 77 + { 78 + switch (stride) { 79 + #ifndef __PAGETABLE_PMD_FOLDED 80 + case PUD_SIZE: 81 + __flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1); 82 + break; 83 + #endif 84 + case CONT_PMD_SIZE: 85 + case PMD_SIZE: 86 + __flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2); 87 + break; 88 + case CONT_PTE_SIZE: 89 + __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3); 90 + break; 91 + default: 92 + __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN); 93 + } 94 + } 95 + 72 96 #define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE 73 97 static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, 74 98 unsigned long start, ··· 100 76 { 101 77 unsigned long stride = huge_page_size(hstate_vma(vma)); 102 78 103 - switch (stride) { 104 - #ifndef __PAGETABLE_PMD_FOLDED 105 - case PUD_SIZE: 106 - __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1); 107 - break; 108 - #endif 109 - case CONT_PMD_SIZE: 110 - case PMD_SIZE: 111 - __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2); 112 - break; 113 - case CONT_PTE_SIZE: 114 - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3); 115 - break; 116 - default: 117 - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); 118 - } 79 + __flush_hugetlb_tlb_range(vma, start, end, stride, false); 119 80 } 120 81 121 82 #endif /* __ASM_HUGETLB_H */
+6 -3
arch/arm64/mm/hugetlbpage.c
··· 183 183 { 184 184 pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig); 185 185 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); 186 + unsigned long end = addr + (pgsize * ncontig); 186 187 187 - flush_tlb_range(&vma, addr, addr + (pgsize * ncontig)); 188 + __flush_hugetlb_tlb_range(&vma, addr, end, pgsize, true); 188 189 return orig_pte; 189 190 } 190 191 ··· 210 209 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) 211 210 __ptep_get_and_clear(mm, addr, ptep); 212 211 213 - flush_tlb_range(&vma, saddr, addr); 212 + __flush_hugetlb_tlb_range(&vma, saddr, addr, pgsize, true); 214 213 } 215 214 216 215 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, ··· 239 238 dpfn = pgsize >> PAGE_SHIFT; 240 239 hugeprot = pte_pgprot(pte); 241 240 242 - clear_flush(mm, addr, ptep, pgsize, ncontig); 241 + /* Only need to "break" if transitioning valid -> valid. */ 242 + if (pte_valid(__ptep_get(ptep))) 243 + clear_flush(mm, addr, ptep, pgsize, ncontig); 243 244 244 245 for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) 245 246 __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);