Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64/mm: Update tlb invalidation routines for FEAT_LPA2

FEAT_LPA2 impacts tlb invalidation in 2 ways; Firstly, the TTL field in
the non-range tlbi instructions can now validly take a 0 value as a
level hint for the 4KB granule (this is due to the extra level of
translation) - previously TTL=0b0100 meant no hint and was treated as
0b0000. Secondly, the BADDR field of the range-based tlbi instructions
is specified in 64KB units when LPA2 is in use (TCR.DS=1), whereas it is
in page units otherwise. Changes are required for tlbi to continue to
operate correctly when LPA2 is in use.

Solve the first problem by always adding the level hint if the level is
between [0, 3] (previously anything other than 0 was hinted, which
breaks in the new level -1 case from kvm). When running on non-LPA2 HW,
0 is still safe to hint as the HW will fall back to non-hinted. While we
are at it, we replace the notion of 0 being the non-hinted sentinel with
a macro, TLBI_TTL_UNKNOWN. This means callers won't need updating
if/when translation depth increases in future.

The second issue is more complex: When LPA2 is in use, use the non-range
tlbi instructions to forward align to a 64KB boundary first, then use
range-based tlbi from there on, until we have either invalidated all
pages or we have a single page remaining. If the latter, that is done
with non-range tlbi. We determine whether LPA2 is in use based on
lpa2_is_enabled() (for kernel calls) or kvm_lpa2_is_enabled() (for kvm
calls).

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20231127111737.1897081-4-ryan.roberts@arm.com

Authored by Ryan Roberts; committed by Marc Zyngier.

Commit: c910f2b6 (parent: 936a4ec2)

+68 -37
+10 -5
arch/arm64/include/asm/tlb.h
··· 22 22 #include <asm-generic/tlb.h> 23 23 24 24 /* 25 - * get the tlbi levels in arm64. Default value is 0 if more than one 26 - * of cleared_* is set or neither is set. 27 - * Arm64 doesn't support p4ds now. 25 + * get the tlbi levels in arm64. Default value is TLBI_TTL_UNKNOWN if more than 26 + * one of cleared_* is set or neither is set - this elides the level hinting to 27 + * the hardware. 28 28 */ 29 29 static inline int tlb_get_level(struct mmu_gather *tlb) 30 30 { 31 31 /* The TTL field is only valid for the leaf entry. */ 32 32 if (tlb->freed_tables) 33 - return 0; 33 + return TLBI_TTL_UNKNOWN; 34 34 35 35 if (tlb->cleared_ptes && !(tlb->cleared_pmds || 36 36 tlb->cleared_puds || ··· 47 47 tlb->cleared_p4ds)) 48 48 return 1; 49 49 50 - return 0; 50 + if (tlb->cleared_p4ds && !(tlb->cleared_ptes || 51 + tlb->cleared_pmds || 52 + tlb->cleared_puds)) 53 + return 0; 54 + 55 + return TLBI_TTL_UNKNOWN; 51 56 } 52 57 53 58 static inline void tlb_flush(struct mmu_gather *tlb)
+58 -32
arch/arm64/include/asm/tlbflush.h
··· 94 94 * When ARMv8.4-TTL exists, TLBI operations take an additional hint for 95 95 * the level at which the invalidation must take place. If the level is 96 96 * wrong, no invalidation may take place. In the case where the level 97 - * cannot be easily determined, a 0 value for the level parameter will 98 - * perform a non-hinted invalidation. 97 + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will perform 98 + * a non-hinted invalidation. Any provided level outside the hint range 99 + * will also cause fall-back to non-hinted invalidation. 99 100 * 100 101 * For Stage-2 invalidation, use the level values provided to that effect 101 102 * in asm/stage2_pgtable.h. 102 103 */ 103 104 #define TLBI_TTL_MASK GENMASK_ULL(47, 44) 104 105 106 + #define TLBI_TTL_UNKNOWN INT_MAX 107 + 105 108 #define __tlbi_level(op, addr, level) do { \ 106 109 u64 arg = addr; \ 107 110 \ 108 111 if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && \ 109 - level) { \ 112 + level >= 0 && level <= 3) { \ 110 113 u64 ttl = level & 3; \ 111 114 ttl |= get_trans_granule() << 2; \ 112 115 arg &= ~TLBI_TTL_MASK; \ ··· 125 122 } while (0) 126 123 127 124 /* 128 - * This macro creates a properly formatted VA operand for the TLB RANGE. 129 - * The value bit assignments are: 125 + * This macro creates a properly formatted VA operand for the TLB RANGE. 
The 126 + * value bit assignments are: 130 127 * 131 128 * +----------+------+-------+-------+-------+----------------------+ 132 129 * | ASID | TG | SCALE | NUM | TTL | BADDR | 133 130 * +-----------------+-------+-------+-------+----------------------+ 134 131 * |63 48|47 46|45 44|43 39|38 37|36 0| 135 132 * 136 - * The address range is determined by below formula: 137 - * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) 133 + * The address range is determined by below formula: [BADDR, BADDR + (NUM + 1) * 134 + * 2^(5*SCALE + 1) * PAGESIZE) 135 + * 136 + * Note that the first argument, baddr, is pre-shifted; If LPA2 is in use, BADDR 137 + * holds addr[52:16]. Else BADDR holds page number. See for example ARM DDI 138 + * 0487J.a section C5.5.60 "TLBI VAE1IS, TLBI VAE1ISNXS, TLB Invalidate by VA, 139 + * EL1, Inner Shareable". 138 140 * 139 141 */ 140 - #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ 141 - ({ \ 142 - unsigned long __ta = (addr) >> PAGE_SHIFT; \ 143 - __ta &= GENMASK_ULL(36, 0); \ 144 - __ta |= (unsigned long)(ttl) << 37; \ 145 - __ta |= (unsigned long)(num) << 39; \ 146 - __ta |= (unsigned long)(scale) << 44; \ 147 - __ta |= get_trans_granule() << 46; \ 148 - __ta |= (unsigned long)(asid) << 48; \ 149 - __ta; \ 142 + #define __TLBI_VADDR_RANGE(baddr, asid, scale, num, ttl) \ 143 + ({ \ 144 + unsigned long __ta = (baddr); \ 145 + unsigned long __ttl = (ttl >= 1 && ttl <= 3) ? ttl : 0; \ 146 + __ta &= GENMASK_ULL(36, 0); \ 147 + __ta |= __ttl << 37; \ 148 + __ta |= (unsigned long)(num) << 39; \ 149 + __ta |= (unsigned long)(scale) << 44; \ 150 + __ta |= get_trans_granule() << 46; \ 151 + __ta |= (unsigned long)(asid) << 48; \ 152 + __ta; \ 150 153 }) 151 154 152 155 /* These macros are used by the TLBI RANGE feature. */ ··· 225 216 * CPUs, ensuring that any walk-cache entries associated with the 226 217 * translation are also invalidated. 
227 218 * 228 - * __flush_tlb_range(vma, start, end, stride, last_level) 219 + * __flush_tlb_range(vma, start, end, stride, last_level, tlb_level) 229 220 * Invalidate the virtual-address range '[start, end)' on all 230 221 * CPUs for the user address space corresponding to 'vma->mm'. 231 222 * The invalidation operations are issued at a granularity 232 223 * determined by 'stride' and only affect any walk-cache entries 233 - * if 'last_level' is equal to false. 224 + * if 'last_level' is equal to false. tlb_level is the level at 225 + * which the invalidation must take place. If the level is wrong, 226 + * no invalidation may take place. In the case where the level 227 + * cannot be easily determined, the value TLBI_TTL_UNKNOWN will 228 + * perform a non-hinted invalidation. 234 229 * 235 230 * 236 231 * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented ··· 358 345 * @tlb_level: Translation Table level hint, if known 359 346 * @tlbi_user: If 'true', call an additional __tlbi_user() 360 347 * (typically for user ASIDs). 'flase' for IPA instructions 348 + * @lpa2: If 'true', the lpa2 scheme is used as set out below 361 349 * 362 350 * When the CPU does not support TLB range operations, flush the TLB 363 351 * entries one by one at the granularity of 'stride'. If the TLB 364 352 * range ops are supported, then: 365 353 * 366 - * 1. The minimum range granularity is decided by 'scale', so multiple range 354 + * 1. If FEAT_LPA2 is in use, the start address of a range operation must be 355 + * 64KB aligned, so flush pages one by one until the alignment is reached 356 + * using the non-range operations. This step is skipped if LPA2 is not in 357 + * use. 358 + * 359 + * 2. The minimum range granularity is decided by 'scale', so multiple range 367 360 * TLBI operations may be required. 
Start from scale = 3, flush the largest 368 361 * possible number of pages ((num+1)*2^(5*scale+1)) that fit into the 369 362 * requested range, then decrement scale and continue until one or zero pages 370 - * are left. 363 + * are left. We must start from highest scale to ensure 64KB start alignment 364 + * is maintained in the LPA2 case. 371 365 * 372 - * 2. If there is 1 page remaining, flush it through non-range operations. Range 373 - * operations can only span an even number of pages. 366 + * 3. If there is 1 page remaining, flush it through non-range operations. Range 367 + * operations can only span an even number of pages. We save this for last to 368 + * ensure 64KB start alignment is maintained for the LPA2 case. 374 369 * 375 370 * Note that certain ranges can be represented by either num = 31 and 376 371 * scale or num = 0 and scale + 1. The loop below favours the latter 377 372 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. 378 373 */ 379 374 #define __flush_tlb_range_op(op, start, pages, stride, \ 380 - asid, tlb_level, tlbi_user) \ 375 + asid, tlb_level, tlbi_user, lpa2) \ 381 376 do { \ 382 377 int num = 0; \ 383 378 int scale = 3; \ 379 + int shift = lpa2 ? 
16 : PAGE_SHIFT; \ 384 380 unsigned long addr; \ 385 381 \ 386 382 while (pages > 0) { \ 387 383 if (!system_supports_tlb_range() || \ 388 - pages == 1) { \ 384 + pages == 1 || \ 385 + (lpa2 && start != ALIGN(start, SZ_64K))) { \ 389 386 addr = __TLBI_VADDR(start, asid); \ 390 387 __tlbi_level(op, addr, tlb_level); \ 391 388 if (tlbi_user) \ ··· 407 384 \ 408 385 num = __TLBI_RANGE_NUM(pages, scale); \ 409 386 if (num >= 0) { \ 410 - addr = __TLBI_VADDR_RANGE(start, asid, scale, \ 411 - num, tlb_level); \ 387 + addr = __TLBI_VADDR_RANGE(start >> shift, asid, \ 388 + scale, num, tlb_level); \ 412 389 __tlbi(r##op, addr); \ 413 390 if (tlbi_user) \ 414 391 __tlbi_user(r##op, addr); \ ··· 420 397 } while (0) 421 398 422 399 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ 423 - __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) 400 + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled()); 424 401 425 402 static inline void __flush_tlb_range(struct vm_area_struct *vma, 426 403 unsigned long start, unsigned long end, ··· 450 427 asid = ASID(vma->vm_mm); 451 428 452 429 if (last_level) 453 - __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); 430 + __flush_tlb_range_op(vale1is, start, pages, stride, asid, 431 + tlb_level, true, lpa2_is_enabled()); 454 432 else 455 - __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); 433 + __flush_tlb_range_op(vae1is, start, pages, stride, asid, 434 + tlb_level, true, lpa2_is_enabled()); 456 435 457 436 dsb(ish); 458 437 mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end); ··· 466 441 /* 467 442 * We cannot use leaf-only invalidation here, since we may be invalidating 468 443 * table entries as part of collapsing hugepages or moving page tables. 469 - * Set the tlb_level to 0 because we can not get enough information here. 
444 + * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough 445 + * information here. 470 446 */ 471 - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); 447 + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); 472 448 } 473 449 474 450 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)