Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:
"There's a reasonable amount here and the juicy details are all below.

It's worth noting that the MTE/KASAN changes strayed outside of our
usual directories due to core mm changes and some associated changes
to some other architectures; Andrew asked for us to carry these [1]
rather than take them via the -mm tree.

Summary:

- Optimise SVE switching for CPUs with 128-bit implementations.

- Fix output format from SVE selftest.

- Add support for versions v1.2 and v1.3 of the SMC calling
convention.

- Allow Pointer Authentication to be configured independently for
kernel and userspace.

- PMU driver cleanups for managing IRQ affinity and exposing event
attributes via sysfs.

- KASAN optimisations for both hardware tagging (MTE) and out-of-line
software tagging implementations.

- Relax frame record alignment requirements to facilitate 8-byte
alignment with KASAN and Clang.

- Cleanup of page-table definitions and removal of unused memory
types.

- Reduction of ARCH_DMA_MINALIGN back to 64 bytes.

- Refactoring of our instruction decoding routines and addition of
some missing encodings.

- Entry code moved into C and hardened against harmful compiler
instrumentation.

- Update booting requirements for the FEAT_HCX feature, added to v8.7
of the architecture.

- Fix resume from idle when pNMI is being used.

- Additional CPU sanity checks for MTE and preparatory changes for
systems where not all of the CPUs support 32-bit EL0.

- Update our kernel string routines to the latest Cortex Strings
implementation.

- Big cleanup of our cache maintenance routines, which were
confusingly named and inconsistent in their implementations.

- Tweak linker flags so that GDB can understand vmlinux when using
RELR relocations.

- Boot path cleanups to enable early initialisation of per-cpu
operations needed by KCSAN.

- Non-critical fixes and miscellaneous cleanup"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (150 commits)
arm64: tlb: fix the TTL value of tlb_get_level
arm64: Restrict undef hook for cpufeature registers
arm64/mm: Rename ARM64_SWAPPER_USES_SECTION_MAPS
arm64: insn: avoid circular include dependency
arm64: smp: Bump debugging information print down to KERN_DEBUG
drivers/perf: fix the missed ida_simple_remove() in ddr_perf_probe()
perf/arm-cmn: Fix invalid pointer when access dtc object sharing the same IRQ number
arm64: suspend: Use cpuidle context helpers in cpu_suspend()
PSCI: Use cpuidle context helpers in psci_cpu_suspend_enter()
arm64: Convert cpu_do_idle() to using cpuidle context helpers
arm64: Add cpuidle context save/restore helpers
arm64: head: fix code comments in set_cpu_boot_mode_flag
arm64: mm: drop unused __pa(__idmap_text_start)
arm64: mm: fix the count comments in compute_indices
arm64/mm: Fix ttbr0 values stored in struct thread_info for software-pan
arm64: mm: Pass original fault address to handle_mm_fault()
arm64/mm: Drop SECTION_[SHIFT|SIZE|MASK]
arm64/mm: Use CONT_PMD_SHIFT for ARM64_MEMSTART_SHIFT
arm64/mm: Drop SWAPPER_INIT_MAP_SIZE
arm64: Conditionally configure PTR_AUTH key of the kernel.
...

+3390 -2583
+6
Documentation/arm64/booting.rst
··· 277 277 278 278 - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1. 279 279 280 + For CPUs with support for HCRX_EL2 (FEAT_HCX) present: 281 + 282 + - If EL3 is present and the kernel is entered at EL2: 283 + 284 + - SCR_EL3.HXEn (bit 38) must be initialised to 0b1. 285 + 280 286 For CPUs with Advanced SIMD and floating point support: 281 287 282 288 - If EL3 is present:
+1 -1
Makefile
··· 1039 1039 endif 1040 1040 1041 1041 ifeq ($(CONFIG_RELR),y) 1042 - LDFLAGS_vmlinux += --pack-dyn-relocs=relr 1042 + LDFLAGS_vmlinux += --pack-dyn-relocs=relr --use-android-relr-tags 1043 1043 endif 1044 1044 1045 1045 # We never want expected sections to be placed heuristically by the
+3 -3
arch/alpha/include/asm/page.h
··· 17 17 extern void clear_page(void *page); 18 18 #define clear_user_page(page, vaddr, pg) clear_page(page) 19 19 20 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 21 - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vmaddr) 22 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 20 + #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ 21 + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr) 22 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 23 23 24 24 extern void copy_page(void * _to, void * _from); 25 25 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+5
arch/arm/include/asm/cpuidle.h
··· 50 50 51 51 extern int arm_cpuidle_init(int cpu); 52 52 53 + struct arm_cpuidle_irq_context { }; 54 + 55 + #define arm_cpuidle_save_irq_context(c) (void)c 56 + #define arm_cpuidle_restore_irq_context(c) (void)c 57 + 53 58 #endif
+2 -2
arch/arm/kernel/perf_event_v7.c
··· 773 773 pr_err("CPU%u writing wrong counter %d\n", 774 774 smp_processor_id(), idx); 775 775 } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { 776 - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); 776 + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); 777 777 } else { 778 778 armv7_pmnc_select_counter(idx); 779 - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); 779 + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); 780 780 } 781 781 } 782 782
+19 -14
arch/arm64/Kconfig
··· 1481 1481 config ARM64_PTR_AUTH 1482 1482 bool "Enable support for pointer authentication" 1483 1483 default y 1484 - depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC 1485 - # Modern compilers insert a .note.gnu.property section note for PAC 1486 - # which is only understood by binutils starting with version 2.33.1. 1487 - depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100) 1488 - depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE 1489 - depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) 1490 1484 help 1491 1485 Pointer authentication (part of the ARMv8.3 Extensions) provides 1492 1486 instructions for signing and authenticating pointers against secret ··· 1492 1498 for each process at exec() time, with these keys being 1493 1499 context-switched along with the process. 1494 1500 1495 - If the compiler supports the -mbranch-protection or 1496 - -msign-return-address flag (e.g. GCC 7 or later), then this option 1497 - will also cause the kernel itself to be compiled with return address 1498 - protection. In this case, and if the target hardware is known to 1499 - support pointer authentication, then CONFIG_STACKPROTECTOR can be 1500 - disabled with minimal loss of protection. 1501 - 1502 1501 The feature is detected at runtime. If the feature is not present in 1503 1502 hardware it will not be advertised to userspace/KVM guest nor will it 1504 1503 be enabled. ··· 1501 1514 address auth and the late CPU has then the late CPU will still boot 1502 1515 but with the feature disabled. On such a system, this option should 1503 1516 not be selected. 
1517 + 1518 + config ARM64_PTR_AUTH_KERNEL 1519 + bool "Use pointer authentication for kernel" 1520 + default y 1521 + depends on ARM64_PTR_AUTH 1522 + depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC 1523 + # Modern compilers insert a .note.gnu.property section note for PAC 1524 + # which is only understood by binutils starting with version 2.33.1. 1525 + depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100) 1526 + depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE 1527 + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) 1528 + help 1529 + If the compiler supports the -mbranch-protection or 1530 + -msign-return-address flag (e.g. GCC 7 or later), then this option 1531 + will cause the kernel itself to be compiled with return address 1532 + protection. In this case, and if the target hardware is known to 1533 + support pointer authentication, then CONFIG_STACKPROTECTOR can be 1534 + disabled with minimal loss of protection. 1504 1535 1505 1536 This feature works with FUNCTION_GRAPH_TRACER option only if 1506 1537 DYNAMIC_FTRACE_WITH_REGS is enabled. ··· 1611 1606 bool "Use Branch Target Identification for kernel" 1612 1607 default y 1613 1608 depends on ARM64_BTI 1614 - depends on ARM64_PTR_AUTH 1609 + depends on ARM64_PTR_AUTH_KERNEL 1615 1610 depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI 1616 1611 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 1617 1612 depends on !CC_IS_GCC || GCC_VERSION >= 100100
+1 -1
arch/arm64/Makefile
··· 70 70 # off, this will be overridden if we are using branch protection. 71 71 branch-prot-flags-y += $(call cc-option,-mbranch-protection=none) 72 72 73 - ifeq ($(CONFIG_ARM64_PTR_AUTH),y) 73 + ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y) 74 74 branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all 75 75 # We enable additional protection for leaf functions as there is some 76 76 # narrow potential for ROP protection benefits and no substantial
+1 -8
arch/arm64/include/asm/alternative-macros.h
··· 3 3 #define __ASM_ALTERNATIVE_MACROS_H 4 4 5 5 #include <asm/cpucaps.h> 6 + #include <asm/insn-def.h> 6 7 7 8 #define ARM64_CB_PATCH ARM64_NCAPS 8 - 9 - /* A64 instructions are always 32 bits. */ 10 - #define AARCH64_INSN_SIZE 4 11 9 12 10 #ifndef __ASSEMBLY__ 13 11 ··· 194 196 195 197 #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ 196 198 alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) 197 - 198 - .macro user_alt, label, oldinstr, newinstr, cond 199 - 9999: alternative_insn "\oldinstr", "\newinstr", \cond 200 - _asm_extable 9999b, \label 201 - .endm 202 199 203 200 #endif /* __ASSEMBLY__ */ 204 201
+2 -1
arch/arm64/include/asm/arch_gicv3.h
··· 124 124 #define gic_read_lpir(c) readq_relaxed(c) 125 125 #define gic_write_lpir(v, c) writeq_relaxed(v, c) 126 126 127 - #define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) 127 + #define gic_flush_dcache_to_poc(a,l) \ 128 + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) 128 129 129 130 #define gits_read_baser(c) readq_relaxed(c) 130 131 #define gits_write_baser(v, c) writeq_relaxed(v, c)
+6
arch/arm64/include/asm/asm-prototypes.h
··· 23 23 long long __ashrti3(long long a, int b); 24 24 long long __lshrti3(long long a, int b); 25 25 26 + /* 27 + * This function uses a custom calling convention and cannot be called from C so 28 + * this prototype is not entirely accurate. 29 + */ 30 + void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info); 31 + 26 32 #endif /* __ASM_PROTOTYPES_H */
+29 -20
arch/arm64/include/asm/asm_pointer_auth.h
··· 7 7 #include <asm/cpufeature.h> 8 8 #include <asm/sysreg.h> 9 9 10 - #ifdef CONFIG_ARM64_PTR_AUTH 11 - /* 12 - * thread.keys_user.ap* as offset exceeds the #imm offset range 13 - * so use the base value of ldp as thread.keys_user and offset as 14 - * thread.keys_user.ap*. 15 - */ 16 - .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 17 - mov \tmp1, #THREAD_KEYS_USER 18 - add \tmp1, \tsk, \tmp1 19 - ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA] 20 - msr_s SYS_APIAKEYLO_EL1, \tmp2 21 - msr_s SYS_APIAKEYHI_EL1, \tmp3 22 - .endm 10 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 23 11 24 12 .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 25 13 mov \tmp1, #THREAD_KEYS_KERNEL ··· 28 40 __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 29 41 isb 30 42 alternative_else_nop_endif 43 + .endm 44 + 45 + #else /* CONFIG_ARM64_PTR_AUTH_KERNEL */ 46 + 47 + .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 48 + .endm 49 + 50 + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 51 + .endm 52 + 53 + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 54 + .endm 55 + 56 + #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */ 57 + 58 + #ifdef CONFIG_ARM64_PTR_AUTH 59 + /* 60 + * thread.keys_user.ap* as offset exceeds the #imm offset range 61 + * so use the base value of ldp as thread.keys_user and offset as 62 + * thread.keys_user.ap*. 
63 + */ 64 + .macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 65 + mov \tmp1, #THREAD_KEYS_USER 66 + add \tmp1, \tsk, \tmp1 67 + ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA] 68 + msr_s SYS_APIAKEYLO_EL1, \tmp2 69 + msr_s SYS_APIAKEYHI_EL1, \tmp3 31 70 .endm 32 71 33 72 .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 ··· 79 64 .Lno_addr_auth\@: 80 65 .endm 81 66 82 - #else /* CONFIG_ARM64_PTR_AUTH */ 67 + #else /* !CONFIG_ARM64_PTR_AUTH */ 83 68 84 69 .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 85 - .endm 86 - 87 - .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 88 - .endm 89 - 90 - .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 91 70 .endm 92 71 93 72 #endif /* CONFIG_ARM64_PTR_AUTH */
+62 -38
arch/arm64/include/asm/assembler.h
··· 130 130 .endm 131 131 132 132 /* 133 - * Emit an entry into the exception table 133 + * Create an exception table entry for `insn`, which will branch to `fixup` 134 + * when an unhandled fault is taken. 134 135 */ 135 - .macro _asm_extable, from, to 136 + .macro _asm_extable, insn, fixup 136 137 .pushsection __ex_table, "a" 137 138 .align 3 138 - .long (\from - .), (\to - .) 139 + .long (\insn - .), (\fixup - .) 139 140 .popsection 140 141 .endm 142 + 143 + /* 144 + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise 145 + * do nothing. 146 + */ 147 + .macro _cond_extable, insn, fixup 148 + .ifnc \fixup, 149 + _asm_extable \insn, \fixup 150 + .endif 151 + .endm 152 + 141 153 142 154 #define USER(l, x...) \ 143 155 9999: x; \ ··· 244 232 * @dst: destination register 245 233 */ 246 234 #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) 247 - .macro this_cpu_offset, dst 235 + .macro get_this_cpu_offset, dst 248 236 mrs \dst, tpidr_el2 249 237 .endm 250 238 #else 251 - .macro this_cpu_offset, dst 239 + .macro get_this_cpu_offset, dst 252 240 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN 253 241 mrs \dst, tpidr_el1 254 242 alternative_else 255 243 mrs \dst, tpidr_el2 244 + alternative_endif 245 + .endm 246 + 247 + .macro set_this_cpu_offset, src 248 + alternative_if_not ARM64_HAS_VIRT_HOST_EXTN 249 + msr tpidr_el1, \src 250 + alternative_else 251 + msr tpidr_el2, \src 256 252 alternative_endif 257 253 .endm 258 254 #endif ··· 273 253 .macro adr_this_cpu, dst, sym, tmp 274 254 adrp \tmp, \sym 275 255 add \dst, \tmp, #:lo12:\sym 276 - this_cpu_offset \tmp 256 + get_this_cpu_offset \tmp 277 257 add \dst, \dst, \tmp 278 258 .endm 279 259 ··· 284 264 */ 285 265 .macro ldr_this_cpu dst, sym, tmp 286 266 adr_l \dst, \sym 287 - this_cpu_offset \tmp 267 + get_this_cpu_offset \tmp 288 268 ldr \dst, [\dst, \tmp] 289 269 .endm 290 270 ··· 395 375 bfi \tcr, \tmp0, \pos, #3 396 376 .endm 397 377 398 - /* 399 - * Macro to perform a 
data cache maintenance for the interval 400 - * [kaddr, kaddr + size) 401 - * 402 - * op: operation passed to dc instruction 403 - * domain: domain used in dsb instruciton 404 - * kaddr: starting virtual address of the region 405 - * size: size of the region 406 - * Corrupts: kaddr, size, tmp1, tmp2 407 - */ 408 - .macro __dcache_op_workaround_clean_cache, op, kaddr 378 + .macro __dcache_op_workaround_clean_cache, op, addr 409 379 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE 410 - dc \op, \kaddr 380 + dc \op, \addr 411 381 alternative_else 412 - dc civac, \kaddr 382 + dc civac, \addr 413 383 alternative_endif 414 384 .endm 415 385 416 - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 386 + /* 387 + * Macro to perform a data cache maintenance for the interval 388 + * [start, end) 389 + * 390 + * op: operation passed to dc instruction 391 + * domain: domain used in dsb instruciton 392 + * start: starting virtual address of the region 393 + * end: end virtual address of the region 394 + * fixup: optional label to branch to on user fault 395 + * Corrupts: start, end, tmp1, tmp2 396 + */ 397 + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup 417 398 dcache_line_size \tmp1, \tmp2 418 - add \size, \kaddr, \size 419 399 sub \tmp2, \tmp1, #1 420 - bic \kaddr, \kaddr, \tmp2 421 - 9998: 400 + bic \start, \start, \tmp2 401 + .Ldcache_op\@: 422 402 .ifc \op, cvau 423 - __dcache_op_workaround_clean_cache \op, \kaddr 403 + __dcache_op_workaround_clean_cache \op, \start 424 404 .else 425 405 .ifc \op, cvac 426 - __dcache_op_workaround_clean_cache \op, \kaddr 406 + __dcache_op_workaround_clean_cache \op, \start 427 407 .else 428 408 .ifc \op, cvap 429 - sys 3, c7, c12, 1, \kaddr // dc cvap 409 + sys 3, c7, c12, 1, \start // dc cvap 430 410 .else 431 411 .ifc \op, cvadp 432 - sys 3, c7, c13, 1, \kaddr // dc cvadp 412 + sys 3, c7, c13, 1, \start // dc cvadp 433 413 .else 434 - dc \op, \kaddr 414 + dc \op, \start 435 415 .endif 436 416 .endif 437 417 
.endif 438 418 .endif 439 - add \kaddr, \kaddr, \tmp1 440 - cmp \kaddr, \size 441 - b.lo 9998b 419 + add \start, \start, \tmp1 420 + cmp \start, \end 421 + b.lo .Ldcache_op\@ 442 422 dsb \domain 423 + 424 + _cond_extable .Ldcache_op\@, \fixup 443 425 .endm 444 426 445 427 /* ··· 449 427 * [start, end) 450 428 * 451 429 * start, end: virtual addresses describing the region 452 - * label: A label to branch to on user fault. 430 + * fixup: optional label to branch to on user fault 453 431 * Corrupts: tmp1, tmp2 454 432 */ 455 - .macro invalidate_icache_by_line start, end, tmp1, tmp2, label 433 + .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup 456 434 icache_line_size \tmp1, \tmp2 457 435 sub \tmp2, \tmp1, #1 458 436 bic \tmp2, \start, \tmp2 459 - 9997: 460 - USER(\label, ic ivau, \tmp2) // invalidate I line PoU 437 + .Licache_op\@: 438 + ic ivau, \tmp2 // invalidate I line PoU 461 439 add \tmp2, \tmp2, \tmp1 462 440 cmp \tmp2, \end 463 - b.lo 9997b 441 + b.lo .Licache_op\@ 464 442 dsb ish 465 443 isb 444 + 445 + _cond_extable .Licache_op\@, \fixup 466 446 .endm 467 447 468 448 /* ··· 769 745 cbz \tmp, \lbl 770 746 #endif 771 747 adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING 772 - this_cpu_offset \tmp2 748 + get_this_cpu_offset \tmp2 773 749 ldr w\tmp, [\tmp, \tmp2] 774 750 cbnz w\tmp, \lbl // yield on pending softirq in task context 775 751 .Lnoyield_\@:
+1 -1
arch/arm64/include/asm/cache.h
··· 47 47 * cache before the transfer is done, causing old data to be seen by 48 48 * the CPU. 49 49 */ 50 - #define ARCH_DMA_MINALIGN (128) 50 + #define ARCH_DMA_MINALIGN L1_CACHE_BYTES 51 51 52 52 #ifdef CONFIG_KASAN_SW_TAGS 53 53 #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT)
+42 -29
arch/arm64/include/asm/cacheflush.h
··· 30 30 * the implementation assumes non-aliasing VIPT D-cache and (aliasing) 31 31 * VIPT I-cache. 32 32 * 33 - * flush_icache_range(start, end) 33 + * All functions below apply to the interval [start, end) 34 + * - start - virtual start address (inclusive) 35 + * - end - virtual end address (exclusive) 34 36 * 35 - * Ensure coherency between the I-cache and the D-cache in the 36 - * region described by start, end. 37 - * - start - virtual start address 38 - * - end - virtual end address 37 + * caches_clean_inval_pou(start, end) 39 38 * 40 - * invalidate_icache_range(start, end) 39 + * Ensure coherency between the I-cache and the D-cache region to 40 + * the Point of Unification. 41 41 * 42 - * Invalidate the I-cache in the region described by start, end. 43 - * - start - virtual start address 44 - * - end - virtual end address 42 + * caches_clean_inval_user_pou(start, end) 45 43 * 46 - * __flush_cache_user_range(start, end) 44 + * Ensure coherency between the I-cache and the D-cache region to 45 + * the Point of Unification. 46 + * Use only if the region might access user memory. 47 47 * 48 - * Ensure coherency between the I-cache and the D-cache in the 49 - * region described by start, end. 50 - * - start - virtual start address 51 - * - end - virtual end address 48 + * icache_inval_pou(start, end) 52 49 * 53 - * __flush_dcache_area(kaddr, size) 50 + * Invalidate I-cache region to the Point of Unification. 54 51 * 55 - * Ensure that the data held in page is written back. 56 - * - kaddr - page address 57 - * - size - region size 52 + * dcache_clean_inval_poc(start, end) 53 + * 54 + * Clean and invalidate D-cache region to the Point of Coherency. 55 + * 56 + * dcache_inval_poc(start, end) 57 + * 58 + * Invalidate D-cache region to the Point of Coherency. 59 + * 60 + * dcache_clean_poc(start, end) 61 + * 62 + * Clean D-cache region to the Point of Coherency. 63 + * 64 + * dcache_clean_pop(start, end) 65 + * 66 + * Clean D-cache region to the Point of Persistence. 
67 + * 68 + * dcache_clean_pou(start, end) 69 + * 70 + * Clean D-cache region to the Point of Unification. 58 71 */ 59 - extern void __flush_icache_range(unsigned long start, unsigned long end); 60 - extern int invalidate_icache_range(unsigned long start, unsigned long end); 61 - extern void __flush_dcache_area(void *addr, size_t len); 62 - extern void __inval_dcache_area(void *addr, size_t len); 63 - extern void __clean_dcache_area_poc(void *addr, size_t len); 64 - extern void __clean_dcache_area_pop(void *addr, size_t len); 65 - extern void __clean_dcache_area_pou(void *addr, size_t len); 66 - extern long __flush_cache_user_range(unsigned long start, unsigned long end); 67 - extern void sync_icache_aliases(void *kaddr, unsigned long len); 72 + extern void caches_clean_inval_pou(unsigned long start, unsigned long end); 73 + extern void icache_inval_pou(unsigned long start, unsigned long end); 74 + extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); 75 + extern void dcache_inval_poc(unsigned long start, unsigned long end); 76 + extern void dcache_clean_poc(unsigned long start, unsigned long end); 77 + extern void dcache_clean_pop(unsigned long start, unsigned long end); 78 + extern void dcache_clean_pou(unsigned long start, unsigned long end); 79 + extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); 80 + extern void sync_icache_aliases(unsigned long start, unsigned long end); 68 81 69 82 static inline void flush_icache_range(unsigned long start, unsigned long end) 70 83 { 71 - __flush_icache_range(start, end); 84 + caches_clean_inval_pou(start, end); 72 85 73 86 /* 74 87 * IPI all online CPUs so that they undergo a context synchronization ··· 135 122 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 136 123 extern void flush_dcache_page(struct page *); 137 124 138 - static __always_inline void __flush_icache_all(void) 125 + static __always_inline void icache_inval_all_pou(void) 139 126 { 140 127 if 
(cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) 141 128 return;
+25 -20
arch/arm64/include/asm/cpu.h
··· 12 12 /* 13 13 * Records attributes of an individual CPU. 14 14 */ 15 - struct cpuinfo_arm64 { 16 - struct cpu cpu; 17 - struct kobject kobj; 18 - u32 reg_ctr; 19 - u32 reg_cntfrq; 20 - u32 reg_dczid; 21 - u32 reg_midr; 22 - u32 reg_revidr; 23 - 24 - u64 reg_id_aa64dfr0; 25 - u64 reg_id_aa64dfr1; 26 - u64 reg_id_aa64isar0; 27 - u64 reg_id_aa64isar1; 28 - u64 reg_id_aa64mmfr0; 29 - u64 reg_id_aa64mmfr1; 30 - u64 reg_id_aa64mmfr2; 31 - u64 reg_id_aa64pfr0; 32 - u64 reg_id_aa64pfr1; 33 - u64 reg_id_aa64zfr0; 34 - 15 + struct cpuinfo_32bit { 35 16 u32 reg_id_dfr0; 36 17 u32 reg_id_dfr1; 37 18 u32 reg_id_isar0; ··· 35 54 u32 reg_mvfr0; 36 55 u32 reg_mvfr1; 37 56 u32 reg_mvfr2; 57 + }; 58 + 59 + struct cpuinfo_arm64 { 60 + struct cpu cpu; 61 + struct kobject kobj; 62 + u64 reg_ctr; 63 + u64 reg_cntfrq; 64 + u64 reg_dczid; 65 + u64 reg_midr; 66 + u64 reg_revidr; 67 + u64 reg_gmid; 68 + 69 + u64 reg_id_aa64dfr0; 70 + u64 reg_id_aa64dfr1; 71 + u64 reg_id_aa64isar0; 72 + u64 reg_id_aa64isar1; 73 + u64 reg_id_aa64mmfr0; 74 + u64 reg_id_aa64mmfr1; 75 + u64 reg_id_aa64mmfr2; 76 + u64 reg_id_aa64pfr0; 77 + u64 reg_id_aa64pfr1; 78 + u64 reg_id_aa64zfr0; 79 + 80 + struct cpuinfo_32bit aarch32; 38 81 39 82 /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */ 40 83 u64 reg_zcr;
+14 -1
arch/arm64/include/asm/cpufeature.h
··· 619 619 return val > 0; 620 620 } 621 621 622 + static inline bool id_aa64pfr1_mte(u64 pfr1) 623 + { 624 + u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT); 625 + 626 + return val >= ID_AA64PFR1_MTE; 627 + } 628 + 622 629 void __init setup_cpu_features(void); 623 630 void check_local_cpu_capabilities(void); 624 631 ··· 637 630 return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); 638 631 } 639 632 633 + const struct cpumask *system_32bit_el0_cpumask(void); 634 + DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); 635 + 640 636 static inline bool system_supports_32bit_el0(void) 641 637 { 642 - return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); 638 + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); 639 + 640 + return static_branch_unlikely(&arm64_mismatched_32bit_el0) || 641 + id_aa64pfr0_32bit_el0(pfr0); 643 642 } 644 643 645 644 static inline bool system_supports_4kb_granule(void)
+35
arch/arm64/include/asm/cpuidle.h
··· 18 18 return -EOPNOTSUPP; 19 19 } 20 20 #endif 21 + 22 + #ifdef CONFIG_ARM64_PSEUDO_NMI 23 + #include <asm/arch_gicv3.h> 24 + 25 + struct arm_cpuidle_irq_context { 26 + unsigned long pmr; 27 + unsigned long daif_bits; 28 + }; 29 + 30 + #define arm_cpuidle_save_irq_context(__c) \ 31 + do { \ 32 + struct arm_cpuidle_irq_context *c = __c; \ 33 + if (system_uses_irq_prio_masking()) { \ 34 + c->daif_bits = read_sysreg(daif); \ 35 + write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \ 36 + daif); \ 37 + c->pmr = gic_read_pmr(); \ 38 + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \ 39 + } \ 40 + } while (0) 41 + 42 + #define arm_cpuidle_restore_irq_context(__c) \ 43 + do { \ 44 + struct arm_cpuidle_irq_context *c = __c; \ 45 + if (system_uses_irq_prio_masking()) { \ 46 + gic_write_pmr(c->pmr); \ 47 + write_sysreg(c->daif_bits, daif); \ 48 + } \ 49 + } while (0) 50 + #else 51 + struct arm_cpuidle_irq_context { }; 52 + 53 + #define arm_cpuidle_save_irq_context(c) (void)c 54 + #define arm_cpuidle_restore_irq_context(c) (void)c 55 + #endif 21 56 #endif
+1 -1
arch/arm64/include/asm/efi.h
··· 137 137 138 138 static inline void efi_capsule_flush_cache_range(void *addr, int size) 139 139 { 140 - __flush_dcache_area(addr, size); 140 + dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); 141 141 } 142 142 143 143 #endif /* _ASM_EFI_H */
+26 -8
arch/arm64/include/asm/exception.h
··· 31 31 return esr; 32 32 } 33 33 34 - asmlinkage void el1_sync_handler(struct pt_regs *regs); 35 - asmlinkage void el0_sync_handler(struct pt_regs *regs); 36 - asmlinkage void el0_sync_compat_handler(struct pt_regs *regs); 34 + asmlinkage void handle_bad_stack(struct pt_regs *regs); 37 35 38 - asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); 39 - asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); 36 + asmlinkage void el1t_64_sync_handler(struct pt_regs *regs); 37 + asmlinkage void el1t_64_irq_handler(struct pt_regs *regs); 38 + asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs); 39 + asmlinkage void el1t_64_error_handler(struct pt_regs *regs); 40 + 41 + asmlinkage void el1h_64_sync_handler(struct pt_regs *regs); 42 + asmlinkage void el1h_64_irq_handler(struct pt_regs *regs); 43 + asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs); 44 + asmlinkage void el1h_64_error_handler(struct pt_regs *regs); 45 + 46 + asmlinkage void el0t_64_sync_handler(struct pt_regs *regs); 47 + asmlinkage void el0t_64_irq_handler(struct pt_regs *regs); 48 + asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs); 49 + asmlinkage void el0t_64_error_handler(struct pt_regs *regs); 50 + 51 + asmlinkage void el0t_32_sync_handler(struct pt_regs *regs); 52 + asmlinkage void el0t_32_irq_handler(struct pt_regs *regs); 53 + asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs); 54 + asmlinkage void el0t_32_error_handler(struct pt_regs *regs); 55 + 56 + asmlinkage void call_on_irq_stack(struct pt_regs *regs, 57 + void (*func)(struct pt_regs *)); 40 58 asmlinkage void enter_from_user_mode(void); 41 59 asmlinkage void exit_to_user_mode(void); 42 - void arm64_enter_nmi(struct pt_regs *regs); 43 - void arm64_exit_nmi(struct pt_regs *regs); 44 60 void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs); 45 61 void do_undefinstr(struct pt_regs *regs); 46 62 void do_bti(struct pt_regs *regs); 47 - asmlinkage void bad_mode(struct 
pt_regs *regs, int reason, unsigned int esr); 48 63 void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, 49 64 struct pt_regs *regs); 50 65 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); ··· 72 57 void do_el0_svc(struct pt_regs *regs); 73 58 void do_el0_svc_compat(struct pt_regs *regs); 74 59 void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr); 60 + void do_serror(struct pt_regs *regs, unsigned int esr); 61 + 62 + void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far); 75 63 #endif /* __ASM_EXCEPTION_H */
+1 -1
arch/arm64/include/asm/fpsimd.h
··· 69 69 extern void sve_save_state(void *state, u32 *pfpsr); 70 70 extern void sve_load_state(void const *state, u32 const *pfpsr, 71 71 unsigned long vq_minus_1); 72 - extern void sve_flush_live(void); 72 + extern void sve_flush_live(unsigned long vq_minus_1); 73 73 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, 74 74 unsigned long vq_minus_1); 75 75 extern unsigned int sve_get_vl(void);
+3 -1
arch/arm64/include/asm/fpsimdmacros.h
··· 213 213 mov v\nz\().16b, v\nz\().16b 214 214 .endm 215 215 216 - .macro sve_flush 216 + .macro sve_flush_z 217 217 _for n, 0, 31, _sve_flush_z \n 218 + .endm 219 + .macro sve_flush_p_ffr 218 220 _for n, 0, 15, _sve_pfalse \n 219 221 _sve_wrffr 0 220 222 .endm
+9
arch/arm64/include/asm/insn-def.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + #ifndef __ASM_INSN_DEF_H 4 + #define __ASM_INSN_DEF_H 5 + 6 + /* A64 instructions are always 32 bits. */ 7 + #define AARCH64_INSN_SIZE 4 8 + 9 + #endif /* __ASM_INSN_DEF_H */
+61 -6
arch/arm64/include/asm/insn.h
··· 10 10 #include <linux/build_bug.h> 11 11 #include <linux/types.h> 12 12 13 - #include <asm/alternative.h> 13 + #include <asm/insn-def.h> 14 14 15 15 #ifndef __ASSEMBLY__ 16 16 /* ··· 30 30 */ 31 31 enum aarch64_insn_encoding_class { 32 32 AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */ 33 + AARCH64_INSN_CLS_SVE, /* SVE instructions */ 33 34 AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */ 34 35 AARCH64_INSN_CLS_DP_REG, /* Data processing - register */ 35 36 AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */ ··· 295 294 __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) 296 295 __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) 297 296 __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) 297 + __AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) 298 + __AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) 299 + __AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) 300 + __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) 301 + __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) 302 + __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) 298 303 __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) 299 304 __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) 300 305 __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) ··· 309 302 __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) 310 303 __AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) 311 304 __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) 305 + __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000) 306 + __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000) 312 307 __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) 313 308 __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) 314 309 __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) ··· 343 334 __AARCH64_INSN_FUNCS(and, 0x7F200000, 0x0A000000) 344 335 __AARCH64_INSN_FUNCS(bic, 0x7F200000, 0x0A200000) 345 336 __AARCH64_INSN_FUNCS(orr, 0x7F200000, 0x2A000000) 337 + __AARCH64_INSN_FUNCS(mov_reg, 0x7FE0FFE0, 
0x2A0003E0) 346 338 __AARCH64_INSN_FUNCS(orn, 0x7F200000, 0x2A200000) 347 339 __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000) 348 340 __AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000) ··· 378 368 __AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) 379 369 __AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) 380 370 __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) 371 + __AARCH64_INSN_FUNCS(dmb, 0xFFFFF0FF, 0xD50330BF) 372 + __AARCH64_INSN_FUNCS(dsb_base, 0xFFFFF0FF, 0xD503309F) 373 + __AARCH64_INSN_FUNCS(dsb_nxs, 0xFFFFF3FF, 0xD503323F) 374 + __AARCH64_INSN_FUNCS(isb, 0xFFFFF0FF, 0xD50330DF) 375 + __AARCH64_INSN_FUNCS(sb, 0xFFFFFFFF, 0xD50330FF) 376 + __AARCH64_INSN_FUNCS(clrex, 0xFFFFF0FF, 0xD503305F) 377 + __AARCH64_INSN_FUNCS(ssbb, 0xFFFFFFFF, 0xD503309F) 378 + __AARCH64_INSN_FUNCS(pssbb, 0xFFFFFFFF, 0xD503349F) 381 379 382 380 #undef __AARCH64_INSN_FUNCS 383 381 ··· 397 379 return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn); 398 380 } 399 381 400 - int aarch64_insn_read(void *addr, u32 *insnp); 401 - int aarch64_insn_write(void *addr, u32 insn); 382 + static inline bool aarch64_insn_is_dsb(u32 insn) 383 + { 384 + return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn); 385 + } 386 + 387 + static inline bool aarch64_insn_is_barrier(u32 insn) 388 + { 389 + return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) || 390 + aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) || 391 + aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) || 392 + aarch64_insn_is_pssbb(insn); 393 + } 394 + 395 + static inline bool aarch64_insn_is_store_single(u32 insn) 396 + { 397 + return aarch64_insn_is_store_imm(insn) || 398 + aarch64_insn_is_store_pre(insn) || 399 + aarch64_insn_is_store_post(insn); 400 + } 401 + 402 + static inline bool aarch64_insn_is_store_pair(u32 insn) 403 + { 404 + return aarch64_insn_is_stp(insn) || 405 + aarch64_insn_is_stp_pre(insn) || 406 + aarch64_insn_is_stp_post(insn); 407 + } 408 + 409 + static inline bool 
aarch64_insn_is_load_single(u32 insn) 410 + { 411 + return aarch64_insn_is_load_imm(insn) || 412 + aarch64_insn_is_load_pre(insn) || 413 + aarch64_insn_is_load_post(insn); 414 + } 415 + 416 + static inline bool aarch64_insn_is_load_pair(u32 insn) 417 + { 418 + return aarch64_insn_is_ldp(insn) || 419 + aarch64_insn_is_ldp_pre(insn) || 420 + aarch64_insn_is_ldp_post(insn); 421 + } 422 + 402 423 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); 403 424 bool aarch64_insn_uses_literal(u32 insn); 404 425 bool aarch64_insn_is_branch(u32 insn); ··· 544 487 s32 aarch64_get_branch_offset(u32 insn); 545 488 u32 aarch64_set_branch_offset(u32 insn, s32 offset); 546 489 547 - int aarch64_insn_patch_text_nosync(void *addr, u32 insn); 548 - int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); 549 - 550 490 s32 aarch64_insn_adrp_get_offset(u32 insn); 551 491 u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset); 552 492 ··· 560 506 561 507 typedef bool (pstate_check_t)(unsigned long); 562 508 extern pstate_check_t * const aarch32_opcode_cond_checks[16]; 509 + 563 510 #endif /* __ASSEMBLY__ */ 564 511 565 512 #endif /* __ASM_INSN_H */
+8 -11
arch/arm64/include/asm/kernel-pgtable.h
··· 18 18 * 64K (section size = 512M). 19 19 */ 20 20 #ifdef CONFIG_ARM64_4K_PAGES 21 - #define ARM64_SWAPPER_USES_SECTION_MAPS 1 21 + #define ARM64_KERNEL_USES_PMD_MAPS 1 22 22 #else 23 - #define ARM64_SWAPPER_USES_SECTION_MAPS 0 23 + #define ARM64_KERNEL_USES_PMD_MAPS 0 24 24 #endif 25 25 26 26 /* ··· 33 33 * VA range, so pages required to map highest possible PA are reserved in all 34 34 * cases. 35 35 */ 36 - #if ARM64_SWAPPER_USES_SECTION_MAPS 36 + #if ARM64_KERNEL_USES_PMD_MAPS 37 37 #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) 38 38 #define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) 39 39 #else ··· 90 90 #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) 91 91 92 92 /* Initial memory map size */ 93 - #if ARM64_SWAPPER_USES_SECTION_MAPS 94 - #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT 95 - #define SWAPPER_BLOCK_SIZE SECTION_SIZE 93 + #if ARM64_KERNEL_USES_PMD_MAPS 94 + #define SWAPPER_BLOCK_SHIFT PMD_SHIFT 95 + #define SWAPPER_BLOCK_SIZE PMD_SIZE 96 96 #define SWAPPER_TABLE_SHIFT PUD_SHIFT 97 97 #else 98 98 #define SWAPPER_BLOCK_SHIFT PAGE_SHIFT ··· 100 100 #define SWAPPER_TABLE_SHIFT PMD_SHIFT 101 101 #endif 102 102 103 - /* The size of the initial kernel direct mapping */ 104 - #define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT) 105 - 106 103 /* 107 104 * Initial memory map attributes. 
108 105 */ 109 106 #define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) 110 107 #define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) 111 108 112 - #if ARM64_SWAPPER_USES_SECTION_MAPS 109 + #if ARM64_KERNEL_USES_PMD_MAPS 113 110 #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) 114 111 #else 115 112 #define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) ··· 122 125 #if defined(CONFIG_ARM64_4K_PAGES) 123 126 #define ARM64_MEMSTART_SHIFT PUD_SHIFT 124 127 #elif defined(CONFIG_ARM64_16K_PAGES) 125 - #define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5) 128 + #define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT 126 129 #else 127 130 #define ARM64_MEMSTART_SHIFT PMD_SHIFT 128 131 #endif
+1
arch/arm64/include/asm/kvm_asm.h
··· 8 8 #define __ARM_KVM_ASM_H__ 9 9 10 10 #include <asm/hyp_image.h> 11 + #include <asm/insn.h> 11 12 #include <asm/virt.h> 12 13 13 14 #define ARM_EXIT_WITH_SERROR_BIT 31
+4 -3
arch/arm64/include/asm/kvm_mmu.h
··· 180 180 181 181 struct kvm; 182 182 183 - #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) 183 + #define kvm_flush_dcache_to_poc(a,l) \ 184 + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) 184 185 185 186 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) 186 187 { ··· 209 208 { 210 209 if (icache_is_aliasing()) { 211 210 /* any kind of VIPT cache */ 212 - __flush_icache_all(); 211 + icache_inval_all_pou(); 213 212 } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { 214 213 /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ 215 214 void *va = page_address(pfn_to_page(pfn)); 216 215 217 - invalidate_icache_range((unsigned long)va, 216 + icache_inval_pou((unsigned long)va, 218 217 (unsigned long)va + size); 219 218 } 220 219 }
+8
arch/arm64/include/asm/linkage.h
··· 56 56 SYM_FUNC_START_ALIAS(__pi_##x); \ 57 57 SYM_FUNC_START_WEAK(x) 58 58 59 + #define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ 60 + SYM_FUNC_START_ALIAS(__pi_##x); \ 61 + SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) 62 + 59 63 #define SYM_FUNC_END_PI(x) \ 60 64 SYM_FUNC_END(x); \ 65 + SYM_FUNC_END_ALIAS(__pi_##x) 66 + 67 + #define SYM_FUNC_END_ALIAS_PI(x) \ 68 + SYM_FUNC_END_ALIAS(x); \ 61 69 SYM_FUNC_END_ALIAS(__pi_##x) 62 70 63 71 #endif
+2 -4
arch/arm64/include/asm/memory.h
··· 135 135 #define MT_NORMAL 0 136 136 #define MT_NORMAL_TAGGED 1 137 137 #define MT_NORMAL_NC 2 138 - #define MT_NORMAL_WT 3 139 - #define MT_DEVICE_nGnRnE 4 140 - #define MT_DEVICE_nGnRE 5 141 - #define MT_DEVICE_GRE 6 138 + #define MT_DEVICE_nGnRnE 3 139 + #define MT_DEVICE_nGnRE 4 142 140 143 141 /* 144 142 * Memory types for Stage-2 translation
+2 -2
arch/arm64/include/asm/mmu_context.h
··· 177 177 return; 178 178 179 179 if (mm == &init_mm) 180 - ttbr = __pa_symbol(reserved_pg_dir); 180 + ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); 181 181 else 182 - ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; 182 + ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48; 183 183 184 184 WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); 185 185 }
+15 -2
arch/arm64/include/asm/module.lds.h
··· 1 - #ifdef CONFIG_ARM64_MODULE_PLTS 2 1 SECTIONS { 2 + #ifdef CONFIG_ARM64_MODULE_PLTS 3 3 .plt 0 (NOLOAD) : { BYTE(0) } 4 4 .init.plt 0 (NOLOAD) : { BYTE(0) } 5 5 .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } 6 - } 7 6 #endif 7 + 8 + #ifdef CONFIG_KASAN_SW_TAGS 9 + /* 10 + * Outlined checks go into comdat-deduplicated sections named .text.hot. 11 + * Because they are in comdats they are not combined by the linker and 12 + * we otherwise end up with multiple sections with the same .text.hot 13 + * name in the .ko file. The kernel module loader warns if it sees 14 + * multiple sections with the same name so we use this sections 15 + * directive to force them into a single section and silence the 16 + * warning. 17 + */ 18 + .text.hot : { *(.text.hot) } 19 + #endif 20 + }
+67 -26
arch/arm64/include/asm/mte-kasan.h
··· 48 48 return mte_get_ptr_tag(addr); 49 49 } 50 50 51 + static inline u64 __stg_post(u64 p) 52 + { 53 + asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16" 54 + : "+r"(p) 55 + : 56 + : "memory"); 57 + return p; 58 + } 59 + 60 + static inline u64 __stzg_post(u64 p) 61 + { 62 + asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16" 63 + : "+r"(p) 64 + : 65 + : "memory"); 66 + return p; 67 + } 68 + 69 + static inline void __dc_gva(u64 p) 70 + { 71 + asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory"); 72 + } 73 + 74 + static inline void __dc_gzva(u64 p) 75 + { 76 + asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory"); 77 + } 78 + 51 79 /* 52 80 * Assign allocation tags for a region of memory based on the pointer tag. 53 81 * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and 54 - * size must be non-zero and MTE_GRANULE_SIZE aligned. 82 + * size must be MTE_GRANULE_SIZE aligned. 55 83 */ 56 - static inline void mte_set_mem_tag_range(void *addr, size_t size, 57 - u8 tag, bool init) 84 + static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, 85 + bool init) 58 86 { 59 - u64 curr, end; 87 + u64 curr, mask, dczid_bs, end1, end2, end3; 60 88 61 - if (!size) 62 - return; 89 + /* Read DC G(Z)VA block size from the system register. */ 90 + dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf); 63 91 64 92 curr = (u64)__tag_set(addr, tag); 65 - end = curr + size; 93 + mask = dczid_bs - 1; 94 + /* STG/STZG up to the end of the first block. */ 95 + end1 = curr | mask; 96 + end3 = curr + size; 97 + /* DC GVA / GZVA in [end1, end2) */ 98 + end2 = end3 & ~mask; 66 99 67 100 /* 68 - * 'asm volatile' is required to prevent the compiler to move 69 - * the statement outside of the loop. 101 + * The following code uses STG on the first DC GVA block even if the 102 + * start address is aligned - it appears to be faster than an alignment 103 + * check + conditional branch. 
Also, if the range size is at least 2 DC 104 + * GVA blocks, the first two loops can use post-condition to save one 105 + * branch each. 70 106 */ 71 - if (init) { 72 - do { 73 - asm volatile(__MTE_PREAMBLE "stzg %0, [%0]" 74 - : 75 - : "r" (curr) 76 - : "memory"); 77 - curr += MTE_GRANULE_SIZE; 78 - } while (curr != end); 79 - } else { 80 - do { 81 - asm volatile(__MTE_PREAMBLE "stg %0, [%0]" 82 - : 83 - : "r" (curr) 84 - : "memory"); 85 - curr += MTE_GRANULE_SIZE; 86 - } while (curr != end); 87 - } 107 + #define SET_MEMTAG_RANGE(stg_post, dc_gva) \ 108 + do { \ 109 + if (size >= 2 * dczid_bs) { \ 110 + do { \ 111 + curr = stg_post(curr); \ 112 + } while (curr < end1); \ 113 + \ 114 + do { \ 115 + dc_gva(curr); \ 116 + curr += dczid_bs; \ 117 + } while (curr < end2); \ 118 + } \ 119 + \ 120 + while (curr < end3) \ 121 + curr = stg_post(curr); \ 122 + } while (0) 123 + 124 + if (init) 125 + SET_MEMTAG_RANGE(__stzg_post, __dc_gzva); 126 + else 127 + SET_MEMTAG_RANGE(__stg_post, __dc_gva); 128 + #undef SET_MEMTAG_RANGE 88 129 } 89 130 90 131 void mte_enable_kernel_sync(void);
+4
arch/arm64/include/asm/mte.h
··· 37 37 /* track which pages have valid allocation tags */ 38 38 #define PG_mte_tagged PG_arch_2 39 39 40 + void mte_zero_clear_page_tags(void *addr); 40 41 void mte_sync_tags(pte_t *ptep, pte_t pte); 41 42 void mte_copy_page_tags(void *kto, const void *kfrom); 42 43 void mte_thread_init_user(void); ··· 54 53 /* unused if !CONFIG_ARM64_MTE, silence the compiler */ 55 54 #define PG_mte_tagged 0 56 55 56 + static inline void mte_zero_clear_page_tags(void *addr) 57 + { 58 + } 57 59 static inline void mte_sync_tags(pte_t *ptep, pte_t pte) 58 60 { 59 61 }
+7 -3
arch/arm64/include/asm/page.h
··· 13 13 #ifndef __ASSEMBLY__ 14 14 15 15 #include <linux/personality.h> /* for READ_IMPLIES_EXEC */ 16 + #include <linux/types.h> /* for gfp_t */ 16 17 #include <asm/pgtable-types.h> 17 18 18 19 struct page; ··· 29 28 void copy_highpage(struct page *to, struct page *from); 30 29 #define __HAVE_ARCH_COPY_HIGHPAGE 31 30 32 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 33 - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) 34 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 31 + struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, 32 + unsigned long vaddr); 33 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 34 + 35 + void tag_clear_highpage(struct page *to); 36 + #define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE 35 37 36 38 #define clear_user_page(page, vaddr, pg) clear_page(page) 37 39 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+13
arch/arm64/include/asm/patching.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef __ASM_PATCHING_H 3 + #define __ASM_PATCHING_H 4 + 5 + #include <linux/types.h> 6 + 7 + int aarch64_insn_read(void *addr, u32 *insnp); 8 + int aarch64_insn_write(void *addr, u32 insn); 9 + 10 + int aarch64_insn_patch_text_nosync(void *addr, u32 insn); 11 + int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); 12 + 13 + #endif /* __ASM_PATCHING_H */
+5
arch/arm64/include/asm/perf_event.h
··· 239 239 /* PMMIR_EL1.SLOTS mask */ 240 240 #define ARMV8_PMU_SLOTS_MASK 0xff 241 241 242 + #define ARMV8_PMU_BUS_SLOTS_SHIFT 8 243 + #define ARMV8_PMU_BUS_SLOTS_MASK 0xff 244 + #define ARMV8_PMU_BUS_WIDTH_SHIFT 16 245 + #define ARMV8_PMU_BUS_WIDTH_MASK 0xf 246 + 242 247 #ifdef CONFIG_PERF_EVENTS 243 248 struct pt_regs; 244 249 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-7
arch/arm64/include/asm/pgtable-hwdef.h
··· 72 72 #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) 73 73 74 74 /* 75 - * Section address mask and size definitions. 76 - */ 77 - #define SECTION_SHIFT PMD_SHIFT 78 - #define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) 79 - #define SECTION_MASK (~(SECTION_SIZE-1)) 80 - 81 - /* 82 75 * Contiguous page definitions. 83 76 */ 84 77 #define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
-1
arch/arm64/include/asm/pgtable-prot.h
··· 55 55 #define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) 56 56 #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) 57 57 #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) 58 - #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) 59 58 #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) 60 59 #define PROT_NORMAL_TAGGED (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED)) 61 60
+2 -3
arch/arm64/include/asm/pgtable.h
··· 511 511 512 512 #define pmd_none(pmd) (!pmd_val(pmd)) 513 513 514 - #define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT)) 515 - 516 514 #define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ 517 515 PMD_TYPE_TABLE) 518 516 #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ 519 517 PMD_TYPE_SECT) 520 518 #define pmd_leaf(pmd) pmd_sect(pmd) 519 + #define pmd_bad(pmd) (!pmd_table(pmd)) 521 520 522 521 #define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE) 523 522 #define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE) ··· 603 604 pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e)) 604 605 605 606 #define pud_none(pud) (!pud_val(pud)) 606 - #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) 607 + #define pud_bad(pud) (!pud_table(pud)) 607 608 #define pud_present(pud) pte_present(pud_pte(pud)) 608 609 #define pud_leaf(pud) pud_sect(pud) 609 610 #define pud_valid(pud) pte_valid(pud_pte(pud))
+33 -26
arch/arm64/include/asm/pointer_auth.h
··· 31 31 struct ptrauth_key apga; 32 32 }; 33 33 34 - struct ptrauth_keys_kernel { 35 - struct ptrauth_key apia; 36 - }; 37 - 38 34 #define __ptrauth_key_install_nosync(k, v) \ 39 35 do { \ 40 36 struct ptrauth_key __pki_v = (v); \ 41 37 write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1); \ 42 38 write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1); \ 43 39 } while (0) 40 + 41 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 42 + 43 + struct ptrauth_keys_kernel { 44 + struct ptrauth_key apia; 45 + }; 46 + 47 + static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys) 48 + { 49 + if (system_supports_address_auth()) 50 + get_random_bytes(&keys->apia, sizeof(keys->apia)); 51 + } 52 + 53 + static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys) 54 + { 55 + if (!system_supports_address_auth()) 56 + return; 57 + 58 + __ptrauth_key_install_nosync(APIA, keys->apia); 59 + isb(); 60 + } 61 + 62 + #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */ 44 63 45 64 static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys) 46 65 { ··· 86 67 get_random_bytes(&keys->apga, sizeof(keys->apga)); 87 68 88 69 ptrauth_keys_install_user(keys); 89 - } 90 - 91 - static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys) 92 - { 93 - if (system_supports_address_auth()) 94 - get_random_bytes(&keys->apia, sizeof(keys->apia)); 95 - } 96 - 97 - static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys) 98 - { 99 - if (!system_supports_address_auth()) 100 - return; 101 - 102 - __ptrauth_key_install_nosync(APIA, keys->apia); 103 - isb(); 104 70 } 105 71 106 72 extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); ··· 125 121 #define ptrauth_thread_switch_user(tsk) \ 126 122 ptrauth_keys_install_user(&(tsk)->thread.keys_user) 127 123 128 - #define ptrauth_thread_init_kernel(tsk) \ 129 - ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel) 130 - #define 
ptrauth_thread_switch_kernel(tsk) \ 131 - ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel) 132 - 133 124 #else /* CONFIG_ARM64_PTR_AUTH */ 134 125 #define ptrauth_enable() 135 126 #define ptrauth_prctl_reset_keys(tsk, arg) (-EINVAL) ··· 133 134 #define ptrauth_strip_insn_pac(lr) (lr) 134 135 #define ptrauth_suspend_exit() 135 136 #define ptrauth_thread_init_user() 136 - #define ptrauth_thread_init_kernel(tsk) 137 137 #define ptrauth_thread_switch_user(tsk) 138 - #define ptrauth_thread_switch_kernel(tsk) 139 138 #endif /* CONFIG_ARM64_PTR_AUTH */ 139 + 140 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 141 + #define ptrauth_thread_init_kernel(tsk) \ 142 + ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel) 143 + #define ptrauth_thread_switch_kernel(tsk) \ 144 + ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel) 145 + #else 146 + #define ptrauth_thread_init_kernel(tsk) 147 + #define ptrauth_thread_switch_kernel(tsk) 148 + #endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */ 140 149 141 150 #define PR_PAC_ENABLED_KEYS_MASK \ 142 151 (PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+8 -8
arch/arm64/include/asm/processor.h
··· 148 148 struct debug_info debug; /* debugging */ 149 149 #ifdef CONFIG_ARM64_PTR_AUTH 150 150 struct ptrauth_keys_user keys_user; 151 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 151 152 struct ptrauth_keys_kernel keys_kernel; 153 + #endif 152 154 #endif 153 155 #ifdef CONFIG_ARM64_MTE 154 156 u64 gcr_user_excl; ··· 259 257 extern struct task_struct *cpu_switch_to(struct task_struct *prev, 260 258 struct task_struct *next); 261 259 262 - asmlinkage void arm64_preempt_schedule_irq(void); 263 - 264 260 #define task_pt_regs(p) \ 265 261 ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) 266 262 ··· 329 329 * of header definitions for the use of task_stack_page. 330 330 */ 331 331 332 - #define current_top_of_stack() \ 333 - ({ \ 334 - struct stack_info _info; \ 335 - BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info)); \ 336 - _info.high; \ 332 + #define current_top_of_stack() \ 333 + ({ \ 334 + struct stack_info _info; \ 335 + BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \ 336 + _info.high; \ 337 337 }) 338 - #define on_thread_stack() (on_task_stack(current, current_stack_pointer, NULL)) 338 + #define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) 339 339 340 340 #endif /* __ASSEMBLY__ */ 341 341 #endif /* __ASM_PROCESSOR_H */
+4 -4
arch/arm64/include/asm/scs.h
··· 9 9 #ifdef CONFIG_SHADOW_CALL_STACK 10 10 scs_sp .req x18 11 11 12 - .macro scs_load tsk, tmp 12 + .macro scs_load tsk 13 13 ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] 14 14 .endm 15 15 16 - .macro scs_save tsk, tmp 16 + .macro scs_save tsk 17 17 str scs_sp, [\tsk, #TSK_TI_SCS_SP] 18 18 .endm 19 19 #else 20 - .macro scs_load tsk, tmp 20 + .macro scs_load tsk 21 21 .endm 22 22 23 - .macro scs_save tsk, tmp 23 + .macro scs_save tsk 24 24 .endm 25 25 #endif /* CONFIG_SHADOW_CALL_STACK */ 26 26
+7 -3
arch/arm64/include/asm/sdei.h
··· 37 37 asmlinkage unsigned long __sdei_handler(struct pt_regs *regs, 38 38 struct sdei_registered_event *arg); 39 39 40 + unsigned long do_sdei_event(struct pt_regs *regs, 41 + struct sdei_registered_event *arg); 42 + 40 43 unsigned long sdei_arch_get_entry_point(int conduit); 41 44 #define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x) 42 45 43 46 struct stack_info; 44 47 45 - bool _on_sdei_stack(unsigned long sp, struct stack_info *info); 46 - static inline bool on_sdei_stack(unsigned long sp, 48 + bool _on_sdei_stack(unsigned long sp, unsigned long size, 49 + struct stack_info *info); 50 + static inline bool on_sdei_stack(unsigned long sp, unsigned long size, 47 51 struct stack_info *info) 48 52 { 49 53 if (!IS_ENABLED(CONFIG_VMAP_STACK)) ··· 55 51 if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) 56 52 return false; 57 53 if (in_nmi()) 58 - return _on_sdei_stack(sp, info); 54 + return _on_sdei_stack(sp, size, info); 59 55 60 56 return false; 61 57 }
-2
arch/arm64/include/asm/smp.h
··· 73 73 74 74 /* 75 75 * Initial data for bringing up a secondary CPU. 76 - * @stack - sp for the secondary CPU 77 76 * @status - Result passed back from the secondary CPU to 78 77 * indicate failure. 79 78 */ 80 79 struct secondary_data { 81 - void *stack; 82 80 struct task_struct *task; 83 81 long status; 84 82 };
+16 -16
arch/arm64/include/asm/stacktrace.h
··· 69 69 70 70 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); 71 71 72 - static inline bool on_stack(unsigned long sp, unsigned long low, 73 - unsigned long high, enum stack_type type, 74 - struct stack_info *info) 72 + static inline bool on_stack(unsigned long sp, unsigned long size, 73 + unsigned long low, unsigned long high, 74 + enum stack_type type, struct stack_info *info) 75 75 { 76 76 if (!low) 77 77 return false; 78 78 79 - if (sp < low || sp >= high) 79 + if (sp < low || sp + size < sp || sp + size > high) 80 80 return false; 81 81 82 82 if (info) { ··· 87 87 return true; 88 88 } 89 89 90 - static inline bool on_irq_stack(unsigned long sp, 90 + static inline bool on_irq_stack(unsigned long sp, unsigned long size, 91 91 struct stack_info *info) 92 92 { 93 93 unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); 94 94 unsigned long high = low + IRQ_STACK_SIZE; 95 95 96 - return on_stack(sp, low, high, STACK_TYPE_IRQ, info); 96 + return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info); 97 97 } 98 98 99 99 static inline bool on_task_stack(const struct task_struct *tsk, 100 - unsigned long sp, 100 + unsigned long sp, unsigned long size, 101 101 struct stack_info *info) 102 102 { 103 103 unsigned long low = (unsigned long)task_stack_page(tsk); 104 104 unsigned long high = low + THREAD_SIZE; 105 105 106 - return on_stack(sp, low, high, STACK_TYPE_TASK, info); 106 + return on_stack(sp, size, low, high, STACK_TYPE_TASK, info); 107 107 } 108 108 109 109 #ifdef CONFIG_VMAP_STACK 110 110 DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); 111 111 112 - static inline bool on_overflow_stack(unsigned long sp, 112 + static inline bool on_overflow_stack(unsigned long sp, unsigned long size, 113 113 struct stack_info *info) 114 114 { 115 115 unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); 116 116 unsigned long high = low + OVERFLOW_STACK_SIZE; 117 117 118 - return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, 
info); 118 + return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); 119 119 } 120 120 #else 121 - static inline bool on_overflow_stack(unsigned long sp, 121 + static inline bool on_overflow_stack(unsigned long sp, unsigned long size, 122 122 struct stack_info *info) { return false; } 123 123 #endif 124 124 ··· 128 128 * context. 129 129 */ 130 130 static inline bool on_accessible_stack(const struct task_struct *tsk, 131 - unsigned long sp, 131 + unsigned long sp, unsigned long size, 132 132 struct stack_info *info) 133 133 { 134 134 if (info) 135 135 info->type = STACK_TYPE_UNKNOWN; 136 136 137 - if (on_task_stack(tsk, sp, info)) 137 + if (on_task_stack(tsk, sp, size, info)) 138 138 return true; 139 139 if (tsk != current || preemptible()) 140 140 return false; 141 - if (on_irq_stack(sp, info)) 141 + if (on_irq_stack(sp, size, info)) 142 142 return true; 143 - if (on_overflow_stack(sp, info)) 143 + if (on_overflow_stack(sp, size, info)) 144 144 return true; 145 - if (on_sdei_stack(sp, info)) 145 + if (on_sdei_stack(sp, size, info)) 146 146 return true; 147 147 148 148 return false;
-2
arch/arm64/include/asm/sysreg.h
··· 703 703 /* MAIR_ELx memory attributes (used by Linux) */ 704 704 #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) 705 705 #define MAIR_ATTR_DEVICE_nGnRE UL(0x04) 706 - #define MAIR_ATTR_DEVICE_GRE UL(0x0c) 707 706 #define MAIR_ATTR_NORMAL_NC UL(0x44) 708 - #define MAIR_ATTR_NORMAL_WT UL(0xbb) 709 707 #define MAIR_ATTR_NORMAL_TAGGED UL(0xf0) 710 708 #define MAIR_ATTR_NORMAL UL(0xff) 711 709 #define MAIR_ATTR_MASK UL(0xff)
+4
arch/arm64/include/asm/tlb.h
··· 28 28 */ 29 29 static inline int tlb_get_level(struct mmu_gather *tlb) 30 30 { 31 + /* The TTL field is only valid for the leaf entry. */ 32 + if (tlb->freed_tables) 33 + return 0; 34 + 31 35 if (tlb->cleared_ptes && !(tlb->cleared_pmds || 32 36 tlb->cleared_puds || 33 37 tlb->cleared_p4ds))
+9 -2
arch/arm64/kernel/Makefile
··· 14 14 CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong 15 15 CFLAGS_syscall.o += -fno-stack-protector 16 16 17 + # It's not safe to invoke KCOV when portions of the kernel environment aren't 18 + # available or are out-of-sync with HW state. Since `noinstr` doesn't always 19 + # inhibit KCOV instrumentation, disable it for the entire compilation unit. 20 + KCOV_INSTRUMENT_entry.o := n 21 + KCOV_INSTRUMENT_idle.o := n 22 + 17 23 # Object file lists. 18 24 obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ 19 25 entry-common.o entry-fpsimd.o process.o ptrace.o \ 20 26 setup.o signal.o sys.o stacktrace.o time.o traps.o \ 21 - io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \ 27 + io.o vdso.o hyp-stub.o psci.o cpu_ops.o \ 22 28 return_address.o cpuinfo.o cpu_errata.o \ 23 29 cpufeature.o alternative.o cacheinfo.o \ 24 30 smp.o smp_spin_table.o topology.o smccc-call.o \ 25 - syscall.o proton-pack.o idreg-override.o 31 + syscall.o proton-pack.o idreg-override.o idle.o \ 32 + patching.o 26 33 27 34 targets += efi-entry.o 28 35
+17 -5
arch/arm64/kernel/acpi.c
··· 239 239 } 240 240 } 241 241 242 + static pgprot_t __acpi_get_writethrough_mem_attribute(void) 243 + { 244 + /* 245 + * Although UEFI specifies the use of Normal Write-through for 246 + * EFI_MEMORY_WT, it is seldom used in practice and not implemented 247 + * by most (all?) CPUs. Rather than allocate a MAIR just for this 248 + * purpose, emit a warning and use Normal Non-cacheable instead. 249 + */ 250 + pr_warn_once("No MAIR allocation for EFI_MEMORY_WT; treating as Normal Non-cacheable\n"); 251 + return __pgprot(PROT_NORMAL_NC); 252 + } 253 + 242 254 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) 243 255 { 244 256 /* ··· 258 246 * types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is 259 247 * mapped to a corresponding MAIR attribute encoding. 260 248 * The EFI memory attribute advises all possible capabilities 261 - * of a memory region. We use the most efficient capability. 249 + * of a memory region. 262 250 */ 263 251 264 252 u64 attr; ··· 266 254 attr = efi_mem_attributes(addr); 267 255 if (attr & EFI_MEMORY_WB) 268 256 return PAGE_KERNEL; 269 - if (attr & EFI_MEMORY_WT) 270 - return __pgprot(PROT_NORMAL_WT); 271 257 if (attr & EFI_MEMORY_WC) 272 258 return __pgprot(PROT_NORMAL_NC); 259 + if (attr & EFI_MEMORY_WT) 260 + return __acpi_get_writethrough_mem_attribute(); 273 261 return __pgprot(PROT_DEVICE_nGnRnE); 274 262 } 275 263 ··· 352 340 default: 353 341 if (region->attribute & EFI_MEMORY_WB) 354 342 prot = PAGE_KERNEL; 355 - else if (region->attribute & EFI_MEMORY_WT) 356 - prot = __pgprot(PROT_NORMAL_WT); 357 343 else if (region->attribute & EFI_MEMORY_WC) 358 344 prot = __pgprot(PROT_NORMAL_NC); 345 + else if (region->attribute & EFI_MEMORY_WT) 346 + prot = __acpi_get_writethrough_mem_attribute(); 359 347 } 360 348 } 361 349 return __ioremap(phys, size, prot);
+1 -1
arch/arm64/kernel/alternative.c
··· 181 181 */ 182 182 if (!is_module) { 183 183 dsb(ish); 184 - __flush_icache_all(); 184 + icache_inval_all_pou(); 185 185 isb(); 186 186 187 187 /* Ignore ARM64_CB bit from feature mask */
+14 -1
arch/arm64/kernel/asm-offsets.c
··· 27 27 int main(void) 28 28 { 29 29 DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); 30 + DEFINE(TSK_CPU, offsetof(struct task_struct, cpu)); 30 31 BLANK(); 31 32 DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); 32 33 DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); ··· 47 46 DEFINE(THREAD_SCTLR_USER, offsetof(struct task_struct, thread.sctlr_user)); 48 47 #ifdef CONFIG_ARM64_PTR_AUTH 49 48 DEFINE(THREAD_KEYS_USER, offsetof(struct task_struct, thread.keys_user)); 49 + #endif 50 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 50 51 DEFINE(THREAD_KEYS_KERNEL, offsetof(struct task_struct, thread.keys_kernel)); 51 52 #endif 52 53 #ifdef CONFIG_ARM64_MTE ··· 102 99 DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); 103 100 DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); 104 101 BLANK(); 105 - DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); 106 102 DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); 107 103 BLANK(); 108 104 DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); ··· 140 138 DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); 141 139 DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); 142 140 DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); 141 + DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS, offsetof(struct arm_smccc_1_2_regs, a0)); 142 + DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS, offsetof(struct arm_smccc_1_2_regs, a2)); 143 + DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS, offsetof(struct arm_smccc_1_2_regs, a4)); 144 + DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS, offsetof(struct arm_smccc_1_2_regs, a6)); 145 + DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS, offsetof(struct arm_smccc_1_2_regs, a8)); 146 + DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS, offsetof(struct arm_smccc_1_2_regs, a10)); 147 + DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS, offsetof(struct arm_smccc_1_2_regs, a12)); 148 + DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS, offsetof(struct 
arm_smccc_1_2_regs, a14)); 149 + DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS, offsetof(struct arm_smccc_1_2_regs, a16)); 143 150 BLANK(); 144 151 DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); 145 152 DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); ··· 164 153 #endif 165 154 #ifdef CONFIG_ARM64_PTR_AUTH 166 155 DEFINE(PTRAUTH_USER_KEY_APIA, offsetof(struct ptrauth_keys_user, apia)); 156 + #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL 167 157 DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia)); 158 + #endif 168 159 BLANK(); 169 160 #endif 170 161 return 0;
+162 -48
arch/arm64/kernel/cpufeature.c
··· 76 76 #include <asm/cpufeature.h> 77 77 #include <asm/cpu_ops.h> 78 78 #include <asm/fpsimd.h> 79 + #include <asm/insn.h> 79 80 #include <asm/kvm_host.h> 80 81 #include <asm/mmu_context.h> 81 82 #include <asm/mte.h> ··· 107 106 108 107 bool arm64_use_ng_mappings = false; 109 108 EXPORT_SYMBOL(arm64_use_ng_mappings); 109 + 110 + /* 111 + * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs 112 + * support it? 113 + */ 114 + static bool __read_mostly allow_mismatched_32bit_el0; 115 + 116 + /* 117 + * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have 118 + * seen at least one CPU capable of 32-bit EL0. 119 + */ 120 + DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); 121 + 122 + /* 123 + * Mask of CPUs supporting 32-bit EL0. 124 + * Only valid if arm64_mismatched_32bit_el0 is enabled. 125 + */ 126 + static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; 110 127 111 128 /* 112 129 * Flag to indicate if we have computed the system wide ··· 419 400 ARM64_FTR_END, 420 401 }; 421 402 403 + static const struct arm64_ftr_bits ftr_gmid[] = { 404 + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0), 405 + ARM64_FTR_END, 406 + }; 407 + 422 408 static const struct arm64_ftr_bits ftr_id_isar0[] = { 423 409 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), 424 410 ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), ··· 641 617 /* Op1 = 0, CRn = 1, CRm = 2 */ 642 618 ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr), 643 619 620 + /* Op1 = 1, CRn = 0, CRm = 0 */ 621 + ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), 622 + 644 623 /* Op1 = 3, CRn = 0, CRm = 0 */ 645 624 { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, 646 625 ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), ··· 794 767 * Any bits that are not covered by an arm64_ftr_bits entry are considered 795 768 * RES0 for the system-wide value, and must strictly match. 
796 769 */ 797 - static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) 770 + static void init_cpu_ftr_reg(u32 sys_reg, u64 new) 798 771 { 799 772 u64 val = 0; 800 773 u64 strict_mask = ~0x0ULL; ··· 890 863 891 864 static void __init setup_boot_cpu_capabilities(void); 892 865 866 + static void init_32bit_cpu_features(struct cpuinfo_32bit *info) 867 + { 868 + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); 869 + init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); 870 + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); 871 + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); 872 + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); 873 + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); 874 + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); 875 + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); 876 + init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); 877 + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); 878 + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); 879 + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); 880 + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); 881 + init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); 882 + init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); 883 + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); 884 + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); 885 + init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); 886 + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); 887 + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); 888 + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); 889 + } 890 + 893 891 void __init init_cpu_features(struct cpuinfo_arm64 *info) 894 892 { 895 893 /* Before we start using the tables, make sure it is sorted */ ··· 934 882 init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); 935 883 init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); 936 884 937 - if 
(id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { 938 - init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); 939 - init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); 940 - init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); 941 - init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); 942 - init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); 943 - init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); 944 - init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); 945 - init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); 946 - init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); 947 - init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); 948 - init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); 949 - init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); 950 - init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); 951 - init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); 952 - init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); 953 - init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); 954 - init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); 955 - init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); 956 - init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); 957 - init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); 958 - init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); 959 - } 885 + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) 886 + init_32bit_cpu_features(&info->aarch32); 960 887 961 888 if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { 962 889 init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); 963 890 sve_init_vq_map(); 964 891 } 892 + 893 + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) 894 + init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); 965 895 966 896 /* 967 897 * Initialize the indirect array of CPU hwcaps capabilities pointers ··· 1009 975 WARN_ON(!ftrp->width); 1010 976 } 1011 977 1012 - static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, 1013 - struct cpuinfo_arm64 *boot) 978 + static void 
lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info, 979 + struct cpuinfo_arm64 *boot) 980 + { 981 + static bool boot_cpu_32bit_regs_overridden = false; 982 + 983 + if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden) 984 + return; 985 + 986 + if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0)) 987 + return; 988 + 989 + boot->aarch32 = info->aarch32; 990 + init_32bit_cpu_features(&boot->aarch32); 991 + boot_cpu_32bit_regs_overridden = true; 992 + } 993 + 994 + static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info, 995 + struct cpuinfo_32bit *boot) 1014 996 { 1015 997 int taint = 0; 1016 998 u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); 1017 - 1018 - /* 1019 - * If we don't have AArch32 at all then skip the checks entirely 1020 - * as the register values may be UNKNOWN and we're not going to be 1021 - * using them for anything. 1022 - */ 1023 - if (!id_aa64pfr0_32bit_el0(pfr0)) 1024 - return taint; 1025 999 1026 1000 /* 1027 1001 * If we don't have AArch32 at EL1, then relax the strictness of ··· 1177 1135 } 1178 1136 1179 1137 /* 1138 + * The kernel uses the LDGM/STGM instructions and the number of tags 1139 + * they read/write depends on the GMID_EL1.BS field. Check that the 1140 + * value is the same on all CPUs. 1141 + */ 1142 + if (IS_ENABLED(CONFIG_ARM64_MTE) && 1143 + id_aa64pfr1_mte(info->reg_id_aa64pfr1)) { 1144 + taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, 1145 + info->reg_gmid, boot->reg_gmid); 1146 + } 1147 + 1148 + /* 1149 + * If we don't have AArch32 at all then skip the checks entirely 1150 + * as the register values may be UNKNOWN and we're not going to be 1151 + * using them for anything. 1152 + * 1180 1153 * This relies on a sanitised view of the AArch64 ID registers 1181 1154 * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. 
1182 1155 */ 1183 - taint |= update_32bit_cpu_features(cpu, info, boot); 1156 + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { 1157 + lazy_init_32bit_cpu_features(info, boot); 1158 + taint |= update_32bit_cpu_features(cpu, &info->aarch32, 1159 + &boot->aarch32); 1160 + } 1184 1161 1185 1162 /* 1186 1163 * Mismatched CPU features are a recipe for disaster. Don't even ··· 1307 1246 val = __read_sysreg_by_encoding(entry->sys_reg); 1308 1247 1309 1248 return feature_matches(val, entry); 1249 + } 1250 + 1251 + const struct cpumask *system_32bit_el0_cpumask(void) 1252 + { 1253 + if (!system_supports_32bit_el0()) 1254 + return cpu_none_mask; 1255 + 1256 + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) 1257 + return cpu_32bit_el0_mask; 1258 + 1259 + return cpu_possible_mask; 1260 + } 1261 + 1262 + static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) 1263 + { 1264 + if (!has_cpuid_feature(entry, scope)) 1265 + return allow_mismatched_32bit_el0; 1266 + 1267 + if (scope == SCOPE_SYSTEM) 1268 + pr_info("detected: 32-bit EL0 Support\n"); 1269 + 1270 + return true; 1310 1271 } 1311 1272 1312 1273 static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) ··· 1949 1866 .cpu_enable = cpu_copy_el2regs, 1950 1867 }, 1951 1868 { 1952 - .desc = "32-bit EL0 Support", 1953 - .capability = ARM64_HAS_32BIT_EL0, 1869 + .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, 1954 1870 .type = ARM64_CPUCAP_SYSTEM_FEATURE, 1955 - .matches = has_cpuid_feature, 1871 + .matches = has_32bit_el0, 1956 1872 .sys_reg = SYS_ID_AA64PFR0_EL1, 1957 1873 .sign = FTR_UNSIGNED, 1958 1874 .field_pos = ID_AA64PFR0_EL0_SHIFT, ··· 2460 2378 {}, 2461 2379 }; 2462 2380 2463 - static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) 2381 + static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) 2464 2382 { 2465 2383 switch (cap->hwcap_type) { 2466 2384 case CAP_HWCAP: ··· 2505 2423 return rc; 2506 2424 } 2507 
2425 2508 - static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) 2426 + static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) 2509 2427 { 2510 2428 /* We support emulation of accesses to CPU ID feature registers */ 2511 2429 cpu_set_named_feature(CPUID); ··· 2680 2598 } 2681 2599 2682 2600 static void 2683 - verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) 2601 + __verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) 2684 2602 { 2685 2603 2686 2604 for (; caps->matches; caps++) ··· 2689 2607 smp_processor_id(), caps->desc); 2690 2608 cpu_die_early(); 2691 2609 } 2610 + } 2611 + 2612 + static void verify_local_elf_hwcaps(void) 2613 + { 2614 + __verify_local_elf_hwcaps(arm64_elf_hwcaps); 2615 + 2616 + if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1))) 2617 + __verify_local_elf_hwcaps(compat_elf_hwcaps); 2692 2618 } 2693 2619 2694 2620 static void verify_sve_features(void) ··· 2763 2673 * on all secondary CPUs. 
2764 2674 */ 2765 2675 verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU); 2766 - 2767 - verify_local_elf_hwcaps(arm64_elf_hwcaps); 2768 - 2769 - if (system_supports_32bit_el0()) 2770 - verify_local_elf_hwcaps(compat_elf_hwcaps); 2676 + verify_local_elf_hwcaps(); 2771 2677 2772 2678 if (system_supports_sve()) 2773 2679 verify_sve_features(); ··· 2898 2812 ARCH_DMA_MINALIGN); 2899 2813 } 2900 2814 2815 + static int enable_mismatched_32bit_el0(unsigned int cpu) 2816 + { 2817 + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); 2818 + bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); 2819 + 2820 + if (cpu_32bit) { 2821 + cpumask_set_cpu(cpu, cpu_32bit_el0_mask); 2822 + static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); 2823 + setup_elf_hwcaps(compat_elf_hwcaps); 2824 + } 2825 + 2826 + return 0; 2827 + } 2828 + 2829 + static int __init init_32bit_el0_mask(void) 2830 + { 2831 + if (!allow_mismatched_32bit_el0) 2832 + return 0; 2833 + 2834 + if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL)) 2835 + return -ENOMEM; 2836 + 2837 + return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, 2838 + "arm64/mismatched_32bit_el0:online", 2839 + enable_mismatched_32bit_el0, NULL); 2840 + } 2841 + subsys_initcall_sync(init_32bit_el0_mask); 2842 + 2901 2843 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) 2902 2844 { 2903 2845 cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); ··· 3019 2905 } 3020 2906 3021 2907 static struct undef_hook mrs_hook = { 3022 - .instr_mask = 0xfff00000, 3023 - .instr_val = 0xd5300000, 2908 + .instr_mask = 0xffff0000, 2909 + .instr_val = 0xd5380000, 3024 2910 .pstate_mask = PSR_AA32_MODE_MASK, 3025 2911 .pstate_val = PSR_MODE_EL0t, 3026 2912 .fn = emulate_mrs,
+31 -25
arch/arm64/kernel/cpuinfo.c
··· 246 246 struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ 247 247 \ 248 248 if (info->reg_midr) \ 249 - return sprintf(buf, "0x%016x\n", info->reg_##_field); \ 249 + return sprintf(buf, "0x%016llx\n", info->reg_##_field); \ 250 250 else \ 251 251 return 0; \ 252 252 } \ ··· 344 344 pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); 345 345 } 346 346 347 + static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info) 348 + { 349 + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); 350 + info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); 351 + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); 352 + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); 353 + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); 354 + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); 355 + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); 356 + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); 357 + info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); 358 + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); 359 + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); 360 + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); 361 + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); 362 + info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); 363 + info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); 364 + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); 365 + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); 366 + info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); 367 + 368 + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); 369 + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); 370 + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); 371 + } 372 + 347 373 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) 348 374 { 349 375 info->reg_cntfrq = arch_timer_get_cntfrq(); ··· 397 371 info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); 398 372 info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1); 399 373 400 - /* Update the 32bit ID registers only if AArch32 is implemented */ 401 - if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { 402 - info->reg_id_dfr0 = 
read_cpuid(ID_DFR0_EL1); 403 - info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); 404 - info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); 405 - info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); 406 - info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); 407 - info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); 408 - info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); 409 - info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); 410 - info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1); 411 - info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); 412 - info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); 413 - info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); 414 - info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); 415 - info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); 416 - info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); 417 - info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); 418 - info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); 419 - info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); 374 + if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) 375 + info->reg_gmid = read_cpuid(GMID_EL1); 420 376 421 - info->reg_mvfr0 = read_cpuid(MVFR0_EL1); 422 - info->reg_mvfr1 = read_cpuid(MVFR1_EL1); 423 - info->reg_mvfr2 = read_cpuid(MVFR2_EL1); 424 - } 377 + if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) 378 + __cpuinfo_store_cpu_32bit(&info->aarch32); 425 379 426 380 if (IS_ENABLED(CONFIG_ARM64_SVE) && 427 381 id_aa64pfr0_sve(info->reg_id_aa64pfr0))
+5 -4
arch/arm64/kernel/efi-entry.S
··· 28 28 * stale icache entries from before relocation. 29 29 */ 30 30 ldr w1, =kernel_size 31 - bl __clean_dcache_area_poc 31 + add x1, x0, x1 32 + bl dcache_clean_poc 32 33 ic ialluis 33 34 34 35 /* ··· 37 36 * so that we can safely disable the MMU and caches. 38 37 */ 39 38 adr x0, 0f 40 - ldr w1, 3f 41 - bl __clean_dcache_area_poc 39 + adr x1, 3f 40 + bl dcache_clean_poc 42 41 0: 43 42 /* Turn off Dcache and MMU */ 44 43 mrs x0, CurrentEL ··· 65 64 mov x2, xzr 66 65 mov x3, xzr 67 66 br x19 67 + 3: 68 68 SYM_CODE_END(efi_enter_kernel) 69 - 3: .long . - 0b
+238 -18
arch/arm64/kernel/entry-common.c
··· 6 6 */ 7 7 8 8 #include <linux/context_tracking.h> 9 + #include <linux/linkage.h> 10 + #include <linux/lockdep.h> 9 11 #include <linux/ptrace.h> 12 + #include <linux/sched.h> 13 + #include <linux/sched/debug.h> 10 14 #include <linux/thread_info.h> 11 15 12 16 #include <asm/cpufeature.h> ··· 19 15 #include <asm/exception.h> 20 16 #include <asm/kprobes.h> 21 17 #include <asm/mmu.h> 18 + #include <asm/processor.h> 19 + #include <asm/sdei.h> 20 + #include <asm/stacktrace.h> 22 21 #include <asm/sysreg.h> 22 + #include <asm/system_misc.h> 23 23 24 24 /* 25 25 * This is intended to match the logic in irqentry_enter(), handling the kernel ··· 75 67 } 76 68 } 77 69 78 - void noinstr arm64_enter_nmi(struct pt_regs *regs) 70 + static void noinstr arm64_enter_nmi(struct pt_regs *regs) 79 71 { 80 72 regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); 81 73 ··· 88 80 ftrace_nmi_enter(); 89 81 } 90 82 91 - void noinstr arm64_exit_nmi(struct pt_regs *regs) 83 + static void noinstr arm64_exit_nmi(struct pt_regs *regs) 92 84 { 93 85 bool restore = regs->lockdep_hardirqs; 94 86 ··· 105 97 __nmi_exit(); 106 98 } 107 99 108 - asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) 100 + static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) 109 101 { 110 102 if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) 111 103 arm64_enter_nmi(regs); ··· 113 105 enter_from_kernel_mode(regs); 114 106 } 115 107 116 - asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) 108 + static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) 117 109 { 118 110 if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) 119 111 arm64_exit_nmi(regs); 120 112 else 121 113 exit_to_kernel_mode(regs); 114 + } 115 + 116 + static void __sched arm64_preempt_schedule_irq(void) 117 + { 118 + lockdep_assert_irqs_disabled(); 119 + 120 + /* 121 + * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC 122 + * priority masking is used the GIC 
irqchip driver will clear DAIF.IF 123 + * using gic_arch_enable_irqs() for normal IRQs. If anything is set in 124 + * DAIF we must have handled an NMI, so skip preemption. 125 + */ 126 + if (system_uses_irq_prio_masking() && read_sysreg(daif)) 127 + return; 128 + 129 + /* 130 + * Preempting a task from an IRQ means we leave copies of PSTATE 131 + * on the stack. cpufeature's enable calls may modify PSTATE, but 132 + * resuming one of these preempted tasks would undo those changes. 133 + * 134 + * Only allow a task to be preempted once cpufeatures have been 135 + * enabled. 136 + */ 137 + if (system_capabilities_finalized()) 138 + preempt_schedule_irq(); 139 + } 140 + 141 + static void do_interrupt_handler(struct pt_regs *regs, 142 + void (*handler)(struct pt_regs *)) 143 + { 144 + if (on_thread_stack()) 145 + call_on_irq_stack(regs, handler); 146 + else 147 + handler(regs); 148 + } 149 + 150 + extern void (*handle_arch_irq)(struct pt_regs *); 151 + extern void (*handle_arch_fiq)(struct pt_regs *); 152 + 153 + static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector, 154 + unsigned int esr) 155 + { 156 + arm64_enter_nmi(regs); 157 + 158 + console_verbose(); 159 + 160 + pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n", 161 + vector, smp_processor_id(), esr, 162 + esr_get_class_string(esr)); 163 + 164 + __show_regs(regs); 165 + panic("Unhandled exception"); 166 + } 167 + 168 + #define UNHANDLED(el, regsize, vector) \ 169 + asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs) \ 170 + { \ 171 + const char *desc = #regsize "-bit " #el " " #vector; \ 172 + __panic_unhandled(regs, desc, read_sysreg(esr_el1)); \ 122 173 } 123 174 124 175 #ifdef CONFIG_ARM64_ERRATUM_1463225 ··· 229 162 } 230 163 #endif /* CONFIG_ARM64_ERRATUM_1463225 */ 231 164 165 + UNHANDLED(el1t, 64, sync) 166 + UNHANDLED(el1t, 64, irq) 167 + UNHANDLED(el1t, 64, fiq) 168 + UNHANDLED(el1t, 64, error) 169 + 232 170 static void noinstr 
el1_abort(struct pt_regs *regs, unsigned long esr) 233 171 { 234 172 unsigned long far = read_sysreg(far_el1); ··· 261 189 enter_from_kernel_mode(regs); 262 190 local_daif_inherit(regs); 263 191 do_undefinstr(regs); 264 - local_daif_mask(); 265 - exit_to_kernel_mode(regs); 266 - } 267 - 268 - static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) 269 - { 270 - enter_from_kernel_mode(regs); 271 - local_daif_inherit(regs); 272 - bad_mode(regs, 0, esr); 273 192 local_daif_mask(); 274 193 exit_to_kernel_mode(regs); 275 194 } ··· 308 245 exit_to_kernel_mode(regs); 309 246 } 310 247 311 - asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) 248 + asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) 312 249 { 313 250 unsigned long esr = read_sysreg(esr_el1); 314 251 ··· 338 275 el1_fpac(regs, esr); 339 276 break; 340 277 default: 341 - el1_inv(regs, esr); 278 + __panic_unhandled(regs, "64-bit el1h sync", esr); 342 279 } 280 + } 281 + 282 + static void noinstr el1_interrupt(struct pt_regs *regs, 283 + void (*handler)(struct pt_regs *)) 284 + { 285 + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); 286 + 287 + enter_el1_irq_or_nmi(regs); 288 + do_interrupt_handler(regs, handler); 289 + 290 + /* 291 + * Note: thread_info::preempt_count includes both thread_info::count 292 + * and thread_info::need_resched, and is not equivalent to 293 + * preempt_count(). 
294 + */ 295 + if (IS_ENABLED(CONFIG_PREEMPTION) && 296 + READ_ONCE(current_thread_info()->preempt_count) == 0) 297 + arm64_preempt_schedule_irq(); 298 + 299 + exit_el1_irq_or_nmi(regs); 300 + } 301 + 302 + asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs) 303 + { 304 + el1_interrupt(regs, handle_arch_irq); 305 + } 306 + 307 + asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs) 308 + { 309 + el1_interrupt(regs, handle_arch_fiq); 310 + } 311 + 312 + asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs) 313 + { 314 + unsigned long esr = read_sysreg(esr_el1); 315 + 316 + local_daif_restore(DAIF_ERRCTX); 317 + arm64_enter_nmi(regs); 318 + do_serror(regs, esr); 319 + arm64_exit_nmi(regs); 343 320 } 344 321 345 322 asmlinkage void noinstr enter_from_user_mode(void) ··· 501 398 502 399 enter_from_user_mode(); 503 400 do_debug_exception(far, esr, regs); 504 - local_daif_restore(DAIF_PROCCTX_NOIRQ); 401 + local_daif_restore(DAIF_PROCCTX); 505 402 } 506 403 507 404 static void noinstr el0_svc(struct pt_regs *regs) ··· 518 415 do_ptrauth_fault(regs, esr); 519 416 } 520 417 521 - asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) 418 + asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) 522 419 { 523 420 unsigned long esr = read_sysreg(esr_el1); 524 421 ··· 571 468 } 572 469 } 573 470 471 + static void noinstr el0_interrupt(struct pt_regs *regs, 472 + void (*handler)(struct pt_regs *)) 473 + { 474 + enter_from_user_mode(); 475 + 476 + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); 477 + 478 + if (regs->pc & BIT(55)) 479 + arm64_apply_bp_hardening(); 480 + 481 + do_interrupt_handler(regs, handler); 482 + } 483 + 484 + static void noinstr __el0_irq_handler_common(struct pt_regs *regs) 485 + { 486 + el0_interrupt(regs, handle_arch_irq); 487 + } 488 + 489 + asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs) 490 + { 491 + __el0_irq_handler_common(regs); 492 + } 493 + 494 + static void noinstr 
__el0_fiq_handler_common(struct pt_regs *regs) 495 + { 496 + el0_interrupt(regs, handle_arch_fiq); 497 + } 498 + 499 + asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs) 500 + { 501 + __el0_fiq_handler_common(regs); 502 + } 503 + 504 + static void __el0_error_handler_common(struct pt_regs *regs) 505 + { 506 + unsigned long esr = read_sysreg(esr_el1); 507 + 508 + enter_from_user_mode(); 509 + local_daif_restore(DAIF_ERRCTX); 510 + arm64_enter_nmi(regs); 511 + do_serror(regs, esr); 512 + arm64_exit_nmi(regs); 513 + local_daif_restore(DAIF_PROCCTX); 514 + } 515 + 516 + asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs) 517 + { 518 + __el0_error_handler_common(regs); 519 + } 520 + 574 521 #ifdef CONFIG_COMPAT 575 522 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) 576 523 { ··· 636 483 do_el0_svc_compat(regs); 637 484 } 638 485 639 - asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) 486 + asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs) 640 487 { 641 488 unsigned long esr = read_sysreg(esr_el1); 642 489 ··· 679 526 el0_inv(regs, esr); 680 527 } 681 528 } 529 + 530 + asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs) 531 + { 532 + __el0_irq_handler_common(regs); 533 + } 534 + 535 + asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs) 536 + { 537 + __el0_fiq_handler_common(regs); 538 + } 539 + 540 + asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs) 541 + { 542 + __el0_error_handler_common(regs); 543 + } 544 + #else /* CONFIG_COMPAT */ 545 + UNHANDLED(el0t, 32, sync) 546 + UNHANDLED(el0t, 32, irq) 547 + UNHANDLED(el0t, 32, fiq) 548 + UNHANDLED(el0t, 32, error) 682 549 #endif /* CONFIG_COMPAT */ 550 + 551 + #ifdef CONFIG_VMAP_STACK 552 + asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) 553 + { 554 + unsigned int esr = read_sysreg(esr_el1); 555 + unsigned long far = read_sysreg(far_el1); 556 + 557 + arm64_enter_nmi(regs); 
558 + panic_bad_stack(regs, esr, far); 559 + } 560 + #endif /* CONFIG_VMAP_STACK */ 561 + 562 + #ifdef CONFIG_ARM_SDE_INTERFACE 563 + asmlinkage noinstr unsigned long 564 + __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) 565 + { 566 + unsigned long ret; 567 + 568 + /* 569 + * We didn't take an exception to get here, so the HW hasn't 570 + * set/cleared bits in PSTATE that we may rely on. 571 + * 572 + * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to 573 + * whether PSTATE bits are inherited unchanged or generated from 574 + * scratch, and the TF-A implementation always clears PAN and always 575 + * clears UAO. There are no other known implementations. 576 + * 577 + * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how 578 + * PSTATE is modified upon architectural exceptions, and so PAN is 579 + * either inherited or set per SCTLR_ELx.SPAN, and UAO is always 580 + * cleared. 581 + * 582 + * We must explicitly reset PAN to the expected state, including 583 + * clearing it when the host isn't using it, in case a VM had it set. 584 + */ 585 + if (system_uses_hw_pan()) 586 + set_pstate_pan(1); 587 + else if (cpu_has_pan()) 588 + set_pstate_pan(0); 589 + 590 + arm64_enter_nmi(regs); 591 + ret = do_sdei_event(regs, arg); 592 + arm64_exit_nmi(regs); 593 + 594 + return ret; 595 + } 596 + #endif /* CONFIG_ARM_SDE_INTERFACE */
+15 -7
arch/arm64/kernel/entry-fpsimd.S
··· 63 63 * and the rest zeroed. All the other SVE registers will be zeroed. 64 64 */ 65 65 SYM_FUNC_START(sve_load_from_fpsimd_state) 66 - sve_load_vq x1, x2, x3 67 - fpsimd_restore x0, 8 68 - _for n, 0, 15, _sve_pfalse \n 69 - _sve_wrffr 0 70 - ret 66 + sve_load_vq x1, x2, x3 67 + fpsimd_restore x0, 8 68 + sve_flush_p_ffr 69 + ret 71 70 SYM_FUNC_END(sve_load_from_fpsimd_state) 72 71 73 - /* Zero all SVE registers but the first 128-bits of each vector */ 72 + /* 73 + * Zero all SVE registers but the first 128-bits of each vector 74 + * 75 + * VQ must already be configured by caller, any further updates of VQ 76 + * will need to ensure that the register state remains valid. 77 + * 78 + * x0 = VQ - 1 79 + */ 74 80 SYM_FUNC_START(sve_flush_live) 75 - sve_flush 81 + cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state 82 + sve_flush_z 83 + 1: sve_flush_p_ffr 76 84 ret 77 85 SYM_FUNC_END(sve_flush_live) 78 86
+93 -270
arch/arm64/kernel/entry.S
··· 33 33 * Context tracking and irqflag tracing need to instrument transitions between 34 34 * user and kernel mode. 35 35 */ 36 - .macro user_exit_irqoff 37 - #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) 38 - bl enter_from_user_mode 39 - #endif 40 - .endm 41 - 42 36 .macro user_enter_irqoff 43 37 #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) 44 38 bl exit_to_user_mode ··· 45 51 .endr 46 52 .endm 47 53 48 - /* 49 - * Bad Abort numbers 50 - *----------------- 51 - */ 52 - #define BAD_SYNC 0 53 - #define BAD_IRQ 1 54 - #define BAD_FIQ 2 55 - #define BAD_ERROR 3 56 - 57 - .macro kernel_ventry, el, label, regsize = 64 54 + .macro kernel_ventry, el:req, ht:req, regsize:req, label:req 58 55 .align 7 59 56 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 60 57 .if \el == 0 ··· 72 87 tbnz x0, #THREAD_SHIFT, 0f 73 88 sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0 74 89 sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp 75 - b el\()\el\()_\label 90 + b el\el\ht\()_\regsize\()_\label 76 91 77 92 0: 78 93 /* ··· 104 119 sub sp, sp, x0 105 120 mrs x0, tpidrro_el0 106 121 #endif 107 - b el\()\el\()_\label 122 + b el\el\ht\()_\regsize\()_\label 108 123 .endm 109 124 110 125 .macro tramp_alias, dst, sym ··· 260 275 261 276 mte_set_kernel_gcr x22, x23 262 277 263 - scs_load tsk, x20 278 + scs_load tsk 264 279 .else 265 280 add x21, sp, #PT_REGS_SIZE 266 281 get_current_task tsk ··· 270 285 stp lr, x21, [sp, #S_LR] 271 286 272 287 /* 273 - * For exceptions from EL0, create a terminal frame record. 288 + * For exceptions from EL0, create a final frame record. 274 289 * For exceptions from EL1, create a synthetic frame record so the 275 290 * interrupted code shows up in the backtrace. 
276 291 */ ··· 360 375 alternative_else_nop_endif 361 376 #endif 362 377 3: 363 - scs_save tsk, x0 378 + scs_save tsk 364 379 365 380 #ifdef CONFIG_ARM64_PTR_AUTH 366 381 alternative_if ARM64_HAS_ADDRESS_AUTH ··· 471 486 SYM_CODE_END(__swpan_exit_el0) 472 487 #endif 473 488 474 - .macro irq_stack_entry 475 - mov x19, sp // preserve the original sp 476 - #ifdef CONFIG_SHADOW_CALL_STACK 477 - mov x24, scs_sp // preserve the original shadow stack 478 - #endif 479 - 480 - /* 481 - * Compare sp with the base of the task stack. 482 - * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, 483 - * and should switch to the irq stack. 484 - */ 485 - ldr x25, [tsk, TSK_STACK] 486 - eor x25, x25, x19 487 - and x25, x25, #~(THREAD_SIZE - 1) 488 - cbnz x25, 9998f 489 - 490 - ldr_this_cpu x25, irq_stack_ptr, x26 491 - mov x26, #IRQ_STACK_SIZE 492 - add x26, x25, x26 493 - 494 - /* switch to the irq stack */ 495 - mov sp, x26 496 - 497 - #ifdef CONFIG_SHADOW_CALL_STACK 498 - /* also switch to the irq shadow stack */ 499 - ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26 500 - #endif 501 - 502 - 9998: 503 - .endm 504 - 505 - /* 506 - * The callee-saved regs (x19-x29) should be preserved between 507 - * irq_stack_entry and irq_stack_exit, but note that kernel_entry 508 - * uses x20-x23 to store data for later use. 509 - */ 510 - .macro irq_stack_exit 511 - mov sp, x19 512 - #ifdef CONFIG_SHADOW_CALL_STACK 513 - mov scs_sp, x24 514 - #endif 515 - .endm 516 - 517 489 /* GPRs used by entry code */ 518 490 tsk .req x28 // current thread_info 519 491 520 492 /* 521 493 * Interrupt handling. 
522 494 */ 523 - .macro irq_handler, handler:req 524 - ldr_l x1, \handler 525 - mov x0, sp 526 - irq_stack_entry 527 - blr x1 528 - irq_stack_exit 529 - .endm 530 - 531 495 .macro gic_prio_kentry_setup, tmp:req 532 496 #ifdef CONFIG_ARM64_PSEUDO_NMI 533 497 alternative_if ARM64_HAS_IRQ_PRIO_MASKING ··· 484 550 msr_s SYS_ICC_PMR_EL1, \tmp 485 551 alternative_else_nop_endif 486 552 #endif 487 - .endm 488 - 489 - .macro el1_interrupt_handler, handler:req 490 - enable_da 491 - 492 - mov x0, sp 493 - bl enter_el1_irq_or_nmi 494 - 495 - irq_handler \handler 496 - 497 - #ifdef CONFIG_PREEMPTION 498 - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count 499 - alternative_if ARM64_HAS_IRQ_PRIO_MASKING 500 - /* 501 - * DA were cleared at start of handling, and IF are cleared by 502 - * the GIC irqchip driver using gic_arch_enable_irqs() for 503 - * normal IRQs. If anything is set, it means we come back from 504 - * an NMI instead of a normal IRQ, so skip preemption 505 - */ 506 - mrs x0, daif 507 - orr x24, x24, x0 508 - alternative_else_nop_endif 509 - cbnz x24, 1f // preempt count != 0 || NMI return path 510 - bl arm64_preempt_schedule_irq // irq en/disable is done inside 511 - 1: 512 - #endif 513 - 514 - mov x0, sp 515 - bl exit_el1_irq_or_nmi 516 - .endm 517 - 518 - .macro el0_interrupt_handler, handler:req 519 - user_exit_irqoff 520 - enable_da 521 - 522 - tbz x22, #55, 1f 523 - bl do_el0_irq_bp_hardening 524 - 1: 525 - irq_handler \handler 526 553 .endm 527 554 528 555 .text ··· 495 600 496 601 .align 11 497 602 SYM_CODE_START(vectors) 498 - kernel_ventry 1, sync_invalid // Synchronous EL1t 499 - kernel_ventry 1, irq_invalid // IRQ EL1t 500 - kernel_ventry 1, fiq_invalid // FIQ EL1t 501 - kernel_ventry 1, error_invalid // Error EL1t 603 + kernel_ventry 1, t, 64, sync // Synchronous EL1t 604 + kernel_ventry 1, t, 64, irq // IRQ EL1t 605 + kernel_ventry 1, t, 64, fiq // FIQ EL1h 606 + kernel_ventry 1, t, 64, error // Error EL1t 502 607 503 - kernel_ventry 1, sync // 
Synchronous EL1h 504 - kernel_ventry 1, irq // IRQ EL1h 505 - kernel_ventry 1, fiq // FIQ EL1h 506 - kernel_ventry 1, error // Error EL1h 608 + kernel_ventry 1, h, 64, sync // Synchronous EL1h 609 + kernel_ventry 1, h, 64, irq // IRQ EL1h 610 + kernel_ventry 1, h, 64, fiq // FIQ EL1h 611 + kernel_ventry 1, h, 64, error // Error EL1h 507 612 508 - kernel_ventry 0, sync // Synchronous 64-bit EL0 509 - kernel_ventry 0, irq // IRQ 64-bit EL0 510 - kernel_ventry 0, fiq // FIQ 64-bit EL0 511 - kernel_ventry 0, error // Error 64-bit EL0 613 + kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0 614 + kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0 615 + kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0 616 + kernel_ventry 0, t, 64, error // Error 64-bit EL0 512 617 513 - #ifdef CONFIG_COMPAT 514 - kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 515 - kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 516 - kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 517 - kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 518 - #else 519 - kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 520 - kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0 521 - kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0 522 - kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0 523 - #endif 618 + kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0 619 + kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0 620 + kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0 621 + kernel_ventry 0, t, 32, error // Error 32-bit EL0 524 622 SYM_CODE_END(vectors) 525 623 526 624 #ifdef CONFIG_VMAP_STACK ··· 544 656 ASM_BUG() 545 657 #endif /* CONFIG_VMAP_STACK */ 546 658 547 - /* 548 - * Invalid mode handlers 549 - */ 550 - .macro inv_entry, el, reason, regsize = 64 659 + 660 + .macro entry_handler el:req, ht:req, regsize:req, label:req 661 + SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label) 551 662 kernel_entry \el, \regsize 552 663 mov x0, sp 553 - mov x1, #\reason 554 - mrs x2, esr_el1 555 - bl 
bad_mode 556 - ASM_BUG() 664 + bl el\el\ht\()_\regsize\()_\label\()_handler 665 + .if \el == 0 666 + b ret_to_user 667 + .else 668 + b ret_to_kernel 669 + .endif 670 + SYM_CODE_END(el\el\ht\()_\regsize\()_\label) 557 671 .endm 558 672 559 - SYM_CODE_START_LOCAL(el0_sync_invalid) 560 - inv_entry 0, BAD_SYNC 561 - SYM_CODE_END(el0_sync_invalid) 562 - 563 - SYM_CODE_START_LOCAL(el0_irq_invalid) 564 - inv_entry 0, BAD_IRQ 565 - SYM_CODE_END(el0_irq_invalid) 566 - 567 - SYM_CODE_START_LOCAL(el0_fiq_invalid) 568 - inv_entry 0, BAD_FIQ 569 - SYM_CODE_END(el0_fiq_invalid) 570 - 571 - SYM_CODE_START_LOCAL(el0_error_invalid) 572 - inv_entry 0, BAD_ERROR 573 - SYM_CODE_END(el0_error_invalid) 574 - 575 - SYM_CODE_START_LOCAL(el1_sync_invalid) 576 - inv_entry 1, BAD_SYNC 577 - SYM_CODE_END(el1_sync_invalid) 578 - 579 - SYM_CODE_START_LOCAL(el1_irq_invalid) 580 - inv_entry 1, BAD_IRQ 581 - SYM_CODE_END(el1_irq_invalid) 582 - 583 - SYM_CODE_START_LOCAL(el1_fiq_invalid) 584 - inv_entry 1, BAD_FIQ 585 - SYM_CODE_END(el1_fiq_invalid) 586 - 587 - SYM_CODE_START_LOCAL(el1_error_invalid) 588 - inv_entry 1, BAD_ERROR 589 - SYM_CODE_END(el1_error_invalid) 590 - 591 673 /* 592 - * EL1 mode handlers. 
674 + * Early exception handlers 593 675 */ 594 - .align 6 595 - SYM_CODE_START_LOCAL_NOALIGN(el1_sync) 596 - kernel_entry 1 597 - mov x0, sp 598 - bl el1_sync_handler 676 + entry_handler 1, t, 64, sync 677 + entry_handler 1, t, 64, irq 678 + entry_handler 1, t, 64, fiq 679 + entry_handler 1, t, 64, error 680 + 681 + entry_handler 1, h, 64, sync 682 + entry_handler 1, h, 64, irq 683 + entry_handler 1, h, 64, fiq 684 + entry_handler 1, h, 64, error 685 + 686 + entry_handler 0, t, 64, sync 687 + entry_handler 0, t, 64, irq 688 + entry_handler 0, t, 64, fiq 689 + entry_handler 0, t, 64, error 690 + 691 + entry_handler 0, t, 32, sync 692 + entry_handler 0, t, 32, irq 693 + entry_handler 0, t, 32, fiq 694 + entry_handler 0, t, 32, error 695 + 696 + SYM_CODE_START_LOCAL(ret_to_kernel) 599 697 kernel_exit 1 600 - SYM_CODE_END(el1_sync) 601 - 602 - .align 6 603 - SYM_CODE_START_LOCAL_NOALIGN(el1_irq) 604 - kernel_entry 1 605 - el1_interrupt_handler handle_arch_irq 606 - kernel_exit 1 607 - SYM_CODE_END(el1_irq) 608 - 609 - SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) 610 - kernel_entry 1 611 - el1_interrupt_handler handle_arch_fiq 612 - kernel_exit 1 613 - SYM_CODE_END(el1_fiq) 614 - 615 - /* 616 - * EL0 mode handlers. 
617 - */ 618 - .align 6 619 - SYM_CODE_START_LOCAL_NOALIGN(el0_sync) 620 - kernel_entry 0 621 - mov x0, sp 622 - bl el0_sync_handler 623 - b ret_to_user 624 - SYM_CODE_END(el0_sync) 625 - 626 - #ifdef CONFIG_COMPAT 627 - .align 6 628 - SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat) 629 - kernel_entry 0, 32 630 - mov x0, sp 631 - bl el0_sync_compat_handler 632 - b ret_to_user 633 - SYM_CODE_END(el0_sync_compat) 634 - 635 - .align 6 636 - SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) 637 - kernel_entry 0, 32 638 - b el0_irq_naked 639 - SYM_CODE_END(el0_irq_compat) 640 - 641 - SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) 642 - kernel_entry 0, 32 643 - b el0_fiq_naked 644 - SYM_CODE_END(el0_fiq_compat) 645 - 646 - SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) 647 - kernel_entry 0, 32 648 - b el0_error_naked 649 - SYM_CODE_END(el0_error_compat) 650 - #endif 651 - 652 - .align 6 653 - SYM_CODE_START_LOCAL_NOALIGN(el0_irq) 654 - kernel_entry 0 655 - el0_irq_naked: 656 - el0_interrupt_handler handle_arch_irq 657 - b ret_to_user 658 - SYM_CODE_END(el0_irq) 659 - 660 - SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) 661 - kernel_entry 0 662 - el0_fiq_naked: 663 - el0_interrupt_handler handle_arch_fiq 664 - b ret_to_user 665 - SYM_CODE_END(el0_fiq) 666 - 667 - SYM_CODE_START_LOCAL(el1_error) 668 - kernel_entry 1 669 - mrs x1, esr_el1 670 - enable_dbg 671 - mov x0, sp 672 - bl do_serror 673 - kernel_exit 1 674 - SYM_CODE_END(el1_error) 675 - 676 - SYM_CODE_START_LOCAL(el0_error) 677 - kernel_entry 0 678 - el0_error_naked: 679 - mrs x25, esr_el1 680 - user_exit_irqoff 681 - enable_dbg 682 - mov x0, sp 683 - mov x1, x25 684 - bl do_serror 685 - enable_da 686 - b ret_to_user 687 - SYM_CODE_END(el0_error) 698 + SYM_CODE_END(ret_to_kernel) 688 699 689 700 /* 690 701 * "slow" syscall return path. 
··· 766 979 mov sp, x9 767 980 msr sp_el0, x1 768 981 ptrauth_keys_install_kernel x1, x8, x9, x10 769 - scs_save x0, x8 770 - scs_load x1, x8 982 + scs_save x0 983 + scs_load x1 771 984 ret 772 985 SYM_FUNC_END(cpu_switch_to) 773 986 NOKPROBE(cpu_switch_to) ··· 784 997 b ret_to_user 785 998 SYM_CODE_END(ret_from_fork) 786 999 NOKPROBE(ret_from_fork) 1000 + 1001 + /* 1002 + * void call_on_irq_stack(struct pt_regs *regs, 1003 + * void (*func)(struct pt_regs *)); 1004 + * 1005 + * Calls func(regs) using this CPU's irq stack and shadow irq stack. 1006 + */ 1007 + SYM_FUNC_START(call_on_irq_stack) 1008 + #ifdef CONFIG_SHADOW_CALL_STACK 1009 + stp scs_sp, xzr, [sp, #-16]! 1010 + ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17 1011 + #endif 1012 + /* Create a frame record to save our LR and SP (implicit in FP) */ 1013 + stp x29, x30, [sp, #-16]! 1014 + mov x29, sp 1015 + 1016 + ldr_this_cpu x16, irq_stack_ptr, x17 1017 + mov x15, #IRQ_STACK_SIZE 1018 + add x16, x16, x15 1019 + 1020 + /* Move to the new stack and call the function there */ 1021 + mov sp, x16 1022 + blr x1 1023 + 1024 + /* 1025 + * Restore the SP from the FP, and restore the FP and LR from the frame 1026 + * record. 1027 + */ 1028 + mov sp, x29 1029 + ldp x29, x30, [sp], #16 1030 + #ifdef CONFIG_SHADOW_CALL_STACK 1031 + ldp scs_sp, xzr, [sp], #16 1032 + #endif 1033 + ret 1034 + SYM_FUNC_END(call_on_irq_stack) 1035 + NOKPROBE(call_on_irq_stack) 787 1036 788 1037 #ifdef CONFIG_ARM_SDE_INTERFACE 789 1038
+4 -2
arch/arm64/kernel/fpsimd.c
··· 957 957 * disabling the trap, otherwise update our in-memory copy. 958 958 */ 959 959 if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { 960 - sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); 961 - sve_flush_live(); 960 + unsigned long vq_minus_one = 961 + sve_vq_from_vl(current->thread.sve_vl) - 1; 962 + sve_set_vq(vq_minus_one); 963 + sve_flush_live(vq_minus_one); 962 964 fpsimd_bind_task_to_cpu(); 963 965 } else { 964 966 fpsimd_to_sve(current);
+1
arch/arm64/kernel/ftrace.c
··· 15 15 #include <asm/debug-monitors.h> 16 16 #include <asm/ftrace.h> 17 17 #include <asm/insn.h> 18 + #include <asm/patching.h> 18 19 19 20 #ifdef CONFIG_DYNAMIC_FTRACE 20 21 /*
+44 -32
arch/arm64/kernel/head.S
··· 16 16 #include <asm/asm_pointer_auth.h> 17 17 #include <asm/assembler.h> 18 18 #include <asm/boot.h> 19 + #include <asm/bug.h> 19 20 #include <asm/ptrace.h> 20 21 #include <asm/asm-offsets.h> 21 22 #include <asm/cache.h> ··· 118 117 dmb sy // needed before dc ivac with 119 118 // MMU off 120 119 121 - mov x1, #0x20 // 4 x 8 bytes 122 - b __inval_dcache_area // tail call 120 + add x1, x0, #0x20 // 4 x 8 bytes 121 + b dcache_inval_poc // tail call 123 122 SYM_CODE_END(preserve_boot_args) 124 123 125 124 /* ··· 196 195 and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) 197 196 mov \istart, \ptrs 198 197 mul \istart, \istart, \count 199 - add \iend, \iend, \istart // iend += (count - 1) * ptrs 198 + add \iend, \iend, \istart // iend += count * ptrs 200 199 // our entries span multiple tables 201 200 202 201 lsr \istart, \vstart, \shift ··· 269 268 */ 270 269 adrp x0, init_pg_dir 271 270 adrp x1, init_pg_end 272 - sub x1, x1, x0 273 - bl __inval_dcache_area 271 + bl dcache_inval_poc 274 272 275 273 /* 276 274 * Clear the init page tables. ··· 354 354 #endif 355 355 1: 356 356 ldr_l x4, idmap_ptrs_per_pgd 357 - mov x5, x3 // __pa(__idmap_text_start) 358 357 adr_l x6, __idmap_text_end // __pa(__idmap_text_end) 359 358 360 359 map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 ··· 381 382 382 383 adrp x0, idmap_pg_dir 383 384 adrp x1, idmap_pg_end 384 - sub x1, x1, x0 385 - bl __inval_dcache_area 385 + bl dcache_inval_poc 386 386 387 387 adrp x0, init_pg_dir 388 388 adrp x1, init_pg_end 389 - sub x1, x1, x0 390 - bl __inval_dcache_area 389 + bl dcache_inval_poc 391 390 392 391 ret x28 393 392 SYM_FUNC_END(__create_page_tables) 393 + 394 + /* 395 + * Initialize CPU registers with task-specific and cpu-specific context. 396 + * 397 + * Create a final frame record at task_pt_regs(current)->stackframe, so 398 + * that the unwinder can identify the final frame record of any task by 399 + * its location in the task stack. 
We reserve the entire pt_regs space 400 + * for consistency with user tasks and kthreads. 401 + */ 402 + .macro init_cpu_task tsk, tmp1, tmp2 403 + msr sp_el0, \tsk 404 + 405 + ldr \tmp1, [\tsk, #TSK_STACK] 406 + add sp, \tmp1, #THREAD_SIZE 407 + sub sp, sp, #PT_REGS_SIZE 408 + 409 + stp xzr, xzr, [sp, #S_STACKFRAME] 410 + add x29, sp, #S_STACKFRAME 411 + 412 + scs_load \tsk 413 + 414 + adr_l \tmp1, __per_cpu_offset 415 + ldr w\tmp2, [\tsk, #TSK_CPU] 416 + ldr \tmp1, [\tmp1, \tmp2, lsl #3] 417 + set_this_cpu_offset \tmp1 418 + .endm 394 419 395 420 /* 396 421 * The following fragment of code is executed with the MMU enabled. ··· 422 399 * x0 = __PHYS_OFFSET 423 400 */ 424 401 SYM_FUNC_START_LOCAL(__primary_switched) 425 - adrp x4, init_thread_union 426 - add sp, x4, #THREAD_SIZE 427 - adr_l x5, init_task 428 - msr sp_el0, x5 // Save thread_info 402 + adr_l x4, init_task 403 + init_cpu_task x4, x5, x6 429 404 430 405 adr_l x8, vectors // load VBAR_EL1 with virtual 431 406 msr vbar_el1, x8 // vector table address 432 407 isb 433 408 434 - stp xzr, x30, [sp, #-16]! 409 + stp x29, x30, [sp, #-16]! 
435 410 mov x29, sp 436 - 437 - #ifdef CONFIG_SHADOW_CALL_STACK 438 - adr_l scs_sp, init_shadow_call_stack // Set shadow call stack 439 - #endif 440 411 441 412 str_l x21, __fdt_pointer, x5 // Save FDT pointer 442 413 ··· 463 446 0: 464 447 #endif 465 448 bl switch_to_vhe // Prefer VHE if possible 466 - add sp, sp, #16 467 - mov x29, #0 468 - mov x30, #0 469 - b start_kernel 449 + ldp x29, x30, [sp], #16 450 + bl start_kernel 451 + ASM_BUG() 470 452 SYM_FUNC_END(__primary_switched) 471 453 472 454 .pushsection ".rodata", "a" ··· 567 551 cmp w0, #BOOT_CPU_MODE_EL2 568 552 b.ne 1f 569 553 add x1, x1, #4 570 - 1: str w0, [x1] // This CPU has booted in EL1 554 + 1: str w0, [x1] // Save CPU boot mode 571 555 dmb sy 572 556 dc ivac, x1 // Invalidate potentially stale cache line 573 557 ret ··· 648 632 isb 649 633 650 634 adr_l x0, secondary_data 651 - ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack 652 - cbz x1, __secondary_too_slow 653 - mov sp, x1 654 635 ldr x2, [x0, #CPU_BOOT_TASK] 655 636 cbz x2, __secondary_too_slow 656 - msr sp_el0, x2 657 - scs_load x2, x3 658 - mov x29, #0 659 - mov x30, #0 637 + 638 + init_cpu_task x2, x1, x3 660 639 661 640 #ifdef CONFIG_ARM64_PTR_AUTH 662 641 ptrauth_keys_init_cpu x2, x3, x4, x5 663 642 #endif 664 643 665 - b secondary_start_kernel 644 + bl secondary_start_kernel 645 + ASM_BUG() 666 646 SYM_FUNC_END(__secondary_switched) 667 647 668 648 SYM_FUNC_START_LOCAL(__secondary_too_slow)
+4 -3
arch/arm64/kernel/hibernate-asm.S
··· 45 45 * Because this code has to be copied to a 'safe' page, it can't call out to 46 46 * other functions by PC-relative address. Also remember that it may be 47 47 * mid-way through over-writing other functions. For this reason it contains 48 - * code from flush_icache_range() and uses the copy_page() macro. 48 + * code from caches_clean_inval_pou() and uses the copy_page() macro. 49 49 * 50 50 * This 'safe' page is mapped via ttbr0, and executed from there. This function 51 51 * switches to a copy of the linear map in ttbr1, performs the restore, then ··· 87 87 copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 88 88 89 89 add x1, x10, #PAGE_SIZE 90 - /* Clean the copied page to PoU - based on flush_icache_range() */ 90 + /* Clean the copied page to PoU - based on caches_clean_inval_pou() */ 91 91 raw_dcache_line_size x2, x3 92 92 sub x3, x2, #1 93 93 bic x4, x10, x3 94 - 2: dc cvau, x4 /* clean D line / unified line */ 94 + 2: /* clean D line / unified line */ 95 + alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE 95 96 add x4, x4, x2 96 97 cmp x4, x1 97 98 b.lo 2b
+12 -8
arch/arm64/kernel/hibernate.c
··· 210 210 return -ENOMEM; 211 211 212 212 memcpy(page, src_start, length); 213 - __flush_icache_range((unsigned long)page, (unsigned long)page + length); 213 + caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); 214 214 rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); 215 215 if (rc) 216 216 return rc; ··· 239 239 240 240 return 0; 241 241 } 242 - 243 - #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) 244 242 245 243 #ifdef CONFIG_ARM64_MTE 246 244 ··· 381 383 ret = swsusp_save(); 382 384 } else { 383 385 /* Clean kernel core startup/idle code to PoC*/ 384 - dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); 385 - dcache_clean_range(__idmap_text_start, __idmap_text_end); 386 + dcache_clean_inval_poc((unsigned long)__mmuoff_data_start, 387 + (unsigned long)__mmuoff_data_end); 388 + dcache_clean_inval_poc((unsigned long)__idmap_text_start, 389 + (unsigned long)__idmap_text_end); 386 390 387 391 /* Clean kvm setup code to PoC? */ 388 392 if (el2_reset_needed()) { 389 - dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); 390 - dcache_clean_range(__hyp_text_start, __hyp_text_end); 393 + dcache_clean_inval_poc( 394 + (unsigned long)__hyp_idmap_text_start, 395 + (unsigned long)__hyp_idmap_text_end); 396 + dcache_clean_inval_poc((unsigned long)__hyp_text_start, 397 + (unsigned long)__hyp_text_end); 391 398 } 392 399 393 400 swsusp_mte_restore_tags(); ··· 477 474 * The hibernate exit text contains a set of el2 vectors, that will 478 475 * be executed at el2 with the mmu off in order to reload hyp-stub. 479 476 */ 480 - __flush_dcache_area(hibernate_exit, exit_size); 477 + dcache_clean_inval_poc((unsigned long)hibernate_exit, 478 + (unsigned long)hibernate_exit + exit_size); 481 479 482 480 /* 483 481 * KASLR will cause the el2 vectors to be in a different location in
+46
arch/arm64/kernel/idle.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Low-level idle sequences 4 + */ 5 + 6 + #include <linux/cpu.h> 7 + #include <linux/irqflags.h> 8 + 9 + #include <asm/barrier.h> 10 + #include <asm/cpuidle.h> 11 + #include <asm/cpufeature.h> 12 + #include <asm/sysreg.h> 13 + 14 + /* 15 + * cpu_do_idle() 16 + * 17 + * Idle the processor (wait for interrupt). 18 + * 19 + * If the CPU supports priority masking we must do additional work to 20 + * ensure that interrupts are not masked at the PMR (because the core will 21 + * not wake up if we block the wake up signal in the interrupt controller). 22 + */ 23 + void noinstr cpu_do_idle(void) 24 + { 25 + struct arm_cpuidle_irq_context context; 26 + 27 + arm_cpuidle_save_irq_context(&context); 28 + 29 + dsb(sy); 30 + wfi(); 31 + 32 + arm_cpuidle_restore_irq_context(&context); 33 + } 34 + 35 + /* 36 + * This is our default idle handler. 37 + */ 38 + void noinstr arch_cpu_idle(void) 39 + { 40 + /* 41 + * This should do all the clock switching and wait for interrupt 42 + * tricks 43 + */ 44 + cpu_do_idle(); 45 + raw_local_irq_enable(); 46 + }
+2 -1
arch/arm64/kernel/idreg-override.c
··· 237 237 238 238 for (i = 0; i < ARRAY_SIZE(regs); i++) { 239 239 if (regs[i]->override) 240 - __flush_dcache_area(regs[i]->override, 240 + dcache_clean_inval_poc((unsigned long)regs[i]->override, 241 + (unsigned long)regs[i]->override + 241 242 sizeof(*regs[i]->override)); 242 243 } 243 244 }
+1 -1
arch/arm64/kernel/image-vars.h
··· 35 35 __efistub_strcmp = __pi_strcmp; 36 36 __efistub_strncmp = __pi_strncmp; 37 37 __efistub_strrchr = __pi_strrchr; 38 - __efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; 38 + __efistub_dcache_clean_poc = __pi_dcache_clean_poc; 39 39 40 40 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 41 41 __efistub___memcpy = __pi_memcpy;
+4 -245
arch/arm64/kernel/insn.c arch/arm64/lib/insn.c
··· 7 7 */ 8 8 #include <linux/bitops.h> 9 9 #include <linux/bug.h> 10 - #include <linux/compiler.h> 11 - #include <linux/kernel.h> 12 - #include <linux/mm.h> 13 - #include <linux/smp.h> 14 - #include <linux/spinlock.h> 15 - #include <linux/stop_machine.h> 10 + #include <linux/printk.h> 11 + #include <linux/sizes.h> 16 12 #include <linux/types.h> 17 - #include <linux/uaccess.h> 18 13 19 - #include <asm/cacheflush.h> 20 14 #include <asm/debug-monitors.h> 21 - #include <asm/fixmap.h> 15 + #include <asm/errno.h> 22 16 #include <asm/insn.h> 23 17 #include <asm/kprobes.h> 24 - #include <asm/sections.h> 25 18 26 19 #define AARCH64_INSN_SF_BIT BIT(31) 27 20 #define AARCH64_INSN_N_BIT BIT(22) ··· 23 30 static const int aarch64_insn_encoding_class[] = { 24 31 AARCH64_INSN_CLS_UNKNOWN, 25 32 AARCH64_INSN_CLS_UNKNOWN, 26 - AARCH64_INSN_CLS_UNKNOWN, 33 + AARCH64_INSN_CLS_SVE, 27 34 AARCH64_INSN_CLS_UNKNOWN, 28 35 AARCH64_INSN_CLS_LDST, 29 36 AARCH64_INSN_CLS_DP_REG, ··· 76 83 aarch64_insn_is_bcond(insn)); 77 84 } 78 85 79 - static DEFINE_RAW_SPINLOCK(patch_lock); 80 - 81 - static bool is_exit_text(unsigned long addr) 82 - { 83 - /* discarded with init text/data */ 84 - return system_state < SYSTEM_RUNNING && 85 - addr >= (unsigned long)__exittext_begin && 86 - addr < (unsigned long)__exittext_end; 87 - } 88 - 89 - static bool is_image_text(unsigned long addr) 90 - { 91 - return core_kernel_text(addr) || is_exit_text(addr); 92 - } 93 - 94 - static void __kprobes *patch_map(void *addr, int fixmap) 95 - { 96 - unsigned long uintaddr = (uintptr_t) addr; 97 - bool image = is_image_text(uintaddr); 98 - struct page *page; 99 - 100 - if (image) 101 - page = phys_to_page(__pa_symbol(addr)); 102 - else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) 103 - page = vmalloc_to_page(addr); 104 - else 105 - return addr; 106 - 107 - BUG_ON(!page); 108 - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + 109 - (uintaddr & ~PAGE_MASK)); 110 - } 111 - 112 - static void __kprobes 
patch_unmap(int fixmap) 113 - { 114 - clear_fixmap(fixmap); 115 - } 116 - /* 117 - * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always 118 - * little-endian. 119 - */ 120 - int __kprobes aarch64_insn_read(void *addr, u32 *insnp) 121 - { 122 - int ret; 123 - __le32 val; 124 - 125 - ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); 126 - if (!ret) 127 - *insnp = le32_to_cpu(val); 128 - 129 - return ret; 130 - } 131 - 132 - static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) 133 - { 134 - void *waddr = addr; 135 - unsigned long flags = 0; 136 - int ret; 137 - 138 - raw_spin_lock_irqsave(&patch_lock, flags); 139 - waddr = patch_map(addr, FIX_TEXT_POKE0); 140 - 141 - ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); 142 - 143 - patch_unmap(FIX_TEXT_POKE0); 144 - raw_spin_unlock_irqrestore(&patch_lock, flags); 145 - 146 - return ret; 147 - } 148 - 149 - int __kprobes aarch64_insn_write(void *addr, u32 insn) 150 - { 151 - return __aarch64_insn_write(addr, cpu_to_le32(insn)); 152 - } 153 - 154 86 bool __kprobes aarch64_insn_uses_literal(u32 insn) 155 87 { 156 88 /* ldr/ldrsw (literal), prfm */ ··· 103 185 aarch64_insn_is_blr(insn) || 104 186 aarch64_insn_is_blr_auth(insn) || 105 187 aarch64_insn_is_bcond(insn); 106 - } 107 - 108 - int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) 109 - { 110 - u32 *tp = addr; 111 - int ret; 112 - 113 - /* A64 instructions must be word aligned */ 114 - if ((uintptr_t)tp & 0x3) 115 - return -EINVAL; 116 - 117 - ret = aarch64_insn_write(tp, insn); 118 - if (ret == 0) 119 - __flush_icache_range((uintptr_t)tp, 120 - (uintptr_t)tp + AARCH64_INSN_SIZE); 121 - 122 - return ret; 123 - } 124 - 125 - struct aarch64_insn_patch { 126 - void **text_addrs; 127 - u32 *new_insns; 128 - int insn_cnt; 129 - atomic_t cpu_count; 130 - }; 131 - 132 - static int __kprobes aarch64_insn_patch_text_cb(void *arg) 133 - { 134 - int i, ret = 0; 135 - struct aarch64_insn_patch *pp = arg; 
136 - 137 - /* The first CPU becomes master */ 138 - if (atomic_inc_return(&pp->cpu_count) == 1) { 139 - for (i = 0; ret == 0 && i < pp->insn_cnt; i++) 140 - ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], 141 - pp->new_insns[i]); 142 - /* Notify other processors with an additional increment. */ 143 - atomic_inc(&pp->cpu_count); 144 - } else { 145 - while (atomic_read(&pp->cpu_count) <= num_online_cpus()) 146 - cpu_relax(); 147 - isb(); 148 - } 149 - 150 - return ret; 151 - } 152 - 153 - int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) 154 - { 155 - struct aarch64_insn_patch patch = { 156 - .text_addrs = addrs, 157 - .new_insns = insns, 158 - .insn_cnt = cnt, 159 - .cpu_count = ATOMIC_INIT(0), 160 - }; 161 - 162 - if (cnt <= 0) 163 - return -EINVAL; 164 - 165 - return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch, 166 - cpu_online_mask); 167 188 } 168 189 169 190 static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, ··· 1288 1431 { 1289 1432 return insn & CRM_MASK; 1290 1433 } 1291 - 1292 - static bool __kprobes __check_eq(unsigned long pstate) 1293 - { 1294 - return (pstate & PSR_Z_BIT) != 0; 1295 - } 1296 - 1297 - static bool __kprobes __check_ne(unsigned long pstate) 1298 - { 1299 - return (pstate & PSR_Z_BIT) == 0; 1300 - } 1301 - 1302 - static bool __kprobes __check_cs(unsigned long pstate) 1303 - { 1304 - return (pstate & PSR_C_BIT) != 0; 1305 - } 1306 - 1307 - static bool __kprobes __check_cc(unsigned long pstate) 1308 - { 1309 - return (pstate & PSR_C_BIT) == 0; 1310 - } 1311 - 1312 - static bool __kprobes __check_mi(unsigned long pstate) 1313 - { 1314 - return (pstate & PSR_N_BIT) != 0; 1315 - } 1316 - 1317 - static bool __kprobes __check_pl(unsigned long pstate) 1318 - { 1319 - return (pstate & PSR_N_BIT) == 0; 1320 - } 1321 - 1322 - static bool __kprobes __check_vs(unsigned long pstate) 1323 - { 1324 - return (pstate & PSR_V_BIT) != 0; 1325 - } 1326 - 1327 - static bool __kprobes 
__check_vc(unsigned long pstate) 1328 - { 1329 - return (pstate & PSR_V_BIT) == 0; 1330 - } 1331 - 1332 - static bool __kprobes __check_hi(unsigned long pstate) 1333 - { 1334 - pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ 1335 - return (pstate & PSR_C_BIT) != 0; 1336 - } 1337 - 1338 - static bool __kprobes __check_ls(unsigned long pstate) 1339 - { 1340 - pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ 1341 - return (pstate & PSR_C_BIT) == 0; 1342 - } 1343 - 1344 - static bool __kprobes __check_ge(unsigned long pstate) 1345 - { 1346 - pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ 1347 - return (pstate & PSR_N_BIT) == 0; 1348 - } 1349 - 1350 - static bool __kprobes __check_lt(unsigned long pstate) 1351 - { 1352 - pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ 1353 - return (pstate & PSR_N_BIT) != 0; 1354 - } 1355 - 1356 - static bool __kprobes __check_gt(unsigned long pstate) 1357 - { 1358 - /*PSR_N_BIT ^= PSR_V_BIT */ 1359 - unsigned long temp = pstate ^ (pstate << 3); 1360 - 1361 - temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ 1362 - return (temp & PSR_N_BIT) == 0; 1363 - } 1364 - 1365 - static bool __kprobes __check_le(unsigned long pstate) 1366 - { 1367 - /*PSR_N_BIT ^= PSR_V_BIT */ 1368 - unsigned long temp = pstate ^ (pstate << 3); 1369 - 1370 - temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ 1371 - return (temp & PSR_N_BIT) != 0; 1372 - } 1373 - 1374 - static bool __kprobes __check_al(unsigned long pstate) 1375 - { 1376 - return true; 1377 - } 1378 - 1379 - /* 1380 - * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that 1381 - * it behaves identically to 0b1110 ("al"). 
1382 - */ 1383 - pstate_check_t * const aarch32_opcode_cond_checks[16] = { 1384 - __check_eq, __check_ne, __check_cs, __check_cc, 1385 - __check_mi, __check_pl, __check_vs, __check_vc, 1386 - __check_hi, __check_ls, __check_ge, __check_lt, 1387 - __check_gt, __check_le, __check_al, __check_al 1388 - }; 1389 1434 1390 1435 static bool range_of_ones(u64 val) 1391 1436 {
+1
arch/arm64/kernel/jump_label.c
··· 8 8 #include <linux/kernel.h> 9 9 #include <linux/jump_label.h> 10 10 #include <asm/insn.h> 11 + #include <asm/patching.h> 11 12 12 13 void arch_jump_label_transform(struct jump_entry *entry, 13 14 enum jump_label_type type)
+9 -3
arch/arm64/kernel/kaslr.c
··· 72 72 * we end up running with module randomization disabled. 73 73 */ 74 74 module_alloc_base = (u64)_etext - MODULES_VSIZE; 75 - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); 75 + dcache_clean_inval_poc((unsigned long)&module_alloc_base, 76 + (unsigned long)&module_alloc_base + 77 + sizeof(module_alloc_base)); 76 78 77 79 /* 78 80 * Try to map the FDT early. If this fails, we simply bail, ··· 172 170 module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; 173 171 module_alloc_base &= PAGE_MASK; 174 172 175 - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); 176 - __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); 173 + dcache_clean_inval_poc((unsigned long)&module_alloc_base, 174 + (unsigned long)&module_alloc_base + 175 + sizeof(module_alloc_base)); 176 + dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, 177 + (unsigned long)&memstart_offset_seed + 178 + sizeof(memstart_offset_seed)); 177 179 178 180 return offset; 179 181 }
+1
arch/arm64/kernel/kgdb.c
··· 17 17 18 18 #include <asm/debug-monitors.h> 19 19 #include <asm/insn.h> 20 + #include <asm/patching.h> 20 21 #include <asm/traps.h> 21 22 22 23 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+20 -10
arch/arm64/kernel/machine_kexec.c
··· 68 68 kimage->arch.kern_reloc = __pa(reloc_code); 69 69 kexec_image_info(kimage); 70 70 71 - /* Flush the reloc_code in preparation for its execution. */ 72 - __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); 73 - flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + 74 - arm64_relocate_new_kernel_size); 71 + /* 72 + * For execution with the MMU off, reloc_code needs to be cleaned to the 73 + * PoC and invalidated from the I-cache. 74 + */ 75 + dcache_clean_inval_poc((unsigned long)reloc_code, 76 + (unsigned long)reloc_code + 77 + arm64_relocate_new_kernel_size); 78 + icache_inval_pou((uintptr_t)reloc_code, 79 + (uintptr_t)reloc_code + 80 + arm64_relocate_new_kernel_size); 75 81 76 82 return 0; 77 83 } ··· 108 102 109 103 for (entry = &kimage->head; ; entry++) { 110 104 unsigned int flag; 111 - void *addr; 105 + unsigned long addr; 112 106 113 107 /* flush the list entries. */ 114 - __flush_dcache_area(entry, sizeof(kimage_entry_t)); 108 + dcache_clean_inval_poc((unsigned long)entry, 109 + (unsigned long)entry + 110 + sizeof(kimage_entry_t)); 115 111 116 112 flag = *entry & IND_FLAGS; 117 113 if (flag == IND_DONE) 118 114 break; 119 115 120 - addr = phys_to_virt(*entry & PAGE_MASK); 116 + addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); 121 117 122 118 switch (flag) { 123 119 case IND_INDIRECTION: ··· 128 120 break; 129 121 case IND_SOURCE: 130 122 /* flush the source pages. */ 131 - __flush_dcache_area(addr, PAGE_SIZE); 123 + dcache_clean_inval_poc(addr, addr + PAGE_SIZE); 132 124 break; 133 125 case IND_DESTINATION: 134 126 break; ··· 155 147 kimage->segment[i].memsz, 156 148 kimage->segment[i].memsz / PAGE_SIZE); 157 149 158 - __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), 159 - kimage->segment[i].memsz); 150 + dcache_clean_inval_poc( 151 + (unsigned long)phys_to_virt(kimage->segment[i].mem), 152 + (unsigned long)phys_to_virt(kimage->segment[i].mem) + 153 + kimage->segment[i].memsz); 160 154 } 161 155 } 162 156
+150
arch/arm64/kernel/patching.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <linux/kernel.h> 3 + #include <linux/mm.h> 4 + #include <linux/smp.h> 5 + #include <linux/spinlock.h> 6 + #include <linux/stop_machine.h> 7 + #include <linux/uaccess.h> 8 + 9 + #include <asm/cacheflush.h> 10 + #include <asm/fixmap.h> 11 + #include <asm/insn.h> 12 + #include <asm/kprobes.h> 13 + #include <asm/patching.h> 14 + #include <asm/sections.h> 15 + 16 + static DEFINE_RAW_SPINLOCK(patch_lock); 17 + 18 + static bool is_exit_text(unsigned long addr) 19 + { 20 + /* discarded with init text/data */ 21 + return system_state < SYSTEM_RUNNING && 22 + addr >= (unsigned long)__exittext_begin && 23 + addr < (unsigned long)__exittext_end; 24 + } 25 + 26 + static bool is_image_text(unsigned long addr) 27 + { 28 + return core_kernel_text(addr) || is_exit_text(addr); 29 + } 30 + 31 + static void __kprobes *patch_map(void *addr, int fixmap) 32 + { 33 + unsigned long uintaddr = (uintptr_t) addr; 34 + bool image = is_image_text(uintaddr); 35 + struct page *page; 36 + 37 + if (image) 38 + page = phys_to_page(__pa_symbol(addr)); 39 + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) 40 + page = vmalloc_to_page(addr); 41 + else 42 + return addr; 43 + 44 + BUG_ON(!page); 45 + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + 46 + (uintaddr & ~PAGE_MASK)); 47 + } 48 + 49 + static void __kprobes patch_unmap(int fixmap) 50 + { 51 + clear_fixmap(fixmap); 52 + } 53 + /* 54 + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always 55 + * little-endian. 
56 + */ 57 + int __kprobes aarch64_insn_read(void *addr, u32 *insnp) 58 + { 59 + int ret; 60 + __le32 val; 61 + 62 + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); 63 + if (!ret) 64 + *insnp = le32_to_cpu(val); 65 + 66 + return ret; 67 + } 68 + 69 + static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) 70 + { 71 + void *waddr = addr; 72 + unsigned long flags = 0; 73 + int ret; 74 + 75 + raw_spin_lock_irqsave(&patch_lock, flags); 76 + waddr = patch_map(addr, FIX_TEXT_POKE0); 77 + 78 + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); 79 + 80 + patch_unmap(FIX_TEXT_POKE0); 81 + raw_spin_unlock_irqrestore(&patch_lock, flags); 82 + 83 + return ret; 84 + } 85 + 86 + int __kprobes aarch64_insn_write(void *addr, u32 insn) 87 + { 88 + return __aarch64_insn_write(addr, cpu_to_le32(insn)); 89 + } 90 + 91 + int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) 92 + { 93 + u32 *tp = addr; 94 + int ret; 95 + 96 + /* A64 instructions must be word aligned */ 97 + if ((uintptr_t)tp & 0x3) 98 + return -EINVAL; 99 + 100 + ret = aarch64_insn_write(tp, insn); 101 + if (ret == 0) 102 + caches_clean_inval_pou((uintptr_t)tp, 103 + (uintptr_t)tp + AARCH64_INSN_SIZE); 104 + 105 + return ret; 106 + } 107 + 108 + struct aarch64_insn_patch { 109 + void **text_addrs; 110 + u32 *new_insns; 111 + int insn_cnt; 112 + atomic_t cpu_count; 113 + }; 114 + 115 + static int __kprobes aarch64_insn_patch_text_cb(void *arg) 116 + { 117 + int i, ret = 0; 118 + struct aarch64_insn_patch *pp = arg; 119 + 120 + /* The first CPU becomes master */ 121 + if (atomic_inc_return(&pp->cpu_count) == 1) { 122 + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) 123 + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], 124 + pp->new_insns[i]); 125 + /* Notify other processors with an additional increment. 
*/ 126 + atomic_inc(&pp->cpu_count); 127 + } else { 128 + while (atomic_read(&pp->cpu_count) <= num_online_cpus()) 129 + cpu_relax(); 130 + isb(); 131 + } 132 + 133 + return ret; 134 + } 135 + 136 + int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) 137 + { 138 + struct aarch64_insn_patch patch = { 139 + .text_addrs = addrs, 140 + .new_insns = insns, 141 + .insn_cnt = cnt, 142 + .cpu_count = ATOMIC_INIT(0), 143 + }; 144 + 145 + if (cnt <= 0) 146 + return -EINVAL; 147 + 148 + return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch, 149 + cpu_online_mask); 150 + }
+1 -1
arch/arm64/kernel/perf_callchain.c
··· 116 116 tail = (struct frame_tail __user *)regs->regs[29]; 117 117 118 118 while (entry->nr < entry->max_stack && 119 - tail && !((unsigned long)tail & 0xf)) 119 + tail && !((unsigned long)tail & 0x7)) 120 120 tail = user_backtrace(tail, entry); 121 121 } else { 122 122 #ifdef CONFIG_COMPAT
+35 -5
arch/arm64/kernel/perf_event.c
··· 165 165 } 166 166 167 167 #define ARMV8_EVENT_ATTR(name, config) \ 168 - (&((struct perf_pmu_events_attr) { \ 169 - .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \ 170 - .id = config, \ 171 - }).attr.attr) 168 + PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config) 172 169 173 170 static struct attribute *armv8_pmuv3_event_attrs[] = { 174 171 ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), ··· 309 312 struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); 310 313 u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK; 311 314 312 - return snprintf(page, PAGE_SIZE, "0x%08x\n", slots); 315 + return sysfs_emit(page, "0x%08x\n", slots); 313 316 } 314 317 315 318 static DEVICE_ATTR_RO(slots); 316 319 320 + static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr, 321 + char *page) 322 + { 323 + struct pmu *pmu = dev_get_drvdata(dev); 324 + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); 325 + u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT) 326 + & ARMV8_PMU_BUS_SLOTS_MASK; 327 + 328 + return sysfs_emit(page, "0x%08x\n", bus_slots); 329 + } 330 + 331 + static DEVICE_ATTR_RO(bus_slots); 332 + 333 + static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr, 334 + char *page) 335 + { 336 + struct pmu *pmu = dev_get_drvdata(dev); 337 + struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu); 338 + u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT) 339 + & ARMV8_PMU_BUS_WIDTH_MASK; 340 + u32 val = 0; 341 + 342 + /* Encoded as Log2(number of bytes), plus one */ 343 + if (bus_width > 2 && bus_width < 13) 344 + val = 1 << (bus_width - 1); 345 + 346 + return sysfs_emit(page, "0x%08x\n", val); 347 + } 348 + 349 + static DEVICE_ATTR_RO(bus_width); 350 + 317 351 static struct attribute *armv8_pmuv3_caps_attrs[] = { 318 352 &dev_attr_slots.attr, 353 + &dev_attr_bus_slots.attr, 354 + &dev_attr_bus_width.attr, 319 355 NULL, 320 356 }; 321 
357
+13 -11
arch/arm64/kernel/probes/kprobes.c
··· 7 7 * Copyright (C) 2013 Linaro Limited. 8 8 * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> 9 9 */ 10 + #include <linux/extable.h> 10 11 #include <linux/kasan.h> 11 12 #include <linux/kernel.h> 12 13 #include <linux/kprobes.h> 13 - #include <linux/extable.h> 14 - #include <linux/slab.h> 15 - #include <linux/stop_machine.h> 16 14 #include <linux/sched/debug.h> 17 15 #include <linux/set_memory.h> 16 + #include <linux/slab.h> 17 + #include <linux/stop_machine.h> 18 18 #include <linux/stringify.h> 19 - #include <linux/vmalloc.h> 20 - #include <asm/traps.h> 21 - #include <asm/ptrace.h> 22 - #include <asm/cacheflush.h> 23 - #include <asm/debug-monitors.h> 24 - #include <asm/daifflags.h> 25 - #include <asm/system_misc.h> 26 - #include <asm/insn.h> 27 19 #include <linux/uaccess.h> 20 + #include <linux/vmalloc.h> 21 + 22 + #include <asm/cacheflush.h> 23 + #include <asm/daifflags.h> 24 + #include <asm/debug-monitors.h> 25 + #include <asm/insn.h> 28 26 #include <asm/irq.h> 27 + #include <asm/patching.h> 28 + #include <asm/ptrace.h> 29 29 #include <asm/sections.h> 30 + #include <asm/system_misc.h> 31 + #include <asm/traps.h> 30 32 31 33 #include "decode-insn.h" 32 34
+1
arch/arm64/kernel/probes/simulate-insn.c
··· 10 10 #include <linux/kprobes.h> 11 11 12 12 #include <asm/ptrace.h> 13 + #include <asm/traps.h> 13 14 14 15 #include "simulate-insn.h" 15 16
+1 -1
arch/arm64/kernel/probes/uprobes.c
··· 21 21 memcpy(dst, src, len); 22 22 23 23 /* flush caches (dcache/icache) */ 24 - sync_icache_aliases(dst, len); 24 + sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); 25 25 26 26 kunmap_atomic(xol_page_kaddr); 27 27 }
+23 -76
arch/arm64/kernel/process.c
··· 18 18 #include <linux/sched/task.h> 19 19 #include <linux/sched/task_stack.h> 20 20 #include <linux/kernel.h> 21 - #include <linux/lockdep.h> 22 21 #include <linux/mman.h> 23 22 #include <linux/mm.h> 24 23 #include <linux/nospec.h> ··· 45 46 #include <linux/prctl.h> 46 47 47 48 #include <asm/alternative.h> 48 - #include <asm/arch_gicv3.h> 49 49 #include <asm/compat.h> 50 50 #include <asm/cpufeature.h> 51 51 #include <asm/cacheflush.h> ··· 71 73 EXPORT_SYMBOL_GPL(pm_power_off); 72 74 73 75 void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); 74 - 75 - static void noinstr __cpu_do_idle(void) 76 - { 77 - dsb(sy); 78 - wfi(); 79 - } 80 - 81 - static void noinstr __cpu_do_idle_irqprio(void) 82 - { 83 - unsigned long pmr; 84 - unsigned long daif_bits; 85 - 86 - daif_bits = read_sysreg(daif); 87 - write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); 88 - 89 - /* 90 - * Unmask PMR before going idle to make sure interrupts can 91 - * be raised. 92 - */ 93 - pmr = gic_read_pmr(); 94 - gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); 95 - 96 - __cpu_do_idle(); 97 - 98 - gic_write_pmr(pmr); 99 - write_sysreg(daif_bits, daif); 100 - } 101 - 102 - /* 103 - * cpu_do_idle() 104 - * 105 - * Idle the processor (wait for interrupt). 106 - * 107 - * If the CPU supports priority masking we must do additional work to 108 - * ensure that interrupts are not masked at the PMR (because the core will 109 - * not wake up if we block the wake up signal in the interrupt controller). 110 - */ 111 - void noinstr cpu_do_idle(void) 112 - { 113 - if (system_uses_irq_prio_masking()) 114 - __cpu_do_idle_irqprio(); 115 - else 116 - __cpu_do_idle(); 117 - } 118 - 119 - /* 120 - * This is our default idle handler. 
121 - */ 122 - void noinstr arch_cpu_idle(void) 123 - { 124 - /* 125 - * This should do all the clock switching and wait for interrupt 126 - * tricks 127 - */ 128 - cpu_do_idle(); 129 - raw_local_irq_enable(); 130 - } 131 76 132 77 #ifdef CONFIG_HOTPLUG_CPU 133 78 void arch_cpu_idle_dead(void) ··· 376 435 } 377 436 p->thread.cpu_context.pc = (unsigned long)ret_from_fork; 378 437 p->thread.cpu_context.sp = (unsigned long)childregs; 438 + /* 439 + * For the benefit of the unwinder, set up childregs->stackframe 440 + * as the final frame for the new task. 441 + */ 442 + p->thread.cpu_context.fp = (unsigned long)childregs->stackframe; 379 443 380 444 ptrace_hw_copy_thread(p); 381 445 ··· 473 527 write_sysreg(val, cntkctl_el1); 474 528 } 475 529 530 + static void compat_thread_switch(struct task_struct *next) 531 + { 532 + if (!is_compat_thread(task_thread_info(next))) 533 + return; 534 + 535 + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) 536 + set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); 537 + } 538 + 476 539 static void update_sctlr_el1(u64 sctlr) 477 540 { 478 541 /* ··· 523 568 ssbs_thread_switch(next); 524 569 erratum_1418040_thread_switch(prev, next); 525 570 ptrauth_thread_switch_user(next); 571 + compat_thread_switch(next); 526 572 527 573 /* 528 574 * Complete any pending TLB or cache maintenance on this CPU in case ··· 589 633 */ 590 634 void arch_setup_new_exec(void) 591 635 { 592 - current->mm->context.flags = is_compat_task() ? 
MMCF_AARCH32 : 0; 636 + unsigned long mmflags = 0; 593 637 638 + if (is_compat_task()) { 639 + mmflags = MMCF_AARCH32; 640 + if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) 641 + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); 642 + } 643 + 644 + current->mm->context.flags = mmflags; 594 645 ptrauth_thread_init_user(); 595 646 mte_thread_init_user(); 596 647 ··· 686 723 687 724 core_initcall(tagged_addr_init); 688 725 #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ 689 - 690 - asmlinkage void __sched arm64_preempt_schedule_irq(void) 691 - { 692 - lockdep_assert_irqs_disabled(); 693 - 694 - /* 695 - * Preempting a task from an IRQ means we leave copies of PSTATE 696 - * on the stack. cpufeature's enable calls may modify PSTATE, but 697 - * resuming one of these preempted tasks would undo those changes. 698 - * 699 - * Only allow a task to be preempted once cpufeatures have been 700 - * enabled. 701 - */ 702 - if (system_capabilities_finalized()) 703 - preempt_schedule_irq(); 704 - } 705 726 706 727 #ifdef CONFIG_BINFMT_ELF 707 728 int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
+1 -1
arch/arm64/kernel/ptrace.c
··· 122 122 { 123 123 return ((addr & ~(THREAD_SIZE - 1)) == 124 124 (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || 125 - on_irq_stack(addr, NULL); 125 + on_irq_stack(addr, sizeof(unsigned long), NULL); 126 126 } 127 127 128 128 /**
+12 -52
arch/arm64/kernel/sdei.c
··· 162 162 return err; 163 163 } 164 164 165 - static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) 165 + static bool on_sdei_normal_stack(unsigned long sp, unsigned long size, 166 + struct stack_info *info) 166 167 { 167 168 unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); 168 169 unsigned long high = low + SDEI_STACK_SIZE; 169 170 170 - return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info); 171 + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); 171 172 } 172 173 173 - static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) 174 + static bool on_sdei_critical_stack(unsigned long sp, unsigned long size, 175 + struct stack_info *info) 174 176 { 175 177 unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); 176 178 unsigned long high = low + SDEI_STACK_SIZE; 177 179 178 - return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info); 180 + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); 179 181 } 180 182 181 - bool _on_sdei_stack(unsigned long sp, struct stack_info *info) 183 + bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info) 182 184 { 183 185 if (!IS_ENABLED(CONFIG_VMAP_STACK)) 184 186 return false; 185 187 186 - if (on_sdei_critical_stack(sp, info)) 188 + if (on_sdei_critical_stack(sp, size, info)) 187 189 return true; 188 190 189 - if (on_sdei_normal_stack(sp, info)) 191 + if (on_sdei_normal_stack(sp, size, info)) 190 192 return true; 191 193 192 194 return false; ··· 233 231 } 234 232 235 233 /* 236 - * __sdei_handler() returns one of: 234 + * do_sdei_event() returns one of: 237 235 * SDEI_EV_HANDLED - success, return to the interrupted context. 238 236 * SDEI_EV_FAILED - failure, return this error code to firmare. 239 237 * virtual-address - success, return to this address. 
240 238 */ 241 - static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, 242 - struct sdei_registered_event *arg) 239 + unsigned long __kprobes do_sdei_event(struct pt_regs *regs, 240 + struct sdei_registered_event *arg) 243 241 { 244 242 u32 mode; 245 243 int i, err = 0; ··· 293 291 return vbar + 0x680; 294 292 295 293 return vbar + 0x480; 296 - } 297 - 298 - static void __kprobes notrace __sdei_pstate_entry(void) 299 - { 300 - /* 301 - * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to 302 - * whether PSTATE bits are inherited unchanged or generated from 303 - * scratch, and the TF-A implementation always clears PAN and always 304 - * clears UAO. There are no other known implementations. 305 - * 306 - * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how 307 - * PSTATE is modified upon architectural exceptions, and so PAN is 308 - * either inherited or set per SCTLR_ELx.SPAN, and UAO is always 309 - * cleared. 310 - * 311 - * We must explicitly reset PAN to the expected state, including 312 - * clearing it when the host isn't using it, in case a VM had it set. 313 - */ 314 - if (system_uses_hw_pan()) 315 - set_pstate_pan(1); 316 - else if (cpu_has_pan()) 317 - set_pstate_pan(0); 318 - } 319 - 320 - asmlinkage noinstr unsigned long 321 - __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) 322 - { 323 - unsigned long ret; 324 - 325 - /* 326 - * We didn't take an exception to get here, so the HW hasn't 327 - * set/cleared bits in PSTATE that we may rely on. Initialize PAN. 328 - */ 329 - __sdei_pstate_entry(); 330 - 331 - arm64_enter_nmi(regs); 332 - 333 - ret = _sdei_handler(regs, arg); 334 - 335 - arm64_exit_nmi(regs); 336 - 337 - return ret; 338 294 }
+1 -7
arch/arm64/kernel/setup.c
··· 87 87 u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; 88 88 set_cpu_logical_map(0, mpidr); 89 89 90 - /* 91 - * clear __my_cpu_offset on boot CPU to avoid hang caused by 92 - * using percpu variable early, for example, lockdep will 93 - * access percpu variable inside lock_release 94 - */ 95 - set_my_cpu_offset(0); 96 90 pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n", 97 91 (unsigned long)mpidr, read_cpuid_id()); 98 92 } ··· 375 381 * faults in case uaccess_enable() is inadvertently called by the init 376 382 * thread. 377 383 */ 378 - init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir); 384 + init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); 379 385 #endif 380 386 381 387 if (boot_args[1] || boot_args[2] || boot_args[3]) {
+26
arch/arm64/kernel/signal.c
··· 911 911 restore_saved_sigmask(); 912 912 } 913 913 914 + static bool cpu_affinity_invalid(struct pt_regs *regs) 915 + { 916 + if (!compat_user_mode(regs)) 917 + return false; 918 + 919 + /* 920 + * We're preemptible, but a reschedule will cause us to check the 921 + * affinity again. 922 + */ 923 + return !cpumask_test_cpu(raw_smp_processor_id(), 924 + system_32bit_el0_cpumask()); 925 + } 926 + 914 927 asmlinkage void do_notify_resume(struct pt_regs *regs, 915 928 unsigned long thread_flags) 916 929 { ··· 951 938 if (thread_flags & _TIF_NOTIFY_RESUME) { 952 939 tracehook_notify_resume(regs); 953 940 rseq_handle_notify_resume(NULL, regs); 941 + 942 + /* 943 + * If we reschedule after checking the affinity 944 + * then we must ensure that TIF_NOTIFY_RESUME 945 + * is set so that we check the affinity again. 946 + * Since tracehook_notify_resume() clears the 947 + * flag, ensure that the compiler doesn't move 948 + * it after the affinity check. 949 + */ 950 + barrier(); 951 + 952 + if (cpu_affinity_invalid(regs)) 953 + force_sig(SIGKILL); 954 954 } 955 955 956 956 if (thread_flags & _TIF_FOREIGN_FPSTATE)
+83
arch/arm64/kernel/smccc-call.S
··· 7 7 8 8 #include <asm/asm-offsets.h> 9 9 #include <asm/assembler.h> 10 + #include <asm/thread_info.h> 11 + 12 + /* 13 + * If we have SMCCC v1.3 and (as is likely) no SVE state in 14 + * the registers then set the SMCCC hint bit to say there's no 15 + * need to preserve it. Do this by directly adjusting the SMCCC 16 + * function value which is already stored in x0 ready to be called. 17 + */ 18 + SYM_FUNC_START(__arm_smccc_sve_check) 19 + 20 + ldr_l x16, smccc_has_sve_hint 21 + cbz x16, 2f 22 + 23 + get_current_task x16 24 + ldr x16, [x16, #TSK_TI_FLAGS] 25 + tbnz x16, #TIF_FOREIGN_FPSTATE, 1f // Any live FP state? 26 + tbnz x16, #TIF_SVE, 2f // Does that state include SVE? 27 + 28 + 1: orr x0, x0, ARM_SMCCC_1_3_SVE_HINT 29 + 30 + 2: ret 31 + SYM_FUNC_END(__arm_smccc_sve_check) 32 + EXPORT_SYMBOL(__arm_smccc_sve_check) 10 33 11 34 .macro SMCCC instr 35 + alternative_if ARM64_SVE 36 + bl __arm_smccc_sve_check 37 + alternative_else_nop_endif 12 38 \instr #0 13 39 ldr x4, [sp] 14 40 stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] ··· 69 43 SMCCC hvc 70 44 SYM_FUNC_END(__arm_smccc_hvc) 71 45 EXPORT_SYMBOL(__arm_smccc_hvc) 46 + 47 + .macro SMCCC_1_2 instr 48 + /* Save `res` and free a GPR that won't be clobbered */ 49 + stp x1, x19, [sp, #-16]! 
50 + 51 + /* Ensure `args` won't be clobbered while loading regs in next step */ 52 + mov x19, x0 53 + 54 + /* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */ 55 + ldp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] 56 + ldp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] 57 + ldp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] 58 + ldp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] 59 + ldp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] 60 + ldp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] 61 + ldp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] 62 + ldp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] 63 + ldp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] 64 + 65 + \instr #0 66 + 67 + /* Load the `res` from the stack */ 68 + ldr x19, [sp] 69 + 70 + /* Store the registers x0 - x17 into the result structure */ 71 + stp x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS] 72 + stp x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS] 73 + stp x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS] 74 + stp x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS] 75 + stp x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS] 76 + stp x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS] 77 + stp x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS] 78 + stp x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS] 79 + stp x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS] 80 + 81 + /* Restore original x19 */ 82 + ldp xzr, x19, [sp], #16 83 + ret 84 + .endm 85 + 86 + /* 87 + * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, 88 + * struct arm_smccc_1_2_regs *res); 89 + */ 90 + SYM_FUNC_START(arm_smccc_1_2_hvc) 91 + SMCCC_1_2 hvc 92 + SYM_FUNC_END(arm_smccc_1_2_hvc) 93 + EXPORT_SYMBOL(arm_smccc_1_2_hvc) 94 + 95 + /* 96 + * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, 97 + * struct arm_smccc_1_2_regs *res); 98 + */ 99 + SYM_FUNC_START(arm_smccc_1_2_smc) 100 + SMCCC_1_2 smc 101 + SYM_FUNC_END(arm_smccc_1_2_smc) 102 + EXPORT_SYMBOL(arm_smccc_1_2_smc)
+7 -9
arch/arm64/kernel/smp.c
··· 120 120 * page tables. 121 121 */ 122 122 secondary_data.task = idle; 123 - secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; 124 123 update_cpu_boot_status(CPU_MMU_OFF); 125 - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); 126 124 127 125 /* Now bring the CPU into our world */ 128 126 ret = boot_secondary(cpu, idle); ··· 140 142 141 143 pr_crit("CPU%u: failed to come online\n", cpu); 142 144 secondary_data.task = NULL; 143 - secondary_data.stack = NULL; 144 - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); 145 145 status = READ_ONCE(secondary_data.status); 146 146 if (status == CPU_MMU_OFF) 147 147 status = READ_ONCE(__early_cpu_boot_status); ··· 198 202 u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; 199 203 struct mm_struct *mm = &init_mm; 200 204 const struct cpu_operations *ops; 201 - unsigned int cpu; 202 - 203 - cpu = task_cpu(current); 204 - set_my_cpu_offset(per_cpu_offset(cpu)); 205 + unsigned int cpu = smp_processor_id(); 205 206 206 207 /* 207 208 * All kernel threads share the same mm context; grab a ··· 344 351 pr_crit("CPU%u: cpu didn't die\n", cpu); 345 352 return; 346 353 } 347 - pr_notice("CPU%u: shutdown\n", cpu); 354 + pr_debug("CPU%u: shutdown\n", cpu); 348 355 349 356 /* 350 357 * Now that the dying CPU is beyond the point of no return w.r.t. ··· 444 451 445 452 void __init smp_prepare_boot_cpu(void) 446 453 { 454 + /* 455 + * The runtime per-cpu areas have been allocated by 456 + * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be 457 + * freed shortly, so we must move over to the runtime per-cpu area. 458 + */ 447 459 set_my_cpu_offset(per_cpu_offset(smp_processor_id())); 448 460 cpuinfo_store_boot_cpu(); 449 461
+4 -3
arch/arm64/kernel/smp_spin_table.c
··· 36 36 unsigned long size = sizeof(secondary_holding_pen_release); 37 37 38 38 secondary_holding_pen_release = val; 39 - __flush_dcache_area(start, size); 39 + dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size); 40 40 } 41 41 42 42 ··· 90 90 * the boot protocol. 91 91 */ 92 92 writeq_relaxed(pa_holding_pen, release_addr); 93 - __flush_dcache_area((__force void *)release_addr, 94 - sizeof(*release_addr)); 93 + dcache_clean_inval_poc((__force unsigned long)release_addr, 94 + (__force unsigned long)release_addr + 95 + sizeof(*release_addr)); 95 96 96 97 /* 97 98 * Send an event to wake up the secondary CPU.
+8 -10
arch/arm64/kernel/stacktrace.c
··· 68 68 unsigned long fp = frame->fp; 69 69 struct stack_info info; 70 70 71 - if (fp & 0xf) 72 - return -EINVAL; 73 - 74 71 if (!tsk) 75 72 tsk = current; 76 73 77 - if (!on_accessible_stack(tsk, fp, &info)) 74 + /* Final frame; nothing to unwind */ 75 + if (fp == (unsigned long)task_pt_regs(tsk)->stackframe) 76 + return -ENOENT; 77 + 78 + if (fp & 0x7) 79 + return -EINVAL; 80 + 81 + if (!on_accessible_stack(tsk, fp, 16, &info)) 78 82 return -EINVAL; 79 83 80 84 if (test_bit(info.type, frame->stacks_done)) ··· 131 127 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 132 128 133 129 frame->pc = ptrauth_strip_insn_pac(frame->pc); 134 - 135 - /* 136 - * This is a terminal record, so we have finished unwinding. 137 - */ 138 - if (!frame->fp && !frame->pc) 139 - return -ENOENT; 140 130 141 131 return 0; 142 132 }
+11 -1
arch/arm64/kernel/suspend.c
··· 7 7 #include <asm/alternative.h> 8 8 #include <asm/cacheflush.h> 9 9 #include <asm/cpufeature.h> 10 + #include <asm/cpuidle.h> 10 11 #include <asm/daifflags.h> 11 12 #include <asm/debug-monitors.h> 12 13 #include <asm/exec.h> ··· 92 91 int ret = 0; 93 92 unsigned long flags; 94 93 struct sleep_stack_data state; 94 + struct arm_cpuidle_irq_context context; 95 95 96 96 /* Report any MTE async fault before going to suspend */ 97 97 mte_suspend_enter(); ··· 105 103 flags = local_daif_save(); 106 104 107 105 /* 108 - * Function graph tracer state gets incosistent when the kernel 106 + * Function graph tracer state gets inconsistent when the kernel 109 107 * calls functions that never return (aka suspend finishers) hence 110 108 * disable graph tracing during their execution. 111 109 */ 112 110 pause_graph_tracing(); 111 + 112 + /* 113 + * Switch to using DAIF.IF instead of PMR in order to reliably 114 + * resume if we're using pseudo-NMIs. 115 + */ 116 + arm_cpuidle_save_irq_context(&context); 113 117 114 118 if (__cpu_suspend_enter(&state)) { 115 119 /* Call the suspend finisher */ ··· 133 125 } else { 134 126 RCU_NONIDLE(__cpu_suspend_exit()); 135 127 } 128 + 129 + arm_cpuidle_restore_irq_context(&context); 136 130 137 131 unpause_graph_tracing(); 138 132
+1 -1
arch/arm64/kernel/sys_compat.c
··· 41 41 dsb(ish); 42 42 } 43 43 44 - ret = __flush_cache_user_range(start, start + chunk); 44 + ret = caches_clean_inval_user_pou(start, start + chunk); 45 45 if (ret) 46 46 return ret; 47 47
+100 -35
arch/arm64/kernel/traps.c
··· 38 38 #include <asm/extable.h> 39 39 #include <asm/insn.h> 40 40 #include <asm/kprobes.h> 41 + #include <asm/patching.h> 41 42 #include <asm/traps.h> 42 43 #include <asm/smp.h> 43 44 #include <asm/stack_pointer.h> ··· 46 45 #include <asm/system_misc.h> 47 46 #include <asm/sysreg.h> 48 47 49 - static const char *handler[] = { 50 - "Synchronous Abort", 51 - "IRQ", 52 - "FIQ", 53 - "Error" 48 + static bool __kprobes __check_eq(unsigned long pstate) 49 + { 50 + return (pstate & PSR_Z_BIT) != 0; 51 + } 52 + 53 + static bool __kprobes __check_ne(unsigned long pstate) 54 + { 55 + return (pstate & PSR_Z_BIT) == 0; 56 + } 57 + 58 + static bool __kprobes __check_cs(unsigned long pstate) 59 + { 60 + return (pstate & PSR_C_BIT) != 0; 61 + } 62 + 63 + static bool __kprobes __check_cc(unsigned long pstate) 64 + { 65 + return (pstate & PSR_C_BIT) == 0; 66 + } 67 + 68 + static bool __kprobes __check_mi(unsigned long pstate) 69 + { 70 + return (pstate & PSR_N_BIT) != 0; 71 + } 72 + 73 + static bool __kprobes __check_pl(unsigned long pstate) 74 + { 75 + return (pstate & PSR_N_BIT) == 0; 76 + } 77 + 78 + static bool __kprobes __check_vs(unsigned long pstate) 79 + { 80 + return (pstate & PSR_V_BIT) != 0; 81 + } 82 + 83 + static bool __kprobes __check_vc(unsigned long pstate) 84 + { 85 + return (pstate & PSR_V_BIT) == 0; 86 + } 87 + 88 + static bool __kprobes __check_hi(unsigned long pstate) 89 + { 90 + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ 91 + return (pstate & PSR_C_BIT) != 0; 92 + } 93 + 94 + static bool __kprobes __check_ls(unsigned long pstate) 95 + { 96 + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ 97 + return (pstate & PSR_C_BIT) == 0; 98 + } 99 + 100 + static bool __kprobes __check_ge(unsigned long pstate) 101 + { 102 + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ 103 + return (pstate & PSR_N_BIT) == 0; 104 + } 105 + 106 + static bool __kprobes __check_lt(unsigned long pstate) 107 + { 108 + pstate ^= (pstate << 3); /* PSR_N_BIT ^= 
PSR_V_BIT */ 109 + return (pstate & PSR_N_BIT) != 0; 110 + } 111 + 112 + static bool __kprobes __check_gt(unsigned long pstate) 113 + { 114 + /*PSR_N_BIT ^= PSR_V_BIT */ 115 + unsigned long temp = pstate ^ (pstate << 3); 116 + 117 + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ 118 + return (temp & PSR_N_BIT) == 0; 119 + } 120 + 121 + static bool __kprobes __check_le(unsigned long pstate) 122 + { 123 + /*PSR_N_BIT ^= PSR_V_BIT */ 124 + unsigned long temp = pstate ^ (pstate << 3); 125 + 126 + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ 127 + return (temp & PSR_N_BIT) != 0; 128 + } 129 + 130 + static bool __kprobes __check_al(unsigned long pstate) 131 + { 132 + return true; 133 + } 134 + 135 + /* 136 + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that 137 + * it behaves identically to 0b1110 ("al"). 138 + */ 139 + pstate_check_t * const aarch32_opcode_cond_checks[16] = { 140 + __check_eq, __check_ne, __check_cs, __check_cc, 141 + __check_mi, __check_pl, __check_vs, __check_vc, 142 + __check_hi, __check_ls, __check_ge, __check_lt, 143 + __check_gt, __check_le, __check_al, __check_al 54 144 }; 55 145 56 146 int show_unhandled_signals = 0; ··· 843 751 } 844 752 845 753 /* 846 - * bad_mode handles the impossible case in the exception vector. This is always 847 - * fatal. 848 - */ 849 - asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) 850 - { 851 - arm64_enter_nmi(regs); 852 - 853 - console_verbose(); 854 - 855 - pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", 856 - handler[reason], smp_processor_id(), esr, 857 - esr_get_class_string(esr)); 858 - 859 - __show_regs(regs); 860 - local_daif_mask(); 861 - panic("bad mode"); 862 - } 863 - 864 - /* 865 754 * bad_el0_sync handles unexpected, but potentially recoverable synchronous 866 - * exceptions taken from EL0. Unlike bad_mode, this returns. 755 + * exceptions taken from EL0. 
867 756 */ 868 757 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) 869 758 { ··· 862 789 DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) 863 790 __aligned(16); 864 791 865 - asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) 792 + void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far) 866 793 { 867 794 unsigned long tsk_stk = (unsigned long)current->stack; 868 795 unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); 869 796 unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); 870 - unsigned int esr = read_sysreg(esr_el1); 871 - unsigned long far = read_sysreg(far_el1); 872 - 873 - arm64_enter_nmi(regs); 874 797 875 798 console_verbose(); 876 799 pr_emerg("Insufficient stack space to handle exception!"); ··· 939 870 } 940 871 } 941 872 942 - asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) 873 + void do_serror(struct pt_regs *regs, unsigned int esr) 943 874 { 944 - arm64_enter_nmi(regs); 945 - 946 875 /* non-RAS errors are not containable */ 947 876 if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) 948 877 arm64_serror_panic(regs, esr); 949 - 950 - arm64_exit_nmi(regs); 951 878 } 952 879 953 880 /* GENERIC_BUG traps */
+11 -2
arch/arm64/kvm/arm.c
··· 692 692 } 693 693 } 694 694 695 + static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu) 696 + { 697 + if (likely(!vcpu_mode_is_32bit(vcpu))) 698 + return false; 699 + 700 + return !system_supports_32bit_el0() || 701 + static_branch_unlikely(&arm64_mismatched_32bit_el0); 702 + } 703 + 695 704 /** 696 705 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code 697 706 * @vcpu: The VCPU pointer ··· 886 877 * with the asymmetric AArch32 case), return to userspace with 887 878 * a fatal error. 888 879 */ 889 - if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { 880 + if (vcpu_mode_is_bad_32bit(vcpu)) { 890 881 /* 891 882 * As we have caught the guest red-handed, decide that 892 883 * it isn't fit for purpose anymore by making the vcpu ··· 1087 1078 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) 1088 1079 stage2_unmap_vm(vcpu->kvm); 1089 1080 else 1090 - __flush_icache_all(); 1081 + icache_inval_all_pou(); 1091 1082 } 1092 1083 1093 1084 vcpu_reset_hcr(vcpu);
+2 -2
arch/arm64/kvm/hyp/nvhe/cache.S
··· 7 7 #include <asm/assembler.h> 8 8 #include <asm/alternative.h> 9 9 10 - SYM_FUNC_START_PI(__flush_dcache_area) 10 + SYM_FUNC_START_PI(dcache_clean_inval_poc) 11 11 dcache_by_line_op civac, sy, x0, x1, x2, x3 12 12 ret 13 - SYM_FUNC_END_PI(__flush_dcache_area) 13 + SYM_FUNC_END_PI(dcache_clean_inval_poc)
+2 -1
arch/arm64/kvm/hyp/nvhe/setup.c
··· 134 134 for (i = 0; i < hyp_nr_cpus; i++) { 135 135 params = per_cpu_ptr(&kvm_init_params, i); 136 136 params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); 137 - __flush_dcache_area(params, sizeof(*params)); 137 + dcache_clean_inval_poc((unsigned long)params, 138 + (unsigned long)params + sizeof(*params)); 138 139 } 139 140 } 140 141
+1 -1
arch/arm64/kvm/hyp/nvhe/tlb.c
··· 104 104 * you should be running with VHE enabled. 105 105 */ 106 106 if (icache_is_vpipt()) 107 - __flush_icache_all(); 107 + icache_inval_all_pou(); 108 108 109 109 __tlb_switch_to_host(&cxt); 110 110 }
+10 -3
arch/arm64/kvm/hyp/pgtable.c
··· 839 839 stage2_put_pte(ptep, mmu, addr, level, mm_ops); 840 840 841 841 if (need_flush) { 842 - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), 843 - kvm_granule_size(level)); 842 + kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); 843 + 844 + dcache_clean_inval_poc((unsigned long)pte_follow, 845 + (unsigned long)pte_follow + 846 + kvm_granule_size(level)); 844 847 } 845 848 846 849 if (childp) ··· 991 988 struct kvm_pgtable *pgt = arg; 992 989 struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; 993 990 kvm_pte_t pte = *ptep; 991 + kvm_pte_t *pte_follow; 994 992 995 993 if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) 996 994 return 0; 997 995 998 - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); 996 + pte_follow = kvm_pte_follow(pte, mm_ops); 997 + dcache_clean_inval_poc((unsigned long)pte_follow, 998 + (unsigned long)pte_follow + 999 + kvm_granule_size(level)); 999 1000 return 0; 1000 1001 } 1001 1002
+3 -1
arch/arm64/lib/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 lib-y := clear_user.o delay.o copy_from_user.o \ 3 3 copy_to_user.o copy_in_user.o copy_page.o \ 4 - clear_page.o csum.o memchr.o memcpy.o memmove.o \ 4 + clear_page.o csum.o insn.o memchr.o memcpy.o \ 5 5 memset.o memcmp.o strcmp.o strncmp.o strlen.o \ 6 6 strnlen.o strchr.o strrchr.o tishift.o 7 7 ··· 18 18 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 19 19 20 20 obj-$(CONFIG_ARM64_MTE) += mte.o 21 + 22 + obj-$(CONFIG_KASAN_SW_TAGS) += kasan_sw_tags.o
+27 -20
arch/arm64/lib/clear_user.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Based on arch/arm/lib/clear_user.S 4 - * 5 - * Copyright (C) 2012 ARM Ltd. 3 + * Copyright (C) 2021 Arm Ltd. 6 4 */ 7 - #include <linux/linkage.h> 8 5 9 - #include <asm/asm-uaccess.h> 6 + #include <linux/linkage.h> 10 7 #include <asm/assembler.h> 11 8 12 9 .text ··· 16 19 * 17 20 * Alignment fixed up by hardware. 18 21 */ 22 + 23 + .p2align 4 24 + // Alignment is for the loop, but since the prologue (including BTI) 25 + // is also 16 bytes we can keep any padding outside the function 19 26 SYM_FUNC_START(__arch_clear_user) 20 - mov x2, x1 // save the size for fixup return 27 + add x2, x0, x1 21 28 subs x1, x1, #8 22 29 b.mi 2f 23 30 1: 24 - user_ldst 9f, sttr, xzr, x0, 8 31 + USER(9f, sttr xzr, [x0]) 32 + add x0, x0, #8 25 33 subs x1, x1, #8 26 - b.pl 1b 27 - 2: adds x1, x1, #4 28 - b.mi 3f 29 - user_ldst 9f, sttr, wzr, x0, 4 30 - sub x1, x1, #4 31 - 3: adds x1, x1, #2 32 - b.mi 4f 33 - user_ldst 9f, sttrh, wzr, x0, 2 34 - sub x1, x1, #2 35 - 4: adds x1, x1, #1 36 - b.mi 5f 37 - user_ldst 9f, sttrb, wzr, x0, 0 34 + b.hi 1b 35 + USER(9f, sttr xzr, [x2, #-8]) 36 + mov x0, #0 37 + ret 38 + 39 + 2: tbz x1, #2, 3f 40 + USER(9f, sttr wzr, [x0]) 41 + USER(8f, sttr wzr, [x2, #-4]) 42 + mov x0, #0 43 + ret 44 + 45 + 3: tbz x1, #1, 4f 46 + USER(9f, sttrh wzr, [x0]) 47 + 4: tbz x1, #0, 5f 48 + USER(7f, sttrb wzr, [x2, #-1]) 38 49 5: mov x0, #0 39 50 ret 40 51 SYM_FUNC_END(__arch_clear_user) ··· 50 45 51 46 .section .fixup,"ax" 52 47 .align 2 53 - 9: mov x0, x2 // return the original size 48 + 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 49 + 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 50 + 9: sub x0, x2, x0 54 51 ret 55 52 .previous
+76
arch/arm64/lib/kasan_sw_tags.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2020 Google LLC 4 + */ 5 + 6 + #include <linux/linkage.h> 7 + #include <asm/assembler.h> 8 + 9 + /* 10 + * Report a tag mismatch detected by tag-based KASAN. 11 + * 12 + * A compiler-generated thunk calls this with a non-AAPCS calling 13 + * convention. Upon entry to this function, registers are as follows: 14 + * 15 + * x0: fault address (see below for restore) 16 + * x1: fault description (see below for restore) 17 + * x2 to x15: callee-saved 18 + * x16 to x17: safe to clobber 19 + * x18 to x30: callee-saved 20 + * sp: pre-decremented by 256 bytes (see below for restore) 21 + * 22 + * The caller has decremented the SP by 256 bytes, and created a 23 + * structure on the stack as follows: 24 + * 25 + * sp + 0..15: x0 and x1 to be restored 26 + * sp + 16..231: free for use 27 + * sp + 232..247: x29 and x30 (same as in GPRs) 28 + * sp + 248..255: free for use 29 + * 30 + * Note that this is not a struct pt_regs. 31 + * 32 + * To call a regular AAPCS function we must save x2 to x15 (which we can 33 + * store in the gaps), and create a frame record (for which we can use 34 + * x29 and x30 spilled by the caller as those match the GPRs). 35 + * 36 + * The caller expects x0 and x1 to be restored from the structure, and 37 + * for the structure to be removed from the stack (i.e. the SP must be 38 + * incremented by 256 prior to return). 
39 + */ 40 + SYM_CODE_START(__hwasan_tag_mismatch) 41 + #ifdef BTI_C 42 + BTI_C 43 + #endif 44 + add x29, sp, #232 45 + stp x2, x3, [sp, #8 * 2] 46 + stp x4, x5, [sp, #8 * 4] 47 + stp x6, x7, [sp, #8 * 6] 48 + stp x8, x9, [sp, #8 * 8] 49 + stp x10, x11, [sp, #8 * 10] 50 + stp x12, x13, [sp, #8 * 12] 51 + stp x14, x15, [sp, #8 * 14] 52 + #ifndef CONFIG_SHADOW_CALL_STACK 53 + str x18, [sp, #8 * 18] 54 + #endif 55 + 56 + mov x2, x30 57 + bl kasan_tag_mismatch 58 + 59 + ldp x0, x1, [sp] 60 + ldp x2, x3, [sp, #8 * 2] 61 + ldp x4, x5, [sp, #8 * 4] 62 + ldp x6, x7, [sp, #8 * 6] 63 + ldp x8, x9, [sp, #8 * 8] 64 + ldp x10, x11, [sp, #8 * 10] 65 + ldp x12, x13, [sp, #8 * 12] 66 + ldp x14, x15, [sp, #8 * 14] 67 + #ifndef CONFIG_SHADOW_CALL_STACK 68 + ldr x18, [sp, #8 * 18] 69 + #endif 70 + ldp x29, x30, [sp, #8 * 29] 71 + 72 + /* remove the structure from the stack */ 73 + add sp, sp, #256 74 + ret 75 + SYM_CODE_END(__hwasan_tag_mismatch) 76 + EXPORT_SYMBOL(__hwasan_tag_mismatch)
+53 -12
arch/arm64/lib/memchr.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Based on arch/arm/lib/memchr.S 4 - * 5 - * Copyright (C) 1995-2000 Russell King 6 - * Copyright (C) 2013 ARM Ltd. 3 + * Copyright (C) 2021 Arm Ltd. 7 4 */ 8 5 9 6 #include <linux/linkage.h> ··· 16 19 * Returns: 17 20 * x0 - address of first occurrence of 'c' or 0 18 21 */ 22 + 23 + #define L(label) .L ## label 24 + 25 + #define REP8_01 0x0101010101010101 26 + #define REP8_7f 0x7f7f7f7f7f7f7f7f 27 + 28 + #define srcin x0 29 + #define chrin w1 30 + #define cntin x2 31 + 32 + #define result x0 33 + 34 + #define wordcnt x3 35 + #define rep01 x4 36 + #define repchr x5 37 + #define cur_word x6 38 + #define cur_byte w6 39 + #define tmp x7 40 + #define tmp2 x8 41 + 42 + .p2align 4 43 + nop 19 44 SYM_FUNC_START_WEAK_PI(memchr) 20 - and w1, w1, #0xff 21 - 1: subs x2, x2, #1 22 - b.mi 2f 23 - ldrb w3, [x0], #1 24 - cmp w3, w1 25 - b.ne 1b 26 - sub x0, x0, #1 45 + and chrin, chrin, #0xff 46 + lsr wordcnt, cntin, #3 47 + cbz wordcnt, L(byte_loop) 48 + mov rep01, #REP8_01 49 + mul repchr, x1, rep01 50 + and cntin, cntin, #7 51 + L(word_loop): 52 + ldr cur_word, [srcin], #8 53 + sub wordcnt, wordcnt, #1 54 + eor cur_word, cur_word, repchr 55 + sub tmp, cur_word, rep01 56 + orr tmp2, cur_word, #REP8_7f 57 + bics tmp, tmp, tmp2 58 + b.ne L(found_word) 59 + cbnz wordcnt, L(word_loop) 60 + L(byte_loop): 61 + cbz cntin, L(not_found) 62 + ldrb cur_byte, [srcin], #1 63 + sub cntin, cntin, #1 64 + cmp cur_byte, chrin 65 + b.ne L(byte_loop) 66 + sub srcin, srcin, #1 27 67 ret 28 - 2: mov x0, #0 68 + L(found_word): 69 + CPU_LE( rev tmp, tmp) 70 + clz tmp, tmp 71 + sub tmp, tmp, #64 72 + add result, srcin, tmp, asr #3 73 + ret 74 + L(not_found): 75 + mov result, #0 29 76 ret 30 77 SYM_FUNC_END_PI(memchr) 31 78 EXPORT_SYMBOL_NOKASAN(memchr)
+111 -219
arch/arm64/lib/memcmp.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 3 + * Copyright (c) 2013-2021, Arm Limited. 5 4 * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 5 + * Adapted from the original at: 6 + * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S 11 7 */ 12 8 13 9 #include <linux/linkage.h> 14 10 #include <asm/assembler.h> 15 11 16 - /* 17 - * compare memory areas(when two memory areas' offset are different, 18 - * alignment handled by the hardware) 19 - * 20 - * Parameters: 21 - * x0 - const memory area 1 pointer 22 - * x1 - const memory area 2 pointer 23 - * x2 - the maximal compare byte length 24 - * Returns: 25 - * x0 - a compare result, maybe less than, equal to, or greater than ZERO 26 - */ 12 + /* Assumptions: 13 + * 14 + * ARMv8-a, AArch64, unaligned accesses. 15 + */ 16 + 17 + #define L(label) .L ## label 27 18 28 19 /* Parameters and result. */ 29 - src1 .req x0 30 - src2 .req x1 31 - limit .req x2 32 - result .req x0 20 + #define src1 x0 21 + #define src2 x1 22 + #define limit x2 23 + #define result w0 33 24 34 25 /* Internal variables. 
*/ 35 - data1 .req x3 36 - data1w .req w3 37 - data2 .req x4 38 - data2w .req w4 39 - has_nul .req x5 40 - diff .req x6 41 - endloop .req x7 42 - tmp1 .req x8 43 - tmp2 .req x9 44 - tmp3 .req x10 45 - pos .req x11 46 - limit_wd .req x12 47 - mask .req x13 26 + #define data1 x3 27 + #define data1w w3 28 + #define data1h x4 29 + #define data2 x5 30 + #define data2w w5 31 + #define data2h x6 32 + #define tmp1 x7 33 + #define tmp2 x8 48 34 49 35 SYM_FUNC_START_WEAK_PI(memcmp) 50 - cbz limit, .Lret0 51 - eor tmp1, src1, src2 52 - tst tmp1, #7 53 - b.ne .Lmisaligned8 54 - ands tmp1, src1, #7 55 - b.ne .Lmutual_align 56 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ 57 - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ 58 - /* 59 - * The input source addresses are at alignment boundary. 60 - * Directly compare eight bytes each time. 61 - */ 62 - .Lloop_aligned: 63 - ldr data1, [src1], #8 64 - ldr data2, [src2], #8 65 - .Lstart_realigned: 66 - subs limit_wd, limit_wd, #1 67 - eor diff, data1, data2 /* Non-zero if differences found. */ 68 - csinv endloop, diff, xzr, cs /* Last Dword or differences. */ 69 - cbz endloop, .Lloop_aligned 36 + subs limit, limit, 8 37 + b.lo L(less8) 70 38 71 - /* Not reached the limit, must have found a diff. */ 72 - tbz limit_wd, #63, .Lnot_limit 39 + ldr data1, [src1], 8 40 + ldr data2, [src2], 8 41 + cmp data1, data2 42 + b.ne L(return) 73 43 74 - /* Limit % 8 == 0 => the diff is in the last 8 bytes. */ 75 - ands limit, limit, #7 76 - b.eq .Lnot_limit 77 - /* 78 - * The remained bytes less than 8. It is needed to extract valid data 79 - * from last eight bytes of the intended memory range. 80 - */ 81 - lsl limit, limit, #3 /* bytes-> bits. 
*/ 82 - mov mask, #~0 83 - CPU_BE( lsr mask, mask, limit ) 84 - CPU_LE( lsl mask, mask, limit ) 85 - bic data1, data1, mask 86 - bic data2, data2, mask 44 + subs limit, limit, 8 45 + b.gt L(more16) 87 46 88 - orr diff, diff, mask 89 - b .Lnot_limit 47 + ldr data1, [src1, limit] 48 + ldr data2, [src2, limit] 49 + b L(return) 90 50 91 - .Lmutual_align: 92 - /* 93 - * Sources are mutually aligned, but are not currently at an 94 - * alignment boundary. Round down the addresses and then mask off 95 - * the bytes that precede the start point. 96 - */ 97 - bic src1, src1, #7 98 - bic src2, src2, #7 99 - ldr data1, [src1], #8 100 - ldr data2, [src2], #8 101 - /* 102 - * We can not add limit with alignment offset(tmp1) here. Since the 103 - * addition probably make the limit overflown. 104 - */ 105 - sub limit_wd, limit, #1/*limit != 0, so no underflow.*/ 106 - and tmp3, limit_wd, #7 107 - lsr limit_wd, limit_wd, #3 108 - add tmp3, tmp3, tmp1 109 - add limit_wd, limit_wd, tmp3, lsr #3 110 - add limit, limit, tmp1/* Adjust the limit for the extra. */ 51 + L(more16): 52 + ldr data1, [src1], 8 53 + ldr data2, [src2], 8 54 + cmp data1, data2 55 + bne L(return) 111 56 112 - lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/ 113 - neg tmp1, tmp1/* Bits to alignment -64. */ 114 - mov tmp2, #~0 115 - /*mask off the non-intended bytes before the start address.*/ 116 - CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/ 117 - /* Little-endian. Early bytes are at LSB. */ 118 - CPU_LE( lsr tmp2, tmp2, tmp1 ) 57 + /* Jump directly to comparing the last 16 bytes for 32 byte (or less) 58 + strings. */ 59 + subs limit, limit, 16 60 + b.ls L(last_bytes) 119 61 120 - orr data1, data1, tmp2 121 - orr data2, data2, tmp2 122 - b .Lstart_realigned 62 + /* We overlap loads between 0-32 bytes at either side of SRC1 when we 63 + try to align, so limit it only to strings larger than 128 bytes. 
*/ 64 + cmp limit, 96 65 + b.ls L(loop16) 123 66 124 - /*src1 and src2 have different alignment offset.*/ 125 - .Lmisaligned8: 126 - cmp limit, #8 127 - b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/ 67 + /* Align src1 and adjust src2 with bytes not yet done. */ 68 + and tmp1, src1, 15 69 + add limit, limit, tmp1 70 + sub src1, src1, tmp1 71 + sub src2, src2, tmp1 128 72 129 - and tmp1, src1, #7 130 - neg tmp1, tmp1 131 - add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/ 132 - and tmp2, src2, #7 133 - neg tmp2, tmp2 134 - add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/ 135 - subs tmp3, tmp1, tmp2 136 - csel pos, tmp1, tmp2, hi /*Choose the maximum.*/ 73 + /* Loop performing 16 bytes per iteration using aligned src1. 74 + Limit is pre-decremented by 16 and must be larger than zero. 75 + Exit if <= 16 bytes left to do or if the data is not equal. */ 76 + .p2align 4 77 + L(loop16): 78 + ldp data1, data1h, [src1], 16 79 + ldp data2, data2h, [src2], 16 80 + subs limit, limit, 16 81 + ccmp data1, data2, 0, hi 82 + ccmp data1h, data2h, 0, eq 83 + b.eq L(loop16) 137 84 138 - sub limit, limit, pos 139 - /*compare the proceeding bytes in the first 8 byte segment.*/ 140 - .Ltinycmp: 141 - ldrb data1w, [src1], #1 142 - ldrb data2w, [src2], #1 143 - subs pos, pos, #1 144 - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ 145 - b.eq .Ltinycmp 146 - cbnz pos, 1f /*diff occurred before the last byte.*/ 85 + cmp data1, data2 86 + bne L(return) 87 + mov data1, data1h 88 + mov data2, data2h 89 + cmp data1, data2 90 + bne L(return) 91 + 92 + /* Compare last 1-16 bytes using unaligned access. */ 93 + L(last_bytes): 94 + add src1, src1, limit 95 + add src2, src2, limit 96 + ldp data1, data1h, [src1] 97 + ldp data2, data2h, [src2] 98 + cmp data1, data2 99 + bne L(return) 100 + mov data1, data1h 101 + mov data2, data2h 102 + cmp data1, data2 103 + 104 + /* Compare data bytes and set return value to 0, -1 or 1. 
*/ 105 + L(return): 106 + #ifndef __AARCH64EB__ 107 + rev data1, data1 108 + rev data2, data2 109 + #endif 110 + cmp data1, data2 111 + L(ret_eq): 112 + cset result, ne 113 + cneg result, result, lo 114 + ret 115 + 116 + .p2align 4 117 + /* Compare up to 8 bytes. Limit is [-8..-1]. */ 118 + L(less8): 119 + adds limit, limit, 4 120 + b.lo L(less4) 121 + ldr data1w, [src1], 4 122 + ldr data2w, [src2], 4 147 123 cmp data1w, data2w 148 - b.eq .Lstart_align 149 - 1: 150 - sub result, data1, data2 124 + b.ne L(return) 125 + sub limit, limit, 4 126 + L(less4): 127 + adds limit, limit, 4 128 + beq L(ret_eq) 129 + L(byte_loop): 130 + ldrb data1w, [src1], 1 131 + ldrb data2w, [src2], 1 132 + subs limit, limit, 1 133 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ 134 + b.eq L(byte_loop) 135 + sub result, data1w, data2w 151 136 ret 152 137 153 - .Lstart_align: 154 - lsr limit_wd, limit, #3 155 - cbz limit_wd, .Lremain8 156 - 157 - ands xzr, src1, #7 158 - b.eq .Lrecal_offset 159 - /*process more leading bytes to make src1 aligned...*/ 160 - add src1, src1, tmp3 /*backwards src1 to alignment boundary*/ 161 - add src2, src2, tmp3 162 - sub limit, limit, tmp3 163 - lsr limit_wd, limit, #3 164 - cbz limit_wd, .Lremain8 165 - /*load 8 bytes from aligned SRC1..*/ 166 - ldr data1, [src1], #8 167 - ldr data2, [src2], #8 168 - 169 - subs limit_wd, limit_wd, #1 170 - eor diff, data1, data2 /*Non-zero if differences found.*/ 171 - csinv endloop, diff, xzr, ne 172 - cbnz endloop, .Lunequal_proc 173 - /*How far is the current SRC2 from the alignment boundary...*/ 174 - and tmp3, tmp3, #7 175 - 176 - .Lrecal_offset:/*src1 is aligned now..*/ 177 - neg pos, tmp3 178 - .Lloopcmp_proc: 179 - /* 180 - * Divide the eight bytes into two parts. First,backwards the src2 181 - * to an alignment boundary,load eight bytes and compare from 182 - * the SRC2 alignment boundary. If all 8 bytes are equal,then start 183 - * the second part's comparison. Otherwise finish the comparison. 
184 - * This special handle can garantee all the accesses are in the 185 - * thread/task space in avoid to overrange access. 186 - */ 187 - ldr data1, [src1,pos] 188 - ldr data2, [src2,pos] 189 - eor diff, data1, data2 /* Non-zero if differences found. */ 190 - cbnz diff, .Lnot_limit 191 - 192 - /*The second part process*/ 193 - ldr data1, [src1], #8 194 - ldr data2, [src2], #8 195 - eor diff, data1, data2 /* Non-zero if differences found. */ 196 - subs limit_wd, limit_wd, #1 197 - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ 198 - cbz endloop, .Lloopcmp_proc 199 - .Lunequal_proc: 200 - cbz diff, .Lremain8 201 - 202 - /* There is difference occurred in the latest comparison. */ 203 - .Lnot_limit: 204 - /* 205 - * For little endian,reverse the low significant equal bits into MSB,then 206 - * following CLZ can find how many equal bits exist. 207 - */ 208 - CPU_LE( rev diff, diff ) 209 - CPU_LE( rev data1, data1 ) 210 - CPU_LE( rev data2, data2 ) 211 - 212 - /* 213 - * The MS-non-zero bit of DIFF marks either the first bit 214 - * that is different, or the end of the significant data. 215 - * Shifting left now will bring the critical information into the 216 - * top bits. 217 - */ 218 - clz pos, diff 219 - lsl data1, data1, pos 220 - lsl data2, data2, pos 221 - /* 222 - * We need to zero-extend (char is unsigned) the value and then 223 - * perform a signed subtraction. 224 - */ 225 - lsr data1, data1, #56 226 - sub result, data1, data2, lsr #56 227 - ret 228 - 229 - .Lremain8: 230 - /* Limit % 8 == 0 =>. all data are equal.*/ 231 - ands limit, limit, #7 232 - b.eq .Lret0 233 - 234 - .Ltiny8proc: 235 - ldrb data1w, [src1], #1 236 - ldrb data2w, [src2], #1 237 - subs limit, limit, #1 238 - 239 - ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ 240 - b.eq .Ltiny8proc 241 - sub result, data1, data2 242 - ret 243 - .Lret0: 244 - mov result, #0 245 - ret 246 138 SYM_FUNC_END_PI(memcmp) 247 139 EXPORT_SYMBOL_NOKASAN(memcmp)
+229 -43
arch/arm64/lib/memcpy.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 3 + * Copyright (c) 2012-2021, Arm Limited. 5 4 * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 5 + * Adapted from the original at: 6 + * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S 11 7 */ 12 8 13 9 #include <linux/linkage.h> 14 10 #include <asm/assembler.h> 15 - #include <asm/cache.h> 16 11 17 - /* 18 - * Copy a buffer from src to dest (alignment handled by the hardware) 12 + /* Assumptions: 19 13 * 20 - * Parameters: 21 - * x0 - dest 22 - * x1 - src 23 - * x2 - n 24 - * Returns: 25 - * x0 - dest 14 + * ARMv8-a, AArch64, unaligned accesses. 15 + * 26 16 */ 27 - .macro ldrb1 reg, ptr, val 28 - ldrb \reg, [\ptr], \val 29 - .endm 30 17 31 - .macro strb1 reg, ptr, val 32 - strb \reg, [\ptr], \val 33 - .endm 18 + #define L(label) .L ## label 34 19 35 - .macro ldrh1 reg, ptr, val 36 - ldrh \reg, [\ptr], \val 37 - .endm 20 + #define dstin x0 21 + #define src x1 22 + #define count x2 23 + #define dst x3 24 + #define srcend x4 25 + #define dstend x5 26 + #define A_l x6 27 + #define A_lw w6 28 + #define A_h x7 29 + #define B_l x8 30 + #define B_lw w8 31 + #define B_h x9 32 + #define C_l x10 33 + #define C_lw w10 34 + #define C_h x11 35 + #define D_l x12 36 + #define D_h x13 37 + #define E_l x14 38 + #define E_h x15 39 + #define F_l x16 40 + #define F_h x17 41 + #define G_l count 42 + #define G_h dst 43 + #define H_l src 44 + #define H_h srcend 45 + #define tmp1 x14 38 46 39 - .macro strh1 reg, ptr, val 40 - strh \reg, [\ptr], \val 41 - .endm 47 + /* This implementation handles overlaps and supports both memcpy and memmove 48 + from a single entry point. 
It uses unaligned accesses and branchless 49 + sequences to keep the code small, simple and improve performance. 42 50 43 - .macro ldr1 reg, ptr, val 44 - ldr \reg, [\ptr], \val 45 - .endm 51 + Copies are split into 3 main cases: small copies of up to 32 bytes, medium 52 + copies of up to 128 bytes, and large copies. The overhead of the overlap 53 + check is negligible since it is only required for large copies. 46 54 47 - .macro str1 reg, ptr, val 48 - str \reg, [\ptr], \val 49 - .endm 55 + Large copies use a software pipelined loop processing 64 bytes per iteration. 56 + The destination pointer is 16-byte aligned to minimize unaligned accesses. 57 + The loop tail is handled by always copying 64 bytes from the end. 58 + */ 50 59 51 - .macro ldp1 reg1, reg2, ptr, val 52 - ldp \reg1, \reg2, [\ptr], \val 53 - .endm 54 - 55 - .macro stp1 reg1, reg2, ptr, val 56 - stp \reg1, \reg2, [\ptr], \val 57 - .endm 58 - 60 + SYM_FUNC_START_ALIAS(__memmove) 61 + SYM_FUNC_START_WEAK_ALIAS_PI(memmove) 59 62 SYM_FUNC_START_ALIAS(__memcpy) 60 63 SYM_FUNC_START_WEAK_PI(memcpy) 61 - #include "copy_template.S" 64 + add srcend, src, count 65 + add dstend, dstin, count 66 + cmp count, 128 67 + b.hi L(copy_long) 68 + cmp count, 32 69 + b.hi L(copy32_128) 70 + 71 + /* Small copies: 0..32 bytes. */ 72 + cmp count, 16 73 + b.lo L(copy16) 74 + ldp A_l, A_h, [src] 75 + ldp D_l, D_h, [srcend, -16] 76 + stp A_l, A_h, [dstin] 77 + stp D_l, D_h, [dstend, -16] 62 78 ret 79 + 80 + /* Copy 8-15 bytes. */ 81 + L(copy16): 82 + tbz count, 3, L(copy8) 83 + ldr A_l, [src] 84 + ldr A_h, [srcend, -8] 85 + str A_l, [dstin] 86 + str A_h, [dstend, -8] 87 + ret 88 + 89 + .p2align 3 90 + /* Copy 4-7 bytes. */ 91 + L(copy8): 92 + tbz count, 2, L(copy4) 93 + ldr A_lw, [src] 94 + ldr B_lw, [srcend, -4] 95 + str A_lw, [dstin] 96 + str B_lw, [dstend, -4] 97 + ret 98 + 99 + /* Copy 0..3 bytes using a branchless sequence. 
*/ 100 + L(copy4): 101 + cbz count, L(copy0) 102 + lsr tmp1, count, 1 103 + ldrb A_lw, [src] 104 + ldrb C_lw, [srcend, -1] 105 + ldrb B_lw, [src, tmp1] 106 + strb A_lw, [dstin] 107 + strb B_lw, [dstin, tmp1] 108 + strb C_lw, [dstend, -1] 109 + L(copy0): 110 + ret 111 + 112 + .p2align 4 113 + /* Medium copies: 33..128 bytes. */ 114 + L(copy32_128): 115 + ldp A_l, A_h, [src] 116 + ldp B_l, B_h, [src, 16] 117 + ldp C_l, C_h, [srcend, -32] 118 + ldp D_l, D_h, [srcend, -16] 119 + cmp count, 64 120 + b.hi L(copy128) 121 + stp A_l, A_h, [dstin] 122 + stp B_l, B_h, [dstin, 16] 123 + stp C_l, C_h, [dstend, -32] 124 + stp D_l, D_h, [dstend, -16] 125 + ret 126 + 127 + .p2align 4 128 + /* Copy 65..128 bytes. */ 129 + L(copy128): 130 + ldp E_l, E_h, [src, 32] 131 + ldp F_l, F_h, [src, 48] 132 + cmp count, 96 133 + b.ls L(copy96) 134 + ldp G_l, G_h, [srcend, -64] 135 + ldp H_l, H_h, [srcend, -48] 136 + stp G_l, G_h, [dstend, -64] 137 + stp H_l, H_h, [dstend, -48] 138 + L(copy96): 139 + stp A_l, A_h, [dstin] 140 + stp B_l, B_h, [dstin, 16] 141 + stp E_l, E_h, [dstin, 32] 142 + stp F_l, F_h, [dstin, 48] 143 + stp C_l, C_h, [dstend, -32] 144 + stp D_l, D_h, [dstend, -16] 145 + ret 146 + 147 + .p2align 4 148 + /* Copy more than 128 bytes. */ 149 + L(copy_long): 150 + /* Use backwards copy if there is an overlap. */ 151 + sub tmp1, dstin, src 152 + cbz tmp1, L(copy0) 153 + cmp tmp1, count 154 + b.lo L(copy_long_backwards) 155 + 156 + /* Copy 16 bytes and then align dst to 16-byte alignment. */ 157 + 158 + ldp D_l, D_h, [src] 159 + and tmp1, dstin, 15 160 + bic dst, dstin, 15 161 + sub src, src, tmp1 162 + add count, count, tmp1 /* Count is now 16 too large. */ 163 + ldp A_l, A_h, [src, 16] 164 + stp D_l, D_h, [dstin] 165 + ldp B_l, B_h, [src, 32] 166 + ldp C_l, C_h, [src, 48] 167 + ldp D_l, D_h, [src, 64]! 168 + subs count, count, 128 + 16 /* Test and readjust count. 
*/ 169 + b.ls L(copy64_from_end) 170 + 171 + L(loop64): 172 + stp A_l, A_h, [dst, 16] 173 + ldp A_l, A_h, [src, 16] 174 + stp B_l, B_h, [dst, 32] 175 + ldp B_l, B_h, [src, 32] 176 + stp C_l, C_h, [dst, 48] 177 + ldp C_l, C_h, [src, 48] 178 + stp D_l, D_h, [dst, 64]! 179 + ldp D_l, D_h, [src, 64]! 180 + subs count, count, 64 181 + b.hi L(loop64) 182 + 183 + /* Write the last iteration and copy 64 bytes from the end. */ 184 + L(copy64_from_end): 185 + ldp E_l, E_h, [srcend, -64] 186 + stp A_l, A_h, [dst, 16] 187 + ldp A_l, A_h, [srcend, -48] 188 + stp B_l, B_h, [dst, 32] 189 + ldp B_l, B_h, [srcend, -32] 190 + stp C_l, C_h, [dst, 48] 191 + ldp C_l, C_h, [srcend, -16] 192 + stp D_l, D_h, [dst, 64] 193 + stp E_l, E_h, [dstend, -64] 194 + stp A_l, A_h, [dstend, -48] 195 + stp B_l, B_h, [dstend, -32] 196 + stp C_l, C_h, [dstend, -16] 197 + ret 198 + 199 + .p2align 4 200 + 201 + /* Large backwards copy for overlapping copies. 202 + Copy 16 bytes and then align dst to 16-byte alignment. */ 203 + L(copy_long_backwards): 204 + ldp D_l, D_h, [srcend, -16] 205 + and tmp1, dstend, 15 206 + sub srcend, srcend, tmp1 207 + sub count, count, tmp1 208 + ldp A_l, A_h, [srcend, -16] 209 + stp D_l, D_h, [dstend, -16] 210 + ldp B_l, B_h, [srcend, -32] 211 + ldp C_l, C_h, [srcend, -48] 212 + ldp D_l, D_h, [srcend, -64]! 213 + sub dstend, dstend, tmp1 214 + subs count, count, 128 215 + b.ls L(copy64_from_start) 216 + 217 + L(loop64_backwards): 218 + stp A_l, A_h, [dstend, -16] 219 + ldp A_l, A_h, [srcend, -16] 220 + stp B_l, B_h, [dstend, -32] 221 + ldp B_l, B_h, [srcend, -32] 222 + stp C_l, C_h, [dstend, -48] 223 + ldp C_l, C_h, [srcend, -48] 224 + stp D_l, D_h, [dstend, -64]! 225 + ldp D_l, D_h, [srcend, -64]! 226 + subs count, count, 64 227 + b.hi L(loop64_backwards) 228 + 229 + /* Write the last iteration and copy 64 bytes from the start. 
*/ 230 + L(copy64_from_start): 231 + ldp G_l, G_h, [src, 48] 232 + stp A_l, A_h, [dstend, -16] 233 + ldp A_l, A_h, [src, 32] 234 + stp B_l, B_h, [dstend, -32] 235 + ldp B_l, B_h, [src, 16] 236 + stp C_l, C_h, [dstend, -48] 237 + ldp C_l, C_h, [src] 238 + stp D_l, D_h, [dstend, -64] 239 + stp G_l, G_h, [dstin, 48] 240 + stp A_l, A_h, [dstin, 32] 241 + stp B_l, B_h, [dstin, 16] 242 + stp C_l, C_h, [dstin] 243 + ret 244 + 63 245 SYM_FUNC_END_PI(memcpy) 64 246 EXPORT_SYMBOL(memcpy) 65 247 SYM_FUNC_END_ALIAS(__memcpy) 66 248 EXPORT_SYMBOL(__memcpy) 249 + SYM_FUNC_END_ALIAS_PI(memmove) 250 + EXPORT_SYMBOL(memmove) 251 + SYM_FUNC_END_ALIAS(__memmove) 252 + EXPORT_SYMBOL(__memmove)
-189
arch/arm64/lib/memmove.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 5 - * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 11 - */ 12 - 13 - #include <linux/linkage.h> 14 - #include <asm/assembler.h> 15 - #include <asm/cache.h> 16 - 17 - /* 18 - * Move a buffer from src to test (alignment handled by the hardware). 19 - * If dest <= src, call memcpy, otherwise copy in reverse order. 20 - * 21 - * Parameters: 22 - * x0 - dest 23 - * x1 - src 24 - * x2 - n 25 - * Returns: 26 - * x0 - dest 27 - */ 28 - dstin .req x0 29 - src .req x1 30 - count .req x2 31 - tmp1 .req x3 32 - tmp1w .req w3 33 - tmp2 .req x4 34 - tmp2w .req w4 35 - tmp3 .req x5 36 - tmp3w .req w5 37 - dst .req x6 38 - 39 - A_l .req x7 40 - A_h .req x8 41 - B_l .req x9 42 - B_h .req x10 43 - C_l .req x11 44 - C_h .req x12 45 - D_l .req x13 46 - D_h .req x14 47 - 48 - SYM_FUNC_START_ALIAS(__memmove) 49 - SYM_FUNC_START_WEAK_PI(memmove) 50 - cmp dstin, src 51 - b.lo __memcpy 52 - add tmp1, src, count 53 - cmp dstin, tmp1 54 - b.hs __memcpy /* No overlap. */ 55 - 56 - add dst, dstin, count 57 - add src, src, count 58 - cmp count, #16 59 - b.lo .Ltail15 /*probably non-alignment accesses.*/ 60 - 61 - ands tmp2, src, #15 /* Bytes to reach alignment. */ 62 - b.eq .LSrcAligned 63 - sub count, count, tmp2 64 - /* 65 - * process the aligned offset length to make the src aligned firstly. 66 - * those extra instructions' cost is acceptable. It also make the 67 - * coming accesses are based on aligned address. 68 - */ 69 - tbz tmp2, #0, 1f 70 - ldrb tmp1w, [src, #-1]! 71 - strb tmp1w, [dst, #-1]! 72 - 1: 73 - tbz tmp2, #1, 2f 74 - ldrh tmp1w, [src, #-2]! 75 - strh tmp1w, [dst, #-2]! 76 - 2: 77 - tbz tmp2, #2, 3f 78 - ldr tmp1w, [src, #-4]! 79 - str tmp1w, [dst, #-4]! 
80 - 3: 81 - tbz tmp2, #3, .LSrcAligned 82 - ldr tmp1, [src, #-8]! 83 - str tmp1, [dst, #-8]! 84 - 85 - .LSrcAligned: 86 - cmp count, #64 87 - b.ge .Lcpy_over64 88 - 89 - /* 90 - * Deal with small copies quickly by dropping straight into the 91 - * exit block. 92 - */ 93 - .Ltail63: 94 - /* 95 - * Copy up to 48 bytes of data. At this point we only need the 96 - * bottom 6 bits of count to be accurate. 97 - */ 98 - ands tmp1, count, #0x30 99 - b.eq .Ltail15 100 - cmp tmp1w, #0x20 101 - b.eq 1f 102 - b.lt 2f 103 - ldp A_l, A_h, [src, #-16]! 104 - stp A_l, A_h, [dst, #-16]! 105 - 1: 106 - ldp A_l, A_h, [src, #-16]! 107 - stp A_l, A_h, [dst, #-16]! 108 - 2: 109 - ldp A_l, A_h, [src, #-16]! 110 - stp A_l, A_h, [dst, #-16]! 111 - 112 - .Ltail15: 113 - tbz count, #3, 1f 114 - ldr tmp1, [src, #-8]! 115 - str tmp1, [dst, #-8]! 116 - 1: 117 - tbz count, #2, 2f 118 - ldr tmp1w, [src, #-4]! 119 - str tmp1w, [dst, #-4]! 120 - 2: 121 - tbz count, #1, 3f 122 - ldrh tmp1w, [src, #-2]! 123 - strh tmp1w, [dst, #-2]! 124 - 3: 125 - tbz count, #0, .Lexitfunc 126 - ldrb tmp1w, [src, #-1] 127 - strb tmp1w, [dst, #-1] 128 - 129 - .Lexitfunc: 130 - ret 131 - 132 - .Lcpy_over64: 133 - subs count, count, #128 134 - b.ge .Lcpy_body_large 135 - /* 136 - * Less than 128 bytes to copy, so handle 64 bytes here and then jump 137 - * to the tail. 138 - */ 139 - ldp A_l, A_h, [src, #-16] 140 - stp A_l, A_h, [dst, #-16] 141 - ldp B_l, B_h, [src, #-32] 142 - ldp C_l, C_h, [src, #-48] 143 - stp B_l, B_h, [dst, #-32] 144 - stp C_l, C_h, [dst, #-48] 145 - ldp D_l, D_h, [src, #-64]! 146 - stp D_l, D_h, [dst, #-64]! 147 - 148 - tst count, #0x3f 149 - b.ne .Ltail63 150 - ret 151 - 152 - /* 153 - * Critical loop. Start at a new cache line boundary. Assuming 154 - * 64 bytes per line this ensures the entire loop is in one line. 155 - */ 156 - .p2align L1_CACHE_SHIFT 157 - .Lcpy_body_large: 158 - /* pre-load 64 bytes data. 
*/ 159 - ldp A_l, A_h, [src, #-16] 160 - ldp B_l, B_h, [src, #-32] 161 - ldp C_l, C_h, [src, #-48] 162 - ldp D_l, D_h, [src, #-64]! 163 - 1: 164 - /* 165 - * interlace the load of next 64 bytes data block with store of the last 166 - * loaded 64 bytes data. 167 - */ 168 - stp A_l, A_h, [dst, #-16] 169 - ldp A_l, A_h, [src, #-16] 170 - stp B_l, B_h, [dst, #-32] 171 - ldp B_l, B_h, [src, #-32] 172 - stp C_l, C_h, [dst, #-48] 173 - ldp C_l, C_h, [src, #-48] 174 - stp D_l, D_h, [dst, #-64]! 175 - ldp D_l, D_h, [src, #-64]! 176 - subs count, count, #64 177 - b.ge 1b 178 - stp A_l, A_h, [dst, #-16] 179 - stp B_l, B_h, [dst, #-32] 180 - stp C_l, C_h, [dst, #-48] 181 - stp D_l, D_h, [dst, #-64]! 182 - 183 - tst count, #0x3f 184 - b.ne .Ltail63 185 - ret 186 - SYM_FUNC_END_PI(memmove) 187 - EXPORT_SYMBOL(memmove) 188 - SYM_FUNC_END_ALIAS(__memmove) 189 - EXPORT_SYMBOL(__memmove)
+20
arch/arm64/lib/mte.S
··· 37 37 SYM_FUNC_END(mte_clear_page_tags) 38 38 39 39 /* 40 + * Zero the page and tags at the same time 41 + * 42 + * Parameters: 43 + * x0 - address to the beginning of the page 44 + */ 45 + SYM_FUNC_START(mte_zero_clear_page_tags) 46 + mrs x1, dczid_el0 47 + and w1, w1, #0xf 48 + mov x2, #4 49 + lsl x1, x2, x1 50 + and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag 51 + 52 + 1: dc gzva, x0 53 + add x0, x0, x1 54 + tst x0, #(PAGE_SIZE - 1) 55 + b.ne 1b 56 + ret 57 + SYM_FUNC_END(mte_zero_clear_page_tags) 58 + 59 + /* 40 60 * Copy the tags from the source page to the destination one 41 61 * x0 - address of the destination page 42 62 * x1 - address of the source page
+124 -171
arch/arm64/lib/strcmp.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 3 + * Copyright (c) 2012-2021, Arm Limited. 5 4 * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 5 + * Adapted from the original at: 6 + * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S 11 7 */ 12 8 13 9 #include <linux/linkage.h> 14 10 #include <asm/assembler.h> 15 11 16 - /* 17 - * compare two strings 12 + /* Assumptions: 18 13 * 19 - * Parameters: 20 - * x0 - const string 1 pointer 21 - * x1 - const string 2 pointer 22 - * Returns: 23 - * x0 - an integer less than, equal to, or greater than zero 24 - * if s1 is found, respectively, to be less than, to match, 25 - * or be greater than s2. 14 + * ARMv8-a, AArch64 26 15 */ 16 + 17 + #define L(label) .L ## label 27 18 28 19 #define REP8_01 0x0101010101010101 29 20 #define REP8_7f 0x7f7f7f7f7f7f7f7f 30 21 #define REP8_80 0x8080808080808080 31 22 32 23 /* Parameters and result. */ 33 - src1 .req x0 34 - src2 .req x1 35 - result .req x0 24 + #define src1 x0 25 + #define src2 x1 26 + #define result x0 36 27 37 28 /* Internal variables. */ 38 - data1 .req x2 39 - data1w .req w2 40 - data2 .req x3 41 - data2w .req w3 42 - has_nul .req x4 43 - diff .req x5 44 - syndrome .req x6 45 - tmp1 .req x7 46 - tmp2 .req x8 47 - tmp3 .req x9 48 - zeroones .req x10 49 - pos .req x11 29 + #define data1 x2 30 + #define data1w w2 31 + #define data2 x3 32 + #define data2w w3 33 + #define has_nul x4 34 + #define diff x5 35 + #define syndrome x6 36 + #define tmp1 x7 37 + #define tmp2 x8 38 + #define tmp3 x9 39 + #define zeroones x10 40 + #define pos x11 50 41 42 + /* Start of performance-critical section -- one 64B cache line. 
*/ 43 + .align 6 51 44 SYM_FUNC_START_WEAK_PI(strcmp) 52 45 eor tmp1, src1, src2 53 46 mov zeroones, #REP8_01 54 47 tst tmp1, #7 55 - b.ne .Lmisaligned8 48 + b.ne L(misaligned8) 56 49 ands tmp1, src1, #7 57 - b.ne .Lmutual_align 58 - 59 - /* 60 - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 61 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 62 - * can be done in parallel across the entire word. 63 - */ 64 - .Lloop_aligned: 50 + b.ne L(mutual_align) 51 + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 52 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 53 + can be done in parallel across the entire word. */ 54 + L(loop_aligned): 65 55 ldr data1, [src1], #8 66 56 ldr data2, [src2], #8 67 - .Lstart_realigned: 57 + L(start_realigned): 68 58 sub tmp1, data1, zeroones 69 59 orr tmp2, data1, #REP8_7f 70 60 eor diff, data1, data2 /* Non-zero if differences found. */ 71 61 bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ 72 62 orr syndrome, diff, has_nul 73 - cbz syndrome, .Lloop_aligned 74 - b .Lcal_cmpresult 63 + cbz syndrome, L(loop_aligned) 64 + /* End of performance-critical section -- one 64B cache line. */ 75 65 76 - .Lmutual_align: 77 - /* 78 - * Sources are mutually aligned, but are not currently at an 79 - * alignment boundary. Round down the addresses and then mask off 80 - * the bytes that preceed the start point. 81 - */ 66 + L(end): 67 + #ifndef __AARCH64EB__ 68 + rev syndrome, syndrome 69 + rev data1, data1 70 + /* The MS-non-zero bit of the syndrome marks either the first bit 71 + that is different, or the top bit of the first zero byte. 72 + Shifting left now will bring the critical information into the 73 + top bits. */ 74 + clz pos, syndrome 75 + rev data2, data2 76 + lsl data1, data1, pos 77 + lsl data2, data2, pos 78 + /* But we need to zero-extend (char is unsigned) the value and then 79 + perform a signed 32-bit subtraction. 
*/ 80 + lsr data1, data1, #56 81 + sub result, data1, data2, lsr #56 82 + ret 83 + #else 84 + /* For big-endian we cannot use the trick with the syndrome value 85 + as carry-propagation can corrupt the upper bits if the trailing 86 + bytes in the string contain 0x01. */ 87 + /* However, if there is no NUL byte in the dword, we can generate 88 + the result directly. We can't just subtract the bytes as the 89 + MSB might be significant. */ 90 + cbnz has_nul, 1f 91 + cmp data1, data2 92 + cset result, ne 93 + cneg result, result, lo 94 + ret 95 + 1: 96 + /* Re-compute the NUL-byte detection, using a byte-reversed value. */ 97 + rev tmp3, data1 98 + sub tmp1, tmp3, zeroones 99 + orr tmp2, tmp3, #REP8_7f 100 + bic has_nul, tmp1, tmp2 101 + rev has_nul, has_nul 102 + orr syndrome, diff, has_nul 103 + clz pos, syndrome 104 + /* The MS-non-zero bit of the syndrome marks either the first bit 105 + that is different, or the top bit of the first zero byte. 106 + Shifting left now will bring the critical information into the 107 + top bits. */ 108 + lsl data1, data1, pos 109 + lsl data2, data2, pos 110 + /* But we need to zero-extend (char is unsigned) the value and then 111 + perform a signed 32-bit subtraction. */ 112 + lsr data1, data1, #56 113 + sub result, data1, data2, lsr #56 114 + ret 115 + #endif 116 + 117 + L(mutual_align): 118 + /* Sources are mutually aligned, but are not currently at an 119 + alignment boundary. Round down the addresses and then mask off 120 + the bytes that preceed the start point. */ 82 121 bic src1, src1, #7 83 122 bic src2, src2, #7 84 123 lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ ··· 125 86 neg tmp1, tmp1 /* Bits to alignment -64. */ 126 87 ldr data2, [src2], #8 127 88 mov tmp2, #~0 89 + #ifdef __AARCH64EB__ 128 90 /* Big-endian. Early bytes are at MSB. */ 129 - CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ 91 + lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ 92 + #else 130 93 /* Little-endian. Early bytes are at LSB. 
*/ 131 - CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ 132 - 94 + lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ 95 + #endif 133 96 orr data1, data1, tmp2 134 97 orr data2, data2, tmp2 135 - b .Lstart_realigned 98 + b L(start_realigned) 136 99 137 - .Lmisaligned8: 138 - /* 139 - * Get the align offset length to compare per byte first. 140 - * After this process, one string's address will be aligned. 141 - */ 142 - and tmp1, src1, #7 143 - neg tmp1, tmp1 144 - add tmp1, tmp1, #8 145 - and tmp2, src2, #7 146 - neg tmp2, tmp2 147 - add tmp2, tmp2, #8 148 - subs tmp3, tmp1, tmp2 149 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ 150 - .Ltinycmp: 100 + L(misaligned8): 101 + /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always 102 + checking to make sure that we don't access beyond page boundary in 103 + SRC2. */ 104 + tst src1, #7 105 + b.eq L(loop_misaligned) 106 + L(do_misaligned): 151 107 ldrb data1w, [src1], #1 152 108 ldrb data2w, [src2], #1 153 - subs pos, pos, #1 154 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ 155 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ 156 - b.eq .Ltinycmp 157 - cbnz pos, 1f /*find the null or unequal...*/ 158 109 cmp data1w, #1 159 - ccmp data1w, data2w, #0, cs 160 - b.eq .Lstart_align /*the last bytes are equal....*/ 161 - 1: 110 + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ 111 + b.ne L(done) 112 + tst src1, #7 113 + b.ne L(do_misaligned) 114 + 115 + L(loop_misaligned): 116 + /* Test if we are within the last dword of the end of a 4K page. If 117 + yes then jump back to the misaligned loop to copy a byte at a time. */ 118 + and tmp1, src2, #0xff8 119 + eor tmp1, tmp1, #0xff8 120 + cbz tmp1, L(do_misaligned) 121 + ldr data1, [src1], #8 122 + ldr data2, [src2], #8 123 + 124 + sub tmp1, data1, zeroones 125 + orr tmp2, data1, #REP8_7f 126 + eor diff, data1, data2 /* Non-zero if differences found. */ 127 + bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. 
*/ 128 + orr syndrome, diff, has_nul 129 + cbz syndrome, L(loop_misaligned) 130 + b L(end) 131 + 132 + L(done): 162 133 sub result, data1, data2 163 134 ret 164 135 165 - .Lstart_align: 166 - ands xzr, src1, #7 167 - b.eq .Lrecal_offset 168 - /*process more leading bytes to make str1 aligned...*/ 169 - add src1, src1, tmp3 170 - add src2, src2, tmp3 171 - /*load 8 bytes from aligned str1 and non-aligned str2..*/ 172 - ldr data1, [src1], #8 173 - ldr data2, [src2], #8 174 - 175 - sub tmp1, data1, zeroones 176 - orr tmp2, data1, #REP8_7f 177 - bic has_nul, tmp1, tmp2 178 - eor diff, data1, data2 /* Non-zero if differences found. */ 179 - orr syndrome, diff, has_nul 180 - cbnz syndrome, .Lcal_cmpresult 181 - /*How far is the current str2 from the alignment boundary...*/ 182 - and tmp3, tmp3, #7 183 - .Lrecal_offset: 184 - neg pos, tmp3 185 - .Lloopcmp_proc: 186 - /* 187 - * Divide the eight bytes into two parts. First,backwards the src2 188 - * to an alignment boundary,load eight bytes from the SRC2 alignment 189 - * boundary,then compare with the relative bytes from SRC1. 190 - * If all 8 bytes are equal,then start the second part's comparison. 191 - * Otherwise finish the comparison. 192 - * This special handle can garantee all the accesses are in the 193 - * thread/task space in avoid to overrange access. 194 - */ 195 - ldr data1, [src1,pos] 196 - ldr data2, [src2,pos] 197 - sub tmp1, data1, zeroones 198 - orr tmp2, data1, #REP8_7f 199 - bic has_nul, tmp1, tmp2 200 - eor diff, data1, data2 /* Non-zero if differences found. */ 201 - orr syndrome, diff, has_nul 202 - cbnz syndrome, .Lcal_cmpresult 203 - 204 - /*The second part process*/ 205 - ldr data1, [src1], #8 206 - ldr data2, [src2], #8 207 - sub tmp1, data1, zeroones 208 - orr tmp2, data1, #REP8_7f 209 - bic has_nul, tmp1, tmp2 210 - eor diff, data1, data2 /* Non-zero if differences found. 
*/ 211 - orr syndrome, diff, has_nul 212 - cbz syndrome, .Lloopcmp_proc 213 - 214 - .Lcal_cmpresult: 215 - /* 216 - * reversed the byte-order as big-endian,then CLZ can find the most 217 - * significant zero bits. 218 - */ 219 - CPU_LE( rev syndrome, syndrome ) 220 - CPU_LE( rev data1, data1 ) 221 - CPU_LE( rev data2, data2 ) 222 - 223 - /* 224 - * For big-endian we cannot use the trick with the syndrome value 225 - * as carry-propagation can corrupt the upper bits if the trailing 226 - * bytes in the string contain 0x01. 227 - * However, if there is no NUL byte in the dword, we can generate 228 - * the result directly. We cannot just subtract the bytes as the 229 - * MSB might be significant. 230 - */ 231 - CPU_BE( cbnz has_nul, 1f ) 232 - CPU_BE( cmp data1, data2 ) 233 - CPU_BE( cset result, ne ) 234 - CPU_BE( cneg result, result, lo ) 235 - CPU_BE( ret ) 236 - CPU_BE( 1: ) 237 - /*Re-compute the NUL-byte detection, using a byte-reversed value. */ 238 - CPU_BE( rev tmp3, data1 ) 239 - CPU_BE( sub tmp1, tmp3, zeroones ) 240 - CPU_BE( orr tmp2, tmp3, #REP8_7f ) 241 - CPU_BE( bic has_nul, tmp1, tmp2 ) 242 - CPU_BE( rev has_nul, has_nul ) 243 - CPU_BE( orr syndrome, diff, has_nul ) 244 - 245 - clz pos, syndrome 246 - /* 247 - * The MS-non-zero bit of the syndrome marks either the first bit 248 - * that is different, or the top bit of the first zero byte. 249 - * Shifting left now will bring the critical information into the 250 - * top bits. 251 - */ 252 - lsl data1, data1, pos 253 - lsl data2, data2, pos 254 - /* 255 - * But we need to zero-extend (char is unsigned) the value and then 256 - * perform a signed 32-bit subtraction. 257 - */ 258 - lsr data1, data1, #56 259 - sub result, data1, data2, lsr #56 260 - ret 261 136 SYM_FUNC_END_PI(strcmp) 262 137 EXPORT_SYMBOL_NOKASAN(strcmp)
+175 -87
arch/arm64/lib/strlen.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 3 + * Copyright (c) 2013-2021, Arm Limited. 5 4 * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 5 + * Adapted from the original at: 6 + * https://github.com/ARM-software/optimized-routines/blob/98e4d6a5c13c8e54/string/aarch64/strlen.S 11 7 */ 12 8 13 9 #include <linux/linkage.h> 14 10 #include <asm/assembler.h> 15 11 16 - /* 17 - * calculate the length of a string 12 + /* Assumptions: 18 13 * 19 - * Parameters: 20 - * x0 - const string pointer 21 - * Returns: 22 - * x0 - the return length of specific string 14 + * ARMv8-a, AArch64, unaligned accesses, min page size 4k. 23 15 */ 24 16 17 + #define L(label) .L ## label 18 + 25 19 /* Arguments and results. */ 26 - srcin .req x0 27 - len .req x0 20 + #define srcin x0 21 + #define len x0 28 22 29 23 /* Locals and temporaries. */ 30 - src .req x1 31 - data1 .req x2 32 - data2 .req x3 33 - data2a .req x4 34 - has_nul1 .req x5 35 - has_nul2 .req x6 36 - tmp1 .req x7 37 - tmp2 .req x8 38 - tmp3 .req x9 39 - tmp4 .req x10 40 - zeroones .req x11 41 - pos .req x12 24 + #define src x1 25 + #define data1 x2 26 + #define data2 x3 27 + #define has_nul1 x4 28 + #define has_nul2 x5 29 + #define tmp1 x4 30 + #define tmp2 x5 31 + #define tmp3 x6 32 + #define tmp4 x7 33 + #define zeroones x8 34 + 35 + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 36 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 37 + can be done in parallel across the entire word. A faster check 38 + (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives 39 + false hits for characters 129..255. 
*/ 42 40 43 41 #define REP8_01 0x0101010101010101 44 42 #define REP8_7f 0x7f7f7f7f7f7f7f7f 45 43 #define REP8_80 0x8080808080808080 46 44 45 + #define MIN_PAGE_SIZE 4096 46 + 47 + /* Since strings are short on average, we check the first 16 bytes 48 + of the string for a NUL character. In order to do an unaligned ldp 49 + safely we have to do a page cross check first. If there is a NUL 50 + byte we calculate the length from the 2 8-byte words using 51 + conditional select to reduce branch mispredictions (it is unlikely 52 + strlen will be repeatedly called on strings with the same length). 53 + 54 + If the string is longer than 16 bytes, we align src so don't need 55 + further page cross checks, and process 32 bytes per iteration 56 + using the fast NUL check. If we encounter non-ASCII characters, 57 + fallback to a second loop using the full NUL check. 58 + 59 + If the page cross check fails, we read 16 bytes from an aligned 60 + address, remove any characters before the string, and continue 61 + in the main loop using aligned loads. Since strings crossing a 62 + page in the first 16 bytes are rare (probability of 63 + 16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized. 64 + 65 + AArch64 systems have a minimum page size of 4k. We don't bother 66 + checking for larger page sizes - the cost of setting up the correct 67 + page size is just not worth the extra gain from a small reduction in 68 + the cases taking the slow path. Note that we only care about 69 + whether the first fetch, which may be misaligned, crosses a page 70 + boundary. */ 71 + 47 72 SYM_FUNC_START_WEAK_PI(strlen) 48 - mov zeroones, #REP8_01 49 - bic src, srcin, #15 50 - ands tmp1, srcin, #15 51 - b.ne .Lmisaligned 52 - /* 53 - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 54 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 55 - * can be done in parallel across the entire word. 
56 - */ 57 - /* 58 - * The inner loop deals with two Dwords at a time. This has a 59 - * slightly higher start-up cost, but we should win quite quickly, 60 - * especially on cores with a high number of issue slots per 61 - * cycle, as we get much better parallelism out of the operations. 62 - */ 63 - .Lloop: 64 - ldp data1, data2, [src], #16 65 - .Lrealigned: 73 + and tmp1, srcin, MIN_PAGE_SIZE - 1 74 + mov zeroones, REP8_01 75 + cmp tmp1, MIN_PAGE_SIZE - 16 76 + b.gt L(page_cross) 77 + ldp data1, data2, [srcin] 78 + #ifdef __AARCH64EB__ 79 + /* For big-endian, carry propagation (if the final byte in the 80 + string is 0x01) means we cannot use has_nul1/2 directly. 81 + Since we expect strings to be small and early-exit, 82 + byte-swap the data now so has_null1/2 will be correct. */ 83 + rev data1, data1 84 + rev data2, data2 85 + #endif 66 86 sub tmp1, data1, zeroones 67 - orr tmp2, data1, #REP8_7f 87 + orr tmp2, data1, REP8_7f 68 88 sub tmp3, data2, zeroones 69 - orr tmp4, data2, #REP8_7f 70 - bic has_nul1, tmp1, tmp2 71 - bics has_nul2, tmp3, tmp4 72 - ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ 73 - b.eq .Lloop 89 + orr tmp4, data2, REP8_7f 90 + bics has_nul1, tmp1, tmp2 91 + bic has_nul2, tmp3, tmp4 92 + ccmp has_nul2, 0, 0, eq 93 + beq L(main_loop_entry) 74 94 75 - sub len, src, srcin 76 - cbz has_nul1, .Lnul_in_data2 77 - CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/ 78 - sub len, len, #8 79 - mov has_nul2, has_nul1 80 - .Lnul_in_data2: 81 - /* 82 - * For big-endian, carry propagation (if the final byte in the 83 - * string is 0x01) means we cannot use has_nul directly. The 84 - * easiest way to get the correct byte is to byte-swap the data 85 - * and calculate the syndrome a second time. 
86 - */ 87 - CPU_BE( rev data2, data2 ) 88 - CPU_BE( sub tmp1, data2, zeroones ) 89 - CPU_BE( orr tmp2, data2, #REP8_7f ) 90 - CPU_BE( bic has_nul2, tmp1, tmp2 ) 91 - 92 - sub len, len, #8 93 - rev has_nul2, has_nul2 94 - clz pos, has_nul2 95 - add len, len, pos, lsr #3 /* Bits to bytes. */ 95 + /* Enter with C = has_nul1 == 0. */ 96 + csel has_nul1, has_nul1, has_nul2, cc 97 + mov len, 8 98 + rev has_nul1, has_nul1 99 + clz tmp1, has_nul1 100 + csel len, xzr, len, cc 101 + add len, len, tmp1, lsr 3 96 102 ret 97 103 98 - .Lmisaligned: 99 - cmp tmp1, #8 100 - neg tmp1, tmp1 101 - ldp data1, data2, [src], #16 102 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ 103 - mov tmp2, #~0 104 - /* Big-endian. Early bytes are at MSB. */ 105 - CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ 106 - /* Little-endian. Early bytes are at LSB. */ 107 - CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ 104 + /* The inner loop processes 32 bytes per iteration and uses the fast 105 + NUL check. If we encounter non-ASCII characters, use a second 106 + loop with the accurate NUL check. */ 107 + .p2align 4 108 + L(main_loop_entry): 109 + bic src, srcin, 15 110 + sub src, src, 16 111 + L(main_loop): 112 + ldp data1, data2, [src, 32]! 113 + L(page_cross_entry): 114 + sub tmp1, data1, zeroones 115 + sub tmp3, data2, zeroones 116 + orr tmp2, tmp1, tmp3 117 + tst tmp2, zeroones, lsl 7 118 + bne 1f 119 + ldp data1, data2, [src, 16] 120 + sub tmp1, data1, zeroones 121 + sub tmp3, data2, zeroones 122 + orr tmp2, tmp1, tmp3 123 + tst tmp2, zeroones, lsl 7 124 + beq L(main_loop) 125 + add src, src, 16 126 + 1: 127 + /* The fast check failed, so do the slower, accurate NUL check. 
*/ 128 + orr tmp2, data1, REP8_7f 129 + orr tmp4, data2, REP8_7f 130 + bics has_nul1, tmp1, tmp2 131 + bic has_nul2, tmp3, tmp4 132 + ccmp has_nul2, 0, 0, eq 133 + beq L(nonascii_loop) 108 134 109 - orr data1, data1, tmp2 110 - orr data2a, data2, tmp2 111 - csinv data1, data1, xzr, le 112 - csel data2, data2, data2a, le 113 - b .Lrealigned 135 + /* Enter with C = has_nul1 == 0. */ 136 + L(tail): 137 + #ifdef __AARCH64EB__ 138 + /* For big-endian, carry propagation (if the final byte in the 139 + string is 0x01) means we cannot use has_nul1/2 directly. The 140 + easiest way to get the correct byte is to byte-swap the data 141 + and calculate the syndrome a second time. */ 142 + csel data1, data1, data2, cc 143 + rev data1, data1 144 + sub tmp1, data1, zeroones 145 + orr tmp2, data1, REP8_7f 146 + bic has_nul1, tmp1, tmp2 147 + #else 148 + csel has_nul1, has_nul1, has_nul2, cc 149 + #endif 150 + sub len, src, srcin 151 + rev has_nul1, has_nul1 152 + add tmp2, len, 8 153 + clz tmp1, has_nul1 154 + csel len, len, tmp2, cc 155 + add len, len, tmp1, lsr 3 156 + ret 157 + 158 + L(nonascii_loop): 159 + ldp data1, data2, [src, 16]! 160 + sub tmp1, data1, zeroones 161 + orr tmp2, data1, REP8_7f 162 + sub tmp3, data2, zeroones 163 + orr tmp4, data2, REP8_7f 164 + bics has_nul1, tmp1, tmp2 165 + bic has_nul2, tmp3, tmp4 166 + ccmp has_nul2, 0, 0, eq 167 + bne L(tail) 168 + ldp data1, data2, [src, 16]! 169 + sub tmp1, data1, zeroones 170 + orr tmp2, data1, REP8_7f 171 + sub tmp3, data2, zeroones 172 + orr tmp4, data2, REP8_7f 173 + bics has_nul1, tmp1, tmp2 174 + bic has_nul2, tmp3, tmp4 175 + ccmp has_nul2, 0, 0, eq 176 + beq L(nonascii_loop) 177 + b L(tail) 178 + 179 + /* Load 16 bytes from [srcin & ~15] and force the bytes that precede 180 + srcin to 0x7f, so we ignore any NUL bytes before the string. 181 + Then continue in the aligned loop. 
*/ 182 + L(page_cross): 183 + bic src, srcin, 15 184 + ldp data1, data2, [src] 185 + lsl tmp1, srcin, 3 186 + mov tmp4, -1 187 + #ifdef __AARCH64EB__ 188 + /* Big-endian. Early bytes are at MSB. */ 189 + lsr tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ 190 + #else 191 + /* Little-endian. Early bytes are at LSB. */ 192 + lsl tmp1, tmp4, tmp1 /* Shift (tmp1 & 63). */ 193 + #endif 194 + orr tmp1, tmp1, REP8_80 195 + orn data1, data1, tmp1 196 + orn tmp2, data2, tmp1 197 + tst srcin, 8 198 + csel data1, data1, tmp4, eq 199 + csel data2, data2, tmp2, eq 200 + b L(page_cross_entry) 201 + 114 202 SYM_FUNC_END_PI(strlen) 115 203 EXPORT_SYMBOL_NOKASAN(strlen)
+199 -237
arch/arm64/lib/strncmp.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 - * Copyright (C) 2013 ARM Ltd. 4 - * Copyright (C) 2013 Linaro. 3 + * Copyright (c) 2013-2021, Arm Limited. 5 4 * 6 - * This code is based on glibc cortex strings work originally authored by Linaro 7 - * be found @ 8 - * 9 - * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ 10 - * files/head:/src/aarch64/ 5 + * Adapted from the original at: 6 + * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S 11 7 */ 12 8 13 9 #include <linux/linkage.h> 14 10 #include <asm/assembler.h> 15 11 16 - /* 17 - * compare two strings 12 + /* Assumptions: 18 13 * 19 - * Parameters: 20 - * x0 - const string 1 pointer 21 - * x1 - const string 2 pointer 22 - * x2 - the maximal length to be compared 23 - * Returns: 24 - * x0 - an integer less than, equal to, or greater than zero if s1 is found, 25 - * respectively, to be less than, to match, or be greater than s2. 14 + * ARMv8-a, AArch64 26 15 */ 16 + 17 + #define L(label) .L ## label 27 18 28 19 #define REP8_01 0x0101010101010101 29 20 #define REP8_7f 0x7f7f7f7f7f7f7f7f 30 21 #define REP8_80 0x8080808080808080 31 22 32 23 /* Parameters and result. */ 33 - src1 .req x0 34 - src2 .req x1 35 - limit .req x2 36 - result .req x0 24 + #define src1 x0 25 + #define src2 x1 26 + #define limit x2 27 + #define result x0 37 28 38 29 /* Internal variables. 
*/ 39 - data1 .req x3 40 - data1w .req w3 41 - data2 .req x4 42 - data2w .req w4 43 - has_nul .req x5 44 - diff .req x6 45 - syndrome .req x7 46 - tmp1 .req x8 47 - tmp2 .req x9 48 - tmp3 .req x10 49 - zeroones .req x11 50 - pos .req x12 51 - limit_wd .req x13 52 - mask .req x14 53 - endloop .req x15 30 + #define data1 x3 31 + #define data1w w3 32 + #define data2 x4 33 + #define data2w w4 34 + #define has_nul x5 35 + #define diff x6 36 + #define syndrome x7 37 + #define tmp1 x8 38 + #define tmp2 x9 39 + #define tmp3 x10 40 + #define zeroones x11 41 + #define pos x12 42 + #define limit_wd x13 43 + #define mask x14 44 + #define endloop x15 45 + #define count mask 54 46 55 47 SYM_FUNC_START_WEAK_PI(strncmp) 56 - cbz limit, .Lret0 48 + cbz limit, L(ret0) 57 49 eor tmp1, src1, src2 58 50 mov zeroones, #REP8_01 59 51 tst tmp1, #7 60 - b.ne .Lmisaligned8 61 - ands tmp1, src1, #7 62 - b.ne .Lmutual_align 52 + and count, src1, #7 53 + b.ne L(misaligned8) 54 + cbnz count, L(mutual_align) 63 55 /* Calculate the number of full and partial words -1. */ 64 - /* 65 - * when limit is mulitply of 8, if not sub 1, 66 - * the judgement of last dword will wrong. 67 - */ 68 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ 69 - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ 56 + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ 57 + lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ 70 58 71 - /* 72 - * NUL detection works on the principle that (X - 1) & (~X) & 0x80 73 - * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 74 - * can be done in parallel across the entire word. 75 - */ 76 - .Lloop_aligned: 59 + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 60 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and 61 + can be done in parallel across the entire word. 
*/ 62 + .p2align 4 63 + L(loop_aligned): 77 64 ldr data1, [src1], #8 78 65 ldr data2, [src2], #8 79 - .Lstart_realigned: 66 + L(start_realigned): 80 67 subs limit_wd, limit_wd, #1 81 68 sub tmp1, data1, zeroones 82 69 orr tmp2, data1, #REP8_7f 83 - eor diff, data1, data2 /* Non-zero if differences found. */ 84 - csinv endloop, diff, xzr, pl /* Last Dword or differences.*/ 85 - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ 70 + eor diff, data1, data2 /* Non-zero if differences found. */ 71 + csinv endloop, diff, xzr, pl /* Last Dword or differences. */ 72 + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ 86 73 ccmp endloop, #0, #0, eq 87 - b.eq .Lloop_aligned 74 + b.eq L(loop_aligned) 75 + /* End of main loop */ 88 76 89 - /*Not reached the limit, must have found the end or a diff. */ 90 - tbz limit_wd, #63, .Lnot_limit 77 + /* Not reached the limit, must have found the end or a diff. */ 78 + tbz limit_wd, #63, L(not_limit) 91 79 92 80 /* Limit % 8 == 0 => all bytes significant. */ 93 81 ands limit, limit, #7 94 - b.eq .Lnot_limit 82 + b.eq L(not_limit) 95 83 96 - lsl limit, limit, #3 /* Bits -> bytes. */ 84 + lsl limit, limit, #3 /* Bits -> bytes. */ 97 85 mov mask, #~0 98 - CPU_BE( lsr mask, mask, limit ) 99 - CPU_LE( lsl mask, mask, limit ) 86 + #ifdef __AARCH64EB__ 87 + lsr mask, mask, limit 88 + #else 89 + lsl mask, mask, limit 90 + #endif 100 91 bic data1, data1, mask 101 92 bic data2, data2, mask 102 93 103 94 /* Make sure that the NUL byte is marked in the syndrome. */ 104 95 orr has_nul, has_nul, mask 105 96 106 - .Lnot_limit: 97 + L(not_limit): 107 98 orr syndrome, diff, has_nul 108 - b .Lcal_cmpresult 109 99 110 - .Lmutual_align: 111 - /* 112 - * Sources are mutually aligned, but are not currently at an 113 - * alignment boundary. Round down the addresses and then mask off 114 - * the bytes that precede the start point. 
115 - * We also need to adjust the limit calculations, but without 116 - * overflowing if the limit is near ULONG_MAX. 117 - */ 118 - bic src1, src1, #7 119 - bic src2, src2, #7 120 - ldr data1, [src1], #8 121 - neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ 122 - ldr data2, [src2], #8 123 - mov tmp2, #~0 124 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ 125 - /* Big-endian. Early bytes are at MSB. */ 126 - CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ 127 - /* Little-endian. Early bytes are at LSB. */ 128 - CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ 129 - 130 - and tmp3, limit_wd, #7 131 - lsr limit_wd, limit_wd, #3 132 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/ 133 - add limit, limit, tmp1 134 - add tmp3, tmp3, tmp1 135 - orr data1, data1, tmp2 136 - orr data2, data2, tmp2 137 - add limit_wd, limit_wd, tmp3, lsr #3 138 - b .Lstart_realigned 139 - 140 - /*when src1 offset is not equal to src2 offset...*/ 141 - .Lmisaligned8: 142 - cmp limit, #8 143 - b.lo .Ltiny8proc /*limit < 8... */ 144 - /* 145 - * Get the align offset length to compare per byte first. 146 - * After this process, one string's address will be aligned.*/ 147 - and tmp1, src1, #7 148 - neg tmp1, tmp1 149 - add tmp1, tmp1, #8 150 - and tmp2, src2, #7 151 - neg tmp2, tmp2 152 - add tmp2, tmp2, #8 153 - subs tmp3, tmp1, tmp2 154 - csel pos, tmp1, tmp2, hi /*Choose the maximum. */ 155 - /* 156 - * Here, limit is not less than 8, so directly run .Ltinycmp 157 - * without checking the limit.*/ 158 - sub limit, limit, pos 159 - .Ltinycmp: 160 - ldrb data1w, [src1], #1 161 - ldrb data2w, [src2], #1 162 - subs pos, pos, #1 163 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ 164 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. 
*/ 165 - b.eq .Ltinycmp 166 - cbnz pos, 1f /*find the null or unequal...*/ 167 - cmp data1w, #1 168 - ccmp data1w, data2w, #0, cs 169 - b.eq .Lstart_align /*the last bytes are equal....*/ 170 - 1: 171 - sub result, data1, data2 172 - ret 173 - 174 - .Lstart_align: 175 - lsr limit_wd, limit, #3 176 - cbz limit_wd, .Lremain8 177 - /*process more leading bytes to make str1 aligned...*/ 178 - ands xzr, src1, #7 179 - b.eq .Lrecal_offset 180 - add src1, src1, tmp3 /*tmp3 is positive in this branch.*/ 181 - add src2, src2, tmp3 182 - ldr data1, [src1], #8 183 - ldr data2, [src2], #8 184 - 185 - sub limit, limit, tmp3 186 - lsr limit_wd, limit, #3 187 - subs limit_wd, limit_wd, #1 188 - 189 - sub tmp1, data1, zeroones 190 - orr tmp2, data1, #REP8_7f 191 - eor diff, data1, data2 /* Non-zero if differences found. */ 192 - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ 193 - bics has_nul, tmp1, tmp2 194 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ 195 - b.ne .Lunequal_proc 196 - /*How far is the current str2 from the alignment boundary...*/ 197 - and tmp3, tmp3, #7 198 - .Lrecal_offset: 199 - neg pos, tmp3 200 - .Lloopcmp_proc: 201 - /* 202 - * Divide the eight bytes into two parts. First,backwards the src2 203 - * to an alignment boundary,load eight bytes from the SRC2 alignment 204 - * boundary,then compare with the relative bytes from SRC1. 205 - * If all 8 bytes are equal,then start the second part's comparison. 206 - * Otherwise finish the comparison. 207 - * This special handle can garantee all the accesses are in the 208 - * thread/task space in avoid to overrange access. 209 - */ 210 - ldr data1, [src1,pos] 211 - ldr data2, [src2,pos] 212 - sub tmp1, data1, zeroones 213 - orr tmp2, data1, #REP8_7f 214 - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ 215 - eor diff, data1, data2 /* Non-zero if differences found. 
*/ 216 - csinv endloop, diff, xzr, eq 217 - cbnz endloop, .Lunequal_proc 218 - 219 - /*The second part process*/ 220 - ldr data1, [src1], #8 221 - ldr data2, [src2], #8 222 - subs limit_wd, limit_wd, #1 223 - sub tmp1, data1, zeroones 224 - orr tmp2, data1, #REP8_7f 225 - eor diff, data1, data2 /* Non-zero if differences found. */ 226 - csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ 227 - bics has_nul, tmp1, tmp2 228 - ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ 229 - b.eq .Lloopcmp_proc 230 - 231 - .Lunequal_proc: 232 - orr syndrome, diff, has_nul 233 - cbz syndrome, .Lremain8 234 - .Lcal_cmpresult: 235 - /* 236 - * reversed the byte-order as big-endian,then CLZ can find the most 237 - * significant zero bits. 238 - */ 239 - CPU_LE( rev syndrome, syndrome ) 240 - CPU_LE( rev data1, data1 ) 241 - CPU_LE( rev data2, data2 ) 242 - /* 243 - * For big-endian we cannot use the trick with the syndrome value 244 - * as carry-propagation can corrupt the upper bits if the trailing 245 - * bytes in the string contain 0x01. 246 - * However, if there is no NUL byte in the dword, we can generate 247 - * the result directly. We can't just subtract the bytes as the 248 - * MSB might be significant. 249 - */ 250 - CPU_BE( cbnz has_nul, 1f ) 251 - CPU_BE( cmp data1, data2 ) 252 - CPU_BE( cset result, ne ) 253 - CPU_BE( cneg result, result, lo ) 254 - CPU_BE( ret ) 255 - CPU_BE( 1: ) 256 - /* Re-compute the NUL-byte detection, using a byte-reversed value.*/ 257 - CPU_BE( rev tmp3, data1 ) 258 - CPU_BE( sub tmp1, tmp3, zeroones ) 259 - CPU_BE( orr tmp2, tmp3, #REP8_7f ) 260 - CPU_BE( bic has_nul, tmp1, tmp2 ) 261 - CPU_BE( rev has_nul, has_nul ) 262 - CPU_BE( orr syndrome, diff, has_nul ) 263 - /* 264 - * The MS-non-zero bit of the syndrome marks either the first bit 265 - * that is different, or the top bit of the first zero byte. 266 - * Shifting left now will bring the critical information into the 267 - * top bits. 
268 - */ 100 + #ifndef __AARCH64EB__ 101 + rev syndrome, syndrome 102 + rev data1, data1 103 + /* The MS-non-zero bit of the syndrome marks either the first bit 104 + that is different, or the top bit of the first zero byte. 105 + Shifting left now will bring the critical information into the 106 + top bits. */ 269 107 clz pos, syndrome 108 + rev data2, data2 270 109 lsl data1, data1, pos 271 110 lsl data2, data2, pos 272 - /* 273 - * But we need to zero-extend (char is unsigned) the value and then 274 - * perform a signed 32-bit subtraction. 275 - */ 111 + /* But we need to zero-extend (char is unsigned) the value and then 112 + perform a signed 32-bit subtraction. */ 276 113 lsr data1, data1, #56 277 114 sub result, data1, data2, lsr #56 278 115 ret 116 + #else 117 + /* For big-endian we cannot use the trick with the syndrome value 118 + as carry-propagation can corrupt the upper bits if the trailing 119 + bytes in the string contain 0x01. */ 120 + /* However, if there is no NUL byte in the dword, we can generate 121 + the result directly. We can't just subtract the bytes as the 122 + MSB might be significant. */ 123 + cbnz has_nul, 1f 124 + cmp data1, data2 125 + cset result, ne 126 + cneg result, result, lo 127 + ret 128 + 1: 129 + /* Re-compute the NUL-byte detection, using a byte-reversed value. */ 130 + rev tmp3, data1 131 + sub tmp1, tmp3, zeroones 132 + orr tmp2, tmp3, #REP8_7f 133 + bic has_nul, tmp1, tmp2 134 + rev has_nul, has_nul 135 + orr syndrome, diff, has_nul 136 + clz pos, syndrome 137 + /* The MS-non-zero bit of the syndrome marks either the first bit 138 + that is different, or the top bit of the first zero byte. 139 + Shifting left now will bring the critical information into the 140 + top bits. */ 141 + lsl data1, data1, pos 142 + lsl data2, data2, pos 143 + /* But we need to zero-extend (char is unsigned) the value and then 144 + perform a signed 32-bit subtraction. 
*/ 145 + lsr data1, data1, #56 146 + sub result, data1, data2, lsr #56 147 + ret 148 + #endif 279 149 280 - .Lremain8: 281 - /* Limit % 8 == 0 => all bytes significant. */ 282 - ands limit, limit, #7 283 - b.eq .Lret0 284 - .Ltiny8proc: 150 + L(mutual_align): 151 + /* Sources are mutually aligned, but are not currently at an 152 + alignment boundary. Round down the addresses and then mask off 153 + the bytes that precede the start point. 154 + We also need to adjust the limit calculations, but without 155 + overflowing if the limit is near ULONG_MAX. */ 156 + bic src1, src1, #7 157 + bic src2, src2, #7 158 + ldr data1, [src1], #8 159 + neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ 160 + ldr data2, [src2], #8 161 + mov tmp2, #~0 162 + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ 163 + #ifdef __AARCH64EB__ 164 + /* Big-endian. Early bytes are at MSB. */ 165 + lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ 166 + #else 167 + /* Little-endian. Early bytes are at LSB. */ 168 + lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ 169 + #endif 170 + and tmp3, limit_wd, #7 171 + lsr limit_wd, limit_wd, #3 172 + /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ 173 + add limit, limit, count 174 + add tmp3, tmp3, count 175 + orr data1, data1, tmp2 176 + orr data2, data2, tmp2 177 + add limit_wd, limit_wd, tmp3, lsr #3 178 + b L(start_realigned) 179 + 180 + .p2align 4 181 + /* Don't bother with dwords for up to 16 bytes. */ 182 + L(misaligned8): 183 + cmp limit, #16 184 + b.hs L(try_misaligned_words) 185 + 186 + L(byte_loop): 187 + /* Perhaps we can do better than this. */ 285 188 ldrb data1w, [src1], #1 286 189 ldrb data2w, [src2], #1 287 190 subs limit, limit, #1 288 - 289 - ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ 290 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ 291 - b.eq .Ltiny8proc 191 + ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */ 192 + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. 
*/ 193 + b.eq L(byte_loop) 194 + L(done): 292 195 sub result, data1, data2 293 196 ret 197 + /* Align the SRC1 to a dword by doing a bytewise compare and then do 198 + the dword loop. */ 199 + L(try_misaligned_words): 200 + lsr limit_wd, limit, #3 201 + cbz count, L(do_misaligned) 294 202 295 - .Lret0: 203 + neg count, count 204 + and count, count, #7 205 + sub limit, limit, count 206 + lsr limit_wd, limit, #3 207 + 208 + L(page_end_loop): 209 + ldrb data1w, [src1], #1 210 + ldrb data2w, [src2], #1 211 + cmp data1w, #1 212 + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ 213 + b.ne L(done) 214 + subs count, count, #1 215 + b.hi L(page_end_loop) 216 + 217 + L(do_misaligned): 218 + /* Prepare ourselves for the next page crossing. Unlike the aligned 219 + loop, we fetch 1 less dword because we risk crossing bounds on 220 + SRC2. */ 221 + mov count, #8 222 + subs limit_wd, limit_wd, #1 223 + b.lo L(done_loop) 224 + L(loop_misaligned): 225 + and tmp2, src2, #0xff8 226 + eor tmp2, tmp2, #0xff8 227 + cbz tmp2, L(page_end_loop) 228 + 229 + ldr data1, [src1], #8 230 + ldr data2, [src2], #8 231 + sub tmp1, data1, zeroones 232 + orr tmp2, data1, #REP8_7f 233 + eor diff, data1, data2 /* Non-zero if differences found. */ 234 + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ 235 + ccmp diff, #0, #0, eq 236 + b.ne L(not_limit) 237 + subs limit_wd, limit_wd, #1 238 + b.pl L(loop_misaligned) 239 + 240 + L(done_loop): 241 + /* We found a difference or a NULL before the limit was reached. */ 242 + and limit, limit, #7 243 + cbz limit, L(not_limit) 244 + /* Read the last word. */ 245 + sub src1, src1, 8 246 + sub src2, src2, 8 247 + ldr data1, [src1, limit] 248 + ldr data2, [src2, limit] 249 + sub tmp1, data1, zeroones 250 + orr tmp2, data1, #REP8_7f 251 + eor diff, data1, data2 /* Non-zero if differences found. */ 252 + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. 
*/ 253 + ccmp diff, #0, #0, eq 254 + b.ne L(not_limit) 255 + 256 + L(ret0): 296 257 mov result, #0 297 258 ret 259 + 298 260 SYM_FUNC_END_PI(strncmp) 299 261 EXPORT_SYMBOL_NOKASAN(strncmp)
+2 -2
arch/arm64/lib/uaccess_flushcache.c
··· 15 15 * barrier to order the cache maintenance against the memcpy. 16 16 */ 17 17 memcpy(dst, src, cnt); 18 - __clean_dcache_area_pop(dst, cnt); 18 + dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt); 19 19 } 20 20 EXPORT_SYMBOL_GPL(memcpy_flushcache); 21 21 ··· 33 33 rc = raw_copy_from_user(to, from, n); 34 34 35 35 /* See above */ 36 - __clean_dcache_area_pop(to, n - rc); 36 + dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc); 37 37 return rc; 38 38 }
+89 -85
arch/arm64/mm/cache.S
··· 15 15 #include <asm/asm-uaccess.h> 16 16 17 17 /* 18 - * flush_icache_range(start,end) 18 + * caches_clean_inval_pou_macro(start,end) [fixup] 19 19 * 20 20 * Ensure that the I and D caches are coherent within specified region. 21 21 * This is typically used when code has been written to a memory region, ··· 23 23 * 24 24 * - start - virtual start address of region 25 25 * - end - virtual end address of region 26 + * - fixup - optional label to branch to on user fault 26 27 */ 27 - SYM_FUNC_START(__flush_icache_range) 28 - /* FALLTHROUGH */ 29 - 30 - /* 31 - * __flush_cache_user_range(start,end) 32 - * 33 - * Ensure that the I and D caches are coherent within specified region. 34 - * This is typically used when code has been written to a memory region, 35 - * and will be executed. 36 - * 37 - * - start - virtual start address of region 38 - * - end - virtual end address of region 39 - */ 40 - SYM_FUNC_START(__flush_cache_user_range) 41 - uaccess_ttbr0_enable x2, x3, x4 28 + .macro caches_clean_inval_pou_macro, fixup 42 29 alternative_if ARM64_HAS_CACHE_IDC 43 - dsb ishst 44 - b 7f 30 + dsb ishst 31 + b .Ldc_skip_\@ 45 32 alternative_else_nop_endif 46 - dcache_line_size x2, x3 47 - sub x3, x2, #1 48 - bic x4, x0, x3 49 - 1: 50 - user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE 51 - add x4, x4, x2 52 - cmp x4, x1 53 - b.lo 1b 54 - dsb ish 55 - 56 - 7: 33 + mov x2, x0 34 + mov x3, x1 35 + dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup 36 + .Ldc_skip_\@: 57 37 alternative_if ARM64_HAS_CACHE_DIC 58 38 isb 59 - b 8f 39 + b .Lic_skip_\@ 60 40 alternative_else_nop_endif 61 - invalidate_icache_by_line x0, x1, x2, x3, 9f 62 - 8: mov x0, #0 63 - 1: 64 - uaccess_ttbr0_disable x1, x2 65 - ret 66 - 9: 67 - mov x0, #-EFAULT 68 - b 1b 69 - SYM_FUNC_END(__flush_icache_range) 70 - SYM_FUNC_END(__flush_cache_user_range) 41 + invalidate_icache_by_line x0, x1, x2, x3, \fixup 42 + .Lic_skip_\@: 43 + .endm 71 44 72 45 /* 73 - * 
invalidate_icache_range(start,end) 46 + * caches_clean_inval_pou(start,end) 74 47 * 75 - * Ensure that the I cache is invalid within specified region. 48 + * Ensure that the I and D caches are coherent within specified region. 49 + * This is typically used when code has been written to a memory region, 50 + * and will be executed. 76 51 * 77 52 * - start - virtual start address of region 78 53 * - end - virtual end address of region 79 54 */ 80 - SYM_FUNC_START(invalidate_icache_range) 81 - alternative_if ARM64_HAS_CACHE_DIC 82 - mov x0, xzr 83 - isb 55 + SYM_FUNC_START(caches_clean_inval_pou) 56 + caches_clean_inval_pou_macro 84 57 ret 85 - alternative_else_nop_endif 58 + SYM_FUNC_END(caches_clean_inval_pou) 86 59 60 + /* 61 + * caches_clean_inval_user_pou(start,end) 62 + * 63 + * Ensure that the I and D caches are coherent within specified region. 64 + * This is typically used when code has been written to a memory region, 65 + * and will be executed. 66 + * 67 + * - start - virtual start address of region 68 + * - end - virtual end address of region 69 + */ 70 + SYM_FUNC_START(caches_clean_inval_user_pou) 87 71 uaccess_ttbr0_enable x2, x3, x4 88 72 89 - invalidate_icache_by_line x0, x1, x2, x3, 2f 73 + caches_clean_inval_pou_macro 2f 90 74 mov x0, xzr 91 75 1: 92 76 uaccess_ttbr0_disable x1, x2 ··· 78 94 2: 79 95 mov x0, #-EFAULT 80 96 b 1b 81 - SYM_FUNC_END(invalidate_icache_range) 97 + SYM_FUNC_END(caches_clean_inval_user_pou) 82 98 83 99 /* 84 - * __flush_dcache_area(kaddr, size) 100 + * icache_inval_pou(start,end) 85 101 * 86 - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 102 + * Ensure that the I cache is invalid within specified region. 
103 + * 104 + * - start - virtual start address of region 105 + * - end - virtual end address of region 106 + */ 107 + SYM_FUNC_START(icache_inval_pou) 108 + alternative_if ARM64_HAS_CACHE_DIC 109 + isb 110 + ret 111 + alternative_else_nop_endif 112 + 113 + invalidate_icache_by_line x0, x1, x2, x3 114 + ret 115 + SYM_FUNC_END(icache_inval_pou) 116 + 117 + /* 118 + * dcache_clean_inval_poc(start, end) 119 + * 120 + * Ensure that any D-cache lines for the interval [start, end) 87 121 * are cleaned and invalidated to the PoC. 88 122 * 89 - * - kaddr - kernel address 90 - * - size - size in question 123 + * - start - virtual start address of region 124 + * - end - virtual end address of region 91 125 */ 92 - SYM_FUNC_START_PI(__flush_dcache_area) 126 + SYM_FUNC_START_PI(dcache_clean_inval_poc) 93 127 dcache_by_line_op civac, sy, x0, x1, x2, x3 94 128 ret 95 - SYM_FUNC_END_PI(__flush_dcache_area) 129 + SYM_FUNC_END_PI(dcache_clean_inval_poc) 96 130 97 131 /* 98 - * __clean_dcache_area_pou(kaddr, size) 132 + * dcache_clean_pou(start, end) 99 133 * 100 - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 134 + * Ensure that any D-cache lines for the interval [start, end) 101 135 * are cleaned to the PoU. 
102 136 * 103 - * - kaddr - kernel address 104 - * - size - size in question 137 + * - start - virtual start address of region 138 + * - end - virtual end address of region 105 139 */ 106 - SYM_FUNC_START(__clean_dcache_area_pou) 140 + SYM_FUNC_START(dcache_clean_pou) 107 141 alternative_if ARM64_HAS_CACHE_IDC 108 142 dsb ishst 109 143 ret 110 144 alternative_else_nop_endif 111 145 dcache_by_line_op cvau, ish, x0, x1, x2, x3 112 146 ret 113 - SYM_FUNC_END(__clean_dcache_area_pou) 147 + SYM_FUNC_END(dcache_clean_pou) 114 148 115 149 /* 116 - * __inval_dcache_area(kaddr, size) 150 + * dcache_inval_poc(start, end) 117 151 * 118 - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 152 + * Ensure that any D-cache lines for the interval [start, end) 119 153 * are invalidated. Any partial lines at the ends of the interval are 120 154 * also cleaned to PoC to prevent data loss. 121 155 * 122 - * - kaddr - kernel address 123 - * - size - size in question 156 + * - start - kernel start address of region 157 + * - end - kernel end address of region 124 158 */ 125 159 SYM_FUNC_START_LOCAL(__dma_inv_area) 126 - SYM_FUNC_START_PI(__inval_dcache_area) 160 + SYM_FUNC_START_PI(dcache_inval_poc) 127 161 /* FALLTHROUGH */ 128 162 129 163 /* 130 - * __dma_inv_area(start, size) 164 + * __dma_inv_area(start, end) 131 165 * - start - virtual start address of region 132 - * - size - size in question 166 + * - end - virtual end address of region 133 167 */ 134 - add x1, x1, x0 135 168 dcache_line_size x2, x3 136 169 sub x3, x2, #1 137 170 tst x1, x3 // end cache line aligned? 
··· 166 165 b.lo 2b 167 166 dsb sy 168 167 ret 169 - SYM_FUNC_END_PI(__inval_dcache_area) 168 + SYM_FUNC_END_PI(dcache_inval_poc) 170 169 SYM_FUNC_END(__dma_inv_area) 171 170 172 171 /* 173 - * __clean_dcache_area_poc(kaddr, size) 172 + * dcache_clean_poc(start, end) 174 173 * 175 - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 174 + * Ensure that any D-cache lines for the interval [start, end) 176 175 * are cleaned to the PoC. 177 176 * 178 - * - kaddr - kernel address 179 - * - size - size in question 177 + * - start - virtual start address of region 178 + * - end - virtual end address of region 180 179 */ 181 180 SYM_FUNC_START_LOCAL(__dma_clean_area) 182 - SYM_FUNC_START_PI(__clean_dcache_area_poc) 181 + SYM_FUNC_START_PI(dcache_clean_poc) 183 182 /* FALLTHROUGH */ 184 183 185 184 /* 186 - * __dma_clean_area(start, size) 185 + * __dma_clean_area(start, end) 187 186 * - start - virtual start address of region 188 - * - size - size in question 187 + * - end - virtual end address of region 189 188 */ 190 189 dcache_by_line_op cvac, sy, x0, x1, x2, x3 191 190 ret 192 - SYM_FUNC_END_PI(__clean_dcache_area_poc) 191 + SYM_FUNC_END_PI(dcache_clean_poc) 193 192 SYM_FUNC_END(__dma_clean_area) 194 193 195 194 /* 196 - * __clean_dcache_area_pop(kaddr, size) 195 + * dcache_clean_pop(start, end) 197 196 * 198 - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) 197 + * Ensure that any D-cache lines for the interval [start, end) 199 198 * are cleaned to the PoP. 
200 199 * 201 - * - kaddr - kernel address 202 - * - size - size in question 200 + * - start - virtual start address of region 201 + * - end - virtual end address of region 203 202 */ 204 - SYM_FUNC_START_PI(__clean_dcache_area_pop) 203 + SYM_FUNC_START_PI(dcache_clean_pop) 205 204 alternative_if_not ARM64_HAS_DCPOP 206 - b __clean_dcache_area_poc 205 + b dcache_clean_poc 207 206 alternative_else_nop_endif 208 207 dcache_by_line_op cvap, sy, x0, x1, x2, x3 209 208 ret 210 - SYM_FUNC_END_PI(__clean_dcache_area_pop) 209 + SYM_FUNC_END_PI(dcache_clean_pop) 211 210 212 211 /* 213 212 * __dma_flush_area(start, size) ··· 218 217 * - size - size in question 219 218 */ 220 219 SYM_FUNC_START_PI(__dma_flush_area) 220 + add x1, x0, x1 221 221 dcache_by_line_op civac, sy, x0, x1, x2, x3 222 222 ret 223 223 SYM_FUNC_END_PI(__dma_flush_area) ··· 230 228 * - dir - DMA direction 231 229 */ 232 230 SYM_FUNC_START_PI(__dma_map_area) 231 + add x1, x0, x1 233 232 cmp w2, #DMA_FROM_DEVICE 234 233 b.eq __dma_inv_area 235 234 b __dma_clean_area ··· 243 240 * - dir - DMA direction 244 241 */ 245 242 SYM_FUNC_START_PI(__dma_unmap_area) 243 + add x1, x0, x1 246 244 cmp w2, #DMA_TO_DEVICE 247 245 b.ne __dma_inv_area 248 246 ret
+2 -4
arch/arm64/mm/context.c
··· 402 402 { 403 403 asid_bits = get_cpu_asid_bits(); 404 404 atomic64_set(&asid_generation, ASID_FIRST_VERSION); 405 - asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map), 406 - GFP_KERNEL); 405 + asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); 407 406 if (!asid_map) 408 407 panic("Failed to allocate bitmap for %lu ASIDs\n", 409 408 NUM_USER_ASIDS); 410 409 411 - pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), 412 - sizeof(*pinned_asid_map), GFP_KERNEL); 410 + pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL); 413 411 nr_pinned_asids = 0; 414 412 415 413 /*
+37 -13
arch/arm64/mm/fault.c
··· 99 99 pr_alert(" EA = %lu, S1PTW = %lu\n", 100 100 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT, 101 101 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT); 102 + pr_alert(" FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC), 103 + esr_to_fault_info(esr)->name); 102 104 103 105 if (esr_is_data_abort(esr)) 104 106 data_abort_decode(esr); ··· 234 232 return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; 235 233 } 236 234 235 + static bool is_el1_data_abort(unsigned int esr) 236 + { 237 + return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR; 238 + } 239 + 237 240 static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, 238 241 struct pt_regs *regs) 239 242 { 240 - unsigned int ec = ESR_ELx_EC(esr); 241 243 unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; 242 244 243 - if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) 245 + if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr)) 244 246 return false; 245 247 246 248 if (fsc_type == ESR_ELx_FSC_PERM) ··· 264 258 unsigned long flags; 265 259 u64 par, dfsc; 266 260 267 - if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR || 261 + if (!is_el1_data_abort(esr) || 268 262 (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) 269 263 return false; 270 264 ··· 352 346 353 347 static bool is_el1_mte_sync_tag_check_fault(unsigned int esr) 354 348 { 355 - unsigned int ec = ESR_ELx_EC(esr); 356 349 unsigned int fsc = esr & ESR_ELx_FSC; 357 350 358 - if (ec != ESR_ELx_EC_DABT_CUR) 351 + if (!is_el1_data_abort(esr)) 359 352 return false; 360 353 361 354 if (fsc == ESR_ELx_FSC_MTE) ··· 509 504 */ 510 505 if (!(vma->vm_flags & vm_flags)) 511 506 return VM_FAULT_BADACCESS; 512 - return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs); 507 + return handle_mm_fault(vma, addr, mm_flags, regs); 513 508 } 514 509 515 510 static bool is_el0_instruction_abort(unsigned int esr) ··· 841 836 } 842 837 NOKPROBE_SYMBOL(do_mem_abort); 843 838 844 - void do_el0_irq_bp_hardening(void) 845 - { 846 - /* PC has already been checked in entry.S */ 847 - 
arm64_apply_bp_hardening(); 848 - } 849 - NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); 850 - 851 839 void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) 852 840 { 853 841 arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, ··· 919 921 debug_exception_exit(regs); 920 922 } 921 923 NOKPROBE_SYMBOL(do_debug_exception); 924 + 925 + /* 926 + * Used during anonymous page fault handling. 927 + */ 928 + struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, 929 + unsigned long vaddr) 930 + { 931 + gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO; 932 + 933 + /* 934 + * If the page is mapped with PROT_MTE, initialise the tags at the 935 + * point of allocation and page zeroing as this is usually faster than 936 + * separate DC ZVA and STGM. 937 + */ 938 + if (vma->vm_flags & VM_MTE) 939 + flags |= __GFP_ZEROTAGS; 940 + 941 + return alloc_page_vma(flags, vma, vaddr); 942 + } 943 + 944 + void tag_clear_highpage(struct page *page) 945 + { 946 + mte_zero_clear_page_tags(page_address(page)); 947 + page_kasan_tag_reset(page); 948 + set_bit(PG_mte_tagged, &page->flags); 949 + }
+14 -15
arch/arm64/mm/flush.c
··· 14 14 #include <asm/cache.h> 15 15 #include <asm/tlbflush.h> 16 16 17 - void sync_icache_aliases(void *kaddr, unsigned long len) 17 + void sync_icache_aliases(unsigned long start, unsigned long end) 18 18 { 19 - unsigned long addr = (unsigned long)kaddr; 20 - 21 19 if (icache_is_aliasing()) { 22 - __clean_dcache_area_pou(kaddr, len); 23 - __flush_icache_all(); 20 + dcache_clean_pou(start, end); 21 + icache_inval_all_pou(); 24 22 } else { 25 23 /* 26 24 * Don't issue kick_all_cpus_sync() after I-cache invalidation 27 25 * for user mappings. 28 26 */ 29 - __flush_icache_range(addr, addr + len); 27 + caches_clean_inval_pou(start, end); 30 28 } 31 29 } 32 30 33 - static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, 34 - unsigned long uaddr, void *kaddr, 35 - unsigned long len) 31 + static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start, 32 + unsigned long end) 36 33 { 37 34 if (vma->vm_flags & VM_EXEC) 38 - sync_icache_aliases(kaddr, len); 35 + sync_icache_aliases(start, end); 39 36 } 40 37 41 38 /* ··· 45 48 unsigned long len) 46 49 { 47 50 memcpy(dst, src, len); 48 - flush_ptrace_access(vma, page, uaddr, dst, len); 51 + flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len); 49 52 } 50 53 51 54 void __sync_icache_dcache(pte_t pte) ··· 53 56 struct page *page = pte_page(pte); 54 57 55 58 if (!test_bit(PG_dcache_clean, &page->flags)) { 56 - sync_icache_aliases(page_address(page), page_size(page)); 59 + sync_icache_aliases((unsigned long)page_address(page), 60 + (unsigned long)page_address(page) + 61 + page_size(page)); 57 62 set_bit(PG_dcache_clean, &page->flags); 58 63 } 59 64 } ··· 76 77 /* 77 78 * Additional functions defined in assembly. 
78 79 */ 79 - EXPORT_SYMBOL(__flush_icache_range); 80 + EXPORT_SYMBOL(caches_clean_inval_pou); 80 81 81 82 #ifdef CONFIG_ARCH_HAS_PMEM_API 82 83 void arch_wb_cache_pmem(void *addr, size_t size) 83 84 { 84 85 /* Ensure order against any prior non-cacheable writes */ 85 86 dmb(osh); 86 - __clean_dcache_area_pop(addr, size); 87 + dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size); 87 88 } 88 89 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); 89 90 90 91 void arch_invalidate_pmem(void *addr, size_t size) 91 92 { 92 - __inval_dcache_area(addr, size); 93 + dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size); 93 94 } 94 95 EXPORT_SYMBOL_GPL(arch_invalidate_pmem); 95 96 #endif
+7
arch/arm64/mm/init.c
··· 499 499 BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64); 500 500 #endif 501 501 502 + /* 503 + * Selected page table levels should match when derived from 504 + * scratch using the virtual address range and page size. 505 + */ 506 + BUILD_BUG_ON(ARM64_HW_PGTABLE_LEVELS(CONFIG_ARM64_VA_BITS) != 507 + CONFIG_PGTABLE_LEVELS); 508 + 502 509 if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) { 503 510 extern int sysctl_overcommit_memory; 504 511 /*
+7 -6
arch/arm64/mm/mmu.c
··· 228 228 next = pmd_addr_end(addr, end); 229 229 230 230 /* try section mapping first */ 231 - if (((addr | next | phys) & ~SECTION_MASK) == 0 && 231 + if (((addr | next | phys) & ~PMD_MASK) == 0 && 232 232 (flags & NO_BLOCK_MAPPINGS) == 0) { 233 233 pmd_set_huge(pmdp, phys, prot); 234 234 ··· 1114 1114 } 1115 1115 #endif 1116 1116 1117 - #if !ARM64_SWAPPER_USES_SECTION_MAPS 1117 + #if !ARM64_KERNEL_USES_PMD_MAPS 1118 1118 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 1119 1119 struct vmem_altmap *altmap) 1120 1120 { 1121 1121 WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); 1122 1122 return vmemmap_populate_basepages(start, end, node, altmap); 1123 1123 } 1124 - #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ 1124 + #else /* !ARM64_KERNEL_USES_PMD_MAPS */ 1125 1125 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, 1126 1126 struct vmem_altmap *altmap) 1127 1127 { ··· 1166 1166 1167 1167 return 0; 1168 1168 } 1169 - #endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ 1169 + #endif /* !ARM64_KERNEL_USES_PMD_MAPS */ 1170 + 1171 + #ifdef CONFIG_MEMORY_HOTPLUG 1170 1172 void vmemmap_free(unsigned long start, unsigned long end, 1171 1173 struct vmem_altmap *altmap) 1172 1174 { 1173 - #ifdef CONFIG_MEMORY_HOTPLUG 1174 1175 WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); 1175 1176 1176 1177 unmap_hotplug_range(start, end, true, altmap); 1177 1178 free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END); 1178 - #endif 1179 1179 } 1180 + #endif /* CONFIG_MEMORY_HOTPLUG */ 1180 1181 1181 1182 static inline pud_t *fixmap_pud(unsigned long addr) 1182 1183 {
+9 -15
arch/arm64/mm/proc.S
··· 46 46 #endif 47 47 48 48 #ifdef CONFIG_KASAN_HW_TAGS 49 - #define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 49 + #define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 50 50 #else 51 - #define TCR_KASAN_HW_FLAGS 0 51 + /* 52 + * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on 53 + * TBI being enabled at EL1. 54 + */ 55 + #define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1 52 56 #endif 53 57 54 58 /* ··· 62 58 #define MAIR_EL1_SET \ 63 59 (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ 64 60 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ 65 - MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ 66 61 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ 67 62 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ 68 - MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) | \ 69 63 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED)) 70 64 71 65 #ifdef CONFIG_CPU_PM ··· 85 83 mrs x9, mdscr_el1 86 84 mrs x10, oslsr_el1 87 85 mrs x11, sctlr_el1 88 - alternative_if_not ARM64_HAS_VIRT_HOST_EXTN 89 - mrs x12, tpidr_el1 90 - alternative_else 91 - mrs x12, tpidr_el2 92 - alternative_endif 86 + get_this_cpu_offset x12 93 87 mrs x13, sp_el0 94 88 stp x2, x3, [x0] 95 89 stp x4, x5, [x0, #16] ··· 143 145 msr mdscr_el1, x10 144 146 145 147 msr sctlr_el1, x12 146 - alternative_if_not ARM64_HAS_VIRT_HOST_EXTN 147 - msr tpidr_el1, x13 148 - alternative_else 149 - msr tpidr_el2, x13 150 - alternative_endif 148 + set_this_cpu_offset x13 151 149 msr sp_el0, x14 152 150 /* 153 151 * Restore oslsr_el1 by writing oslar_el1 ··· 458 464 msr_s SYS_TFSRE0_EL1, xzr 459 465 460 466 /* set the TCR_EL1 bits */ 461 - mov_q x10, TCR_KASAN_HW_FLAGS 467 + mov_q x10, TCR_MTE_FLAGS 462 468 orr tcr, tcr, x10 463 469 1: 464 470 #endif
-4
arch/arm64/mm/ptdump.c
··· 159 159 .set = "DEVICE/nGnRE", 160 160 }, { 161 161 .mask = PTE_ATTRINDX_MASK, 162 - .val = PTE_ATTRINDX(MT_DEVICE_GRE), 163 - .set = "DEVICE/GRE", 164 - }, { 165 - .mask = PTE_ATTRINDX_MASK, 166 162 .val = PTE_ATTRINDX(MT_NORMAL_NC), 167 163 .set = "MEM/NORMAL-NC", 168 164 }, {
+1
arch/arm64/net/bpf_jit_comp.c
··· 16 16 #include <asm/byteorder.h> 17 17 #include <asm/cacheflush.h> 18 18 #include <asm/debug-monitors.h> 19 + #include <asm/insn.h> 19 20 #include <asm/set_memory.h> 20 21 21 22 #include "bpf_jit.h"
+2 -1
arch/arm64/tools/cpucaps
··· 3 3 # Internal CPU capabilities constants, keep this list sorted 4 4 5 5 BTI 6 - HAS_32BIT_EL0 6 + # Unreliable: use system_supports_32bit_el0() instead. 7 + HAS_32BIT_EL0_DO_NOT_USE 7 8 HAS_32BIT_EL1 8 9 HAS_ADDRESS_AUTH 9 10 HAS_ADDRESS_AUTH_ARCH
+3 -3
arch/ia64/include/asm/page.h
··· 82 82 } while (0) 83 83 84 84 85 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 85 + #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ 86 86 ({ \ 87 87 struct page *page = alloc_page_vma( \ 88 - GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr); \ 88 + GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr); \ 89 89 if (page) \ 90 90 flush_dcache_page(page); \ 91 91 page; \ 92 92 }) 93 93 94 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 94 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 95 95 96 96 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) 97 97
+3 -3
arch/m68k/include/asm/page_no.h
··· 13 13 #define clear_user_page(page, vaddr, pg) clear_page(page) 14 14 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) 15 15 16 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 17 - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) 18 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 16 + #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ 17 + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) 18 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 19 19 20 20 #define __pa(vaddr) ((unsigned long)(vaddr)) 21 21 #define __va(paddr) ((void *)((unsigned long)(paddr)))
+3 -3
arch/s390/include/asm/page.h
··· 68 68 #define clear_user_page(page, vaddr, pg) clear_page(page) 69 69 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) 70 70 71 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 72 - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) 73 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 71 + #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ 72 + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) 73 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 74 74 75 75 /* 76 76 * These are used to make use of C type-checking..
+3 -3
arch/x86/include/asm/page.h
··· 34 34 copy_page(to, from); 35 35 } 36 36 37 - #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ 38 - alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) 39 - #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 37 + #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ 38 + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) 39 + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 40 40 41 41 #ifndef __pa 42 42 #define __pa(x) __phys_addr((unsigned long)(x))
+7 -2
drivers/firmware/psci/psci.c
··· 335 335 { 336 336 int ret; 337 337 338 - if (!psci_power_state_loses_context(state)) 338 + if (!psci_power_state_loses_context(state)) { 339 + struct arm_cpuidle_irq_context context; 340 + 341 + arm_cpuidle_save_irq_context(&context); 339 342 ret = psci_ops.cpu_suspend(state, 0); 340 - else 343 + arm_cpuidle_restore_irq_context(&context); 344 + } else { 341 345 ret = cpu_suspend(state, psci_suspend_finisher); 346 + } 342 347 343 348 return ret; 344 349 }
+4
drivers/firmware/smccc/smccc.c
··· 15 15 static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE; 16 16 17 17 bool __ro_after_init smccc_trng_available = false; 18 + u64 __ro_after_init smccc_has_sve_hint = false; 18 19 19 20 void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit) 20 21 { ··· 23 22 smccc_conduit = conduit; 24 23 25 24 smccc_trng_available = smccc_probe_trng(); 25 + if (IS_ENABLED(CONFIG_ARM64_SVE) && 26 + smccc_version >= ARM_SMCCC_VERSION_1_3) 27 + smccc_has_sve_hint = true; 26 28 } 27 29 28 30 enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
+3 -3
drivers/misc/lkdtm/bugs.c
··· 463 463 #ifdef CONFIG_ARM64 464 464 static noinline void change_pac_parameters(void) 465 465 { 466 - if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) { 466 + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) { 467 467 /* Reset the keys of current task */ 468 468 ptrauth_thread_init_kernel(current); 469 469 ptrauth_thread_switch_kernel(current); ··· 477 477 #define CORRUPT_PAC_ITERATE 10 478 478 int i; 479 479 480 - if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) 481 - pr_err("FAIL: kernel not built with CONFIG_ARM64_PTR_AUTH\n"); 480 + if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) 481 + pr_err("FAIL: kernel not built with CONFIG_ARM64_PTR_AUTH_KERNEL\n"); 482 482 483 483 if (!system_supports_address_auth()) { 484 484 pr_err("FAIL: CPU lacks pointer authentication feature\n");
+2 -2
drivers/perf/arm-cci.c
··· 37 37 38 38 #define CCI_PMU_CNTR_SIZE(model) ((model)->cntr_size) 39 39 #define CCI_PMU_CNTR_BASE(model, idx) ((idx) * CCI_PMU_CNTR_SIZE(model)) 40 - #define CCI_PMU_CNTR_MASK ((1ULL << 32) -1) 40 + #define CCI_PMU_CNTR_MASK ((1ULL << 32) - 1) 41 41 #define CCI_PMU_CNTR_LAST(cci_pmu) (cci_pmu->num_cntrs - 1) 42 42 43 43 #define CCI_PMU_MAX_HW_CNTRS(model) \ ··· 806 806 return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event); 807 807 808 808 /* Generic code to find an unused idx from the mask */ 809 - for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) 809 + for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) 810 810 if (!test_and_set_bit(idx, hw->used_mask)) 811 811 return idx; 812 812
+2 -4
drivers/perf/arm-ccn.c
··· 1211 1211 perf_pmu_migrate_context(&dt->pmu, cpu, target); 1212 1212 dt->cpu = target; 1213 1213 if (ccn->irq) 1214 - WARN_ON(irq_set_affinity_hint(ccn->irq, cpumask_of(dt->cpu))); 1214 + WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu))); 1215 1215 return 0; 1216 1216 } 1217 1217 ··· 1291 1291 1292 1292 /* Also make sure that the overflow interrupt is handled by this CPU */ 1293 1293 if (ccn->irq) { 1294 - err = irq_set_affinity_hint(ccn->irq, cpumask_of(ccn->dt.cpu)); 1294 + err = irq_set_affinity(ccn->irq, cpumask_of(ccn->dt.cpu)); 1295 1295 if (err) { 1296 1296 dev_err(ccn->dev, "Failed to set interrupt affinity!\n"); 1297 1297 goto error_set_affinity; ··· 1325 1325 1326 1326 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, 1327 1327 &ccn->dt.node); 1328 - if (ccn->irq) 1329 - irq_set_affinity_hint(ccn->irq, NULL); 1330 1328 for (i = 0; i < ccn->num_xps; i++) 1331 1329 writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL); 1332 1330 writel(0, ccn->dt.base + CCN_DT_PMCR);
+4 -9
drivers/perf/arm-cmn.c
··· 31 31 #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) 32 32 #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) 33 33 34 - #define CMN_CHILD_NODE_ADDR GENMASK(27,0) 34 + #define CMN_CHILD_NODE_ADDR GENMASK(27, 0) 35 35 #define CMN_CHILD_NODE_EXTERNAL BIT(31) 36 36 37 37 #define CMN_ADDR_NODE_PTR GENMASK(27, 14) ··· 1162 1162 1163 1163 perf_pmu_migrate_context(&cmn->pmu, cpu, target); 1164 1164 for (i = 0; i < cmn->num_dtcs; i++) 1165 - irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target)); 1165 + irq_set_affinity(cmn->dtc[i].irq, cpumask_of(target)); 1166 1166 cmn->cpu = target; 1167 1167 return 0; 1168 1168 } ··· 1212 1212 irq = cmn->dtc[i].irq; 1213 1213 for (j = i; j--; ) { 1214 1214 if (cmn->dtc[j].irq == irq) { 1215 - cmn->dtc[j].irq_friend = j - i; 1215 + cmn->dtc[j].irq_friend = i - j; 1216 1216 goto next; 1217 1217 } 1218 1218 } ··· 1222 1222 if (err) 1223 1223 return err; 1224 1224 1225 - err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu)); 1225 + err = irq_set_affinity(irq, cpumask_of(cmn->cpu)); 1226 1226 if (err) 1227 1227 return err; 1228 1228 next: ··· 1568 1568 static int arm_cmn_remove(struct platform_device *pdev) 1569 1569 { 1570 1570 struct arm_cmn *cmn = platform_get_drvdata(pdev); 1571 - int i; 1572 1571 1573 1572 writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL); 1574 1573 1575 1574 perf_pmu_unregister(&cmn->pmu); 1576 1575 cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node); 1577 - 1578 - for (i = 0; i < cmn->num_dtcs; i++) 1579 - irq_set_affinity_hint(cmn->dtc[i].irq, NULL); 1580 - 1581 1576 return 0; 1582 1577 } 1583 1578
+2 -3
drivers/perf/arm_dmc620_pmu.c
··· 421 421 if (ret) 422 422 goto out_free_aff; 423 423 424 - ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu)); 424 + ret = irq_set_affinity(irq_num, cpumask_of(irq->cpu)); 425 425 if (ret) 426 426 goto out_free_irq; 427 427 ··· 475 475 list_del(&irq->irqs_node); 476 476 mutex_unlock(&dmc620_pmu_irqs_lock); 477 477 478 - WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL)); 479 478 free_irq(irq->irq_num, irq); 480 479 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &irq->node); 481 480 kfree(irq); ··· 621 622 perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target); 622 623 mutex_unlock(&dmc620_pmu_irqs_lock); 623 624 624 - WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target))); 625 + WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target))); 625 626 irq->cpu = target; 626 627 627 628 return 0;
+2 -6
drivers/perf/arm_dsu_pmu.c
··· 687 687 static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu) 688 688 { 689 689 cpumask_set_cpu(cpu, &dsu_pmu->active_cpu); 690 - if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu)) 690 + if (irq_set_affinity(dsu_pmu->irq, &dsu_pmu->active_cpu)) 691 691 pr_warn("Failed to set irq affinity to %d\n", cpu); 692 692 } 693 693 ··· 769 769 if (rc) { 770 770 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, 771 771 &dsu_pmu->cpuhp_node); 772 - irq_set_affinity_hint(dsu_pmu->irq, NULL); 773 772 } 774 773 775 774 return rc; ··· 780 781 781 782 perf_pmu_unregister(&dsu_pmu->pmu); 782 783 cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node); 783 - irq_set_affinity_hint(dsu_pmu->irq, NULL); 784 784 785 785 return 0; 786 786 } ··· 838 840 839 841 dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu); 840 842 /* If there are no active CPUs in the DSU, leave IRQ disabled */ 841 - if (dst >= nr_cpu_ids) { 842 - irq_set_affinity_hint(dsu_pmu->irq, NULL); 843 + if (dst >= nr_cpu_ids) 843 844 return 0; 844 - } 845 845 846 846 perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst); 847 847 dsu_pmu_set_active_cpu(dst, dsu_pmu);
+6 -10
drivers/perf/arm_pmu.c
··· 563 563 return ret; 564 564 } 565 565 566 - static ssize_t armpmu_cpumask_show(struct device *dev, 567 - struct device_attribute *attr, char *buf) 566 + static ssize_t cpus_show(struct device *dev, 567 + struct device_attribute *attr, char *buf) 568 568 { 569 569 struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev)); 570 570 return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus); 571 571 } 572 572 573 - static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL); 573 + static DEVICE_ATTR_RO(cpus); 574 574 575 575 static struct attribute *armpmu_common_attrs[] = { 576 576 &dev_attr_cpus.attr, ··· 644 644 } 645 645 646 646 irq_flags = IRQF_PERCPU | 647 - IRQF_NOBALANCING | 647 + IRQF_NOBALANCING | IRQF_NO_AUTOEN | 648 648 IRQF_NO_THREAD; 649 - 650 - irq_set_status_flags(irq, IRQ_NOAUTOEN); 651 649 652 650 err = request_nmi(irq, handler, irq_flags, "arm-pmu", 653 651 per_cpu_ptr(&cpu_armpmu, cpu)); ··· 668 670 &cpu_armpmu); 669 671 irq_ops = &percpu_pmuirq_ops; 670 672 } else { 671 - has_nmi= true; 673 + has_nmi = true; 672 674 irq_ops = &percpu_pmunmi_ops; 673 675 } 674 676 } else { ··· 867 869 int cpu; 868 870 869 871 pmu = kzalloc(sizeof(*pmu), flags); 870 - if (!pmu) { 871 - pr_info("failed to allocate PMU device!\n"); 872 + if (!pmu) 872 873 goto out; 873 - } 874 874 875 875 pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags); 876 876 if (!pmu->hw_events) {
+15 -20
drivers/perf/arm_smmuv3_pmu.c
··· 277 277 struct perf_event *event, int idx) 278 278 { 279 279 u32 span, sid; 280 - unsigned int num_ctrs = smmu_pmu->num_counters; 280 + unsigned int cur_idx, num_ctrs = smmu_pmu->num_counters; 281 281 bool filter_en = !!get_filter_enable(event); 282 282 283 283 span = filter_en ? get_filter_span(event) : ··· 285 285 sid = filter_en ? get_filter_stream_id(event) : 286 286 SMMU_PMCG_DEFAULT_FILTER_SID; 287 287 288 - /* Support individual filter settings */ 289 - if (!smmu_pmu->global_filter) { 288 + cur_idx = find_first_bit(smmu_pmu->used_counters, num_ctrs); 289 + /* 290 + * Per-counter filtering, or scheduling the first globally-filtered 291 + * event into an empty PMU so idx == 0 and it works out equivalent. 292 + */ 293 + if (!smmu_pmu->global_filter || cur_idx == num_ctrs) { 290 294 smmu_pmu_set_event_filter(event, idx, span, sid); 291 295 return 0; 292 296 } 293 297 294 - /* Requested settings same as current global settings*/ 295 - idx = find_first_bit(smmu_pmu->used_counters, num_ctrs); 296 - if (idx == num_ctrs || 297 - smmu_pmu_check_global_filter(smmu_pmu->events[idx], event)) { 298 - smmu_pmu_set_event_filter(event, 0, span, sid); 298 + /* Otherwise, must match whatever's currently scheduled */ 299 + if (smmu_pmu_check_global_filter(smmu_pmu->events[cur_idx], event)) { 300 + smmu_pmu_set_evtyper(smmu_pmu, idx, get_event(event)); 299 301 return 0; 300 302 } 301 303 ··· 511 509 return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); 512 510 } 513 511 514 - #define SMMU_EVENT_ATTR(name, config) \ 515 - (&((struct perf_pmu_events_attr) { \ 516 - .attr = __ATTR(name, 0444, smmu_pmu_event_show, NULL), \ 517 - .id = config, \ 518 - }).attr.attr) 512 + #define SMMU_EVENT_ATTR(name, config) \ 513 + PMU_EVENT_ATTR_ID(name, smmu_pmu_event_show, config) 519 514 520 515 static struct attribute *smmu_pmu_events[] = { 521 516 SMMU_EVENT_ATTR(cycles, 0), ··· 627 628 628 629 perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target); 629 630 smmu_pmu->on_cpu = target; 
630 - WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target))); 631 + WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target))); 631 632 632 633 return 0; 633 634 } ··· 838 839 839 840 /* Pick one CPU to be the preferred one to use */ 840 841 smmu_pmu->on_cpu = raw_smp_processor_id(); 841 - WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, 842 - cpumask_of(smmu_pmu->on_cpu))); 842 + WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu))); 843 843 844 844 err = cpuhp_state_add_instance_nocalls(cpuhp_state_num, 845 845 &smmu_pmu->node); 846 846 if (err) { 847 847 dev_err(dev, "Error %d registering hotplug, PMU @%pa\n", 848 848 err, &res_0->start); 849 - goto out_clear_affinity; 849 + return err; 850 850 } 851 851 852 852 err = perf_pmu_register(&smmu_pmu->pmu, name, -1); ··· 864 866 865 867 out_unregister: 866 868 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); 867 - out_clear_affinity: 868 - irq_set_affinity_hint(smmu_pmu->irq, NULL); 869 869 return err; 870 870 } 871 871 ··· 873 877 874 878 perf_pmu_unregister(&smmu_pmu->pmu); 875 879 cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); 876 - irq_set_affinity_hint(smmu_pmu->irq, NULL); 877 880 878 881 return 0; 879 882 }
+4 -8
drivers/perf/arm_spe_pmu.c
··· 231 231 .attrs = arm_spe_pmu_formats_attr, 232 232 }; 233 233 234 - static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev, 235 - struct device_attribute *attr, 236 - char *buf) 234 + static ssize_t cpumask_show(struct device *dev, 235 + struct device_attribute *attr, char *buf) 237 236 { 238 237 struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); 239 238 240 239 return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus); 241 240 } 242 - static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL); 241 + static DEVICE_ATTR_RO(cpumask); 243 242 244 243 static struct attribute *arm_spe_pmu_attrs[] = { 245 244 &dev_attr_cpumask.attr, ··· 1043 1044 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features); 1044 1045 1045 1046 spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED; 1046 - return; 1047 1047 } 1048 1048 1049 1049 static void __arm_spe_pmu_reset_local(void) ··· 1188 1190 } 1189 1191 1190 1192 spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL); 1191 - if (!spe_pmu) { 1192 - dev_err(dev, "failed to allocate spe_pmu\n"); 1193 + if (!spe_pmu) 1193 1194 return -ENOMEM; 1194 - } 1195 1195 1196 1196 spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle)); 1197 1197 if (!spe_pmu->handle)
+8 -10
drivers/perf/fsl_imx8_ddr_perf.c
··· 222 222 return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); 223 223 } 224 224 225 - #define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \ 226 - (&((struct perf_pmu_events_attr[]) { \ 227 - { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\ 228 - .id = _id, } \ 229 - })[0].attr.attr) 225 + #define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \ 226 + PMU_EVENT_ATTR_ID(_name, ddr_pmu_event_show, _id) 230 227 231 228 static struct attribute *ddr_perf_events_attrs[] = { 232 229 IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID), ··· 671 674 perf_pmu_migrate_context(&pmu->pmu, cpu, target); 672 675 pmu->cpu = target; 673 676 674 - WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu))); 677 + WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu))); 675 678 676 679 return 0; 677 680 } ··· 702 705 703 706 name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d", 704 707 num); 705 - if (!name) 706 - return -ENOMEM; 708 + if (!name) { 709 + ret = -ENOMEM; 710 + goto cpuhp_state_err; 711 + } 707 712 708 713 pmu->devtype_data = of_device_get_match_data(&pdev->dev); 709 714 ··· 748 749 } 749 750 750 751 pmu->irq = irq; 751 - ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)); 752 + ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)); 752 753 if (ret) { 753 754 dev_err(pmu->dev, "Failed to set interrupt affinity!\n"); 754 755 goto ddr_perf_err; ··· 776 777 777 778 cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); 778 779 cpuhp_remove_multi_state(pmu->cpuhp_state); 779 - irq_set_affinity_hint(pmu->irq, NULL); 780 780 781 781 perf_pmu_unregister(&pmu->pmu); 782 782
+1 -4
drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
··· 2 2 /* 3 3 * HiSilicon SoC DDRC uncore Hardware event counters support 4 4 * 5 - * Copyright (C) 2017 Hisilicon Limited 5 + * Copyright (C) 2017 HiSilicon Limited 6 6 * Author: Shaokun Zhang <zhangshaokun@hisilicon.com> 7 7 * Anurup M <anurup.m@huawei.com> 8 8 * ··· 537 537 dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n"); 538 538 cpuhp_state_remove_instance_nocalls( 539 539 CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node); 540 - irq_set_affinity_hint(ddrc_pmu->irq, NULL); 541 540 } 542 541 543 542 return ret; ··· 549 550 perf_pmu_unregister(&ddrc_pmu->pmu); 550 551 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, 551 552 &ddrc_pmu->node); 552 - irq_set_affinity_hint(ddrc_pmu->irq, NULL); 553 - 554 553 return 0; 555 554 } 556 555
+3 -6
drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
··· 2 2 /* 3 3 * HiSilicon SoC HHA uncore Hardware event counters support 4 4 * 5 - * Copyright (C) 2017 Hisilicon Limited 5 + * Copyright (C) 2017 HiSilicon Limited 6 6 * Author: Shaokun Zhang <zhangshaokun@hisilicon.com> 7 7 * Anurup M <anurup.m@huawei.com> 8 8 * ··· 90 90 91 91 val = readl(hha_pmu->base + HHA_DATSRC_CTRL); 92 92 val |= HHA_DATSRC_SKT_EN; 93 - writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL); 93 + writel(val, hha_pmu->base + HHA_DATSRC_CTRL); 94 94 } 95 95 } 96 96 ··· 104 104 105 105 val = readl(hha_pmu->base + HHA_DATSRC_CTRL); 106 106 val &= ~HHA_DATSRC_SKT_EN; 107 - writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL); 107 + writel(val, hha_pmu->base + HHA_DATSRC_CTRL); 108 108 } 109 109 } 110 110 ··· 540 540 dev_err(hha_pmu->dev, "HHA PMU register failed!\n"); 541 541 cpuhp_state_remove_instance_nocalls( 542 542 CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node); 543 - irq_set_affinity_hint(hha_pmu->irq, NULL); 544 543 } 545 544 546 545 return ret; ··· 552 553 perf_pmu_unregister(&hha_pmu->pmu); 553 554 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, 554 555 &hha_pmu->node); 555 - irq_set_affinity_hint(hha_pmu->irq, NULL); 556 - 557 556 return 0; 558 557 } 559 558
+1 -4
drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
··· 2 2 /* 3 3 * HiSilicon SoC L3C uncore Hardware event counters support 4 4 * 5 - * Copyright (C) 2017 Hisilicon Limited 5 + * Copyright (C) 2017 HiSilicon Limited 6 6 * Author: Anurup M <anurup.m@huawei.com> 7 7 * Shaokun Zhang <zhangshaokun@hisilicon.com> 8 8 * ··· 578 578 dev_err(l3c_pmu->dev, "L3C PMU register failed!\n"); 579 579 cpuhp_state_remove_instance_nocalls( 580 580 CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node); 581 - irq_set_affinity_hint(l3c_pmu->irq, NULL); 582 581 } 583 582 584 583 return ret; ··· 590 591 perf_pmu_unregister(&l3c_pmu->pmu); 591 592 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, 592 593 &l3c_pmu->node); 593 - irq_set_affinity_hint(l3c_pmu->irq, NULL); 594 - 595 594 return 0; 596 595 } 597 596
+1 -4
drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
··· 333 333 NULL 334 334 }; 335 335 336 - static struct attribute_group hisi_pa_pmu_identifier_group = { 336 + static const struct attribute_group hisi_pa_pmu_identifier_group = { 337 337 .attrs = hisi_pa_pmu_identifier_attrs, 338 338 }; 339 339 ··· 436 436 dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); 437 437 cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, 438 438 &pa_pmu->node); 439 - irq_set_affinity_hint(pa_pmu->irq, NULL); 440 439 return ret; 441 440 } 442 441 ··· 450 451 perf_pmu_unregister(&pa_pmu->pmu); 451 452 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, 452 453 &pa_pmu->node); 453 - irq_set_affinity_hint(pa_pmu->irq, NULL); 454 - 455 454 return 0; 456 455 } 457 456
+3 -3
drivers/perf/hisilicon/hisi_uncore_pmu.c
··· 2 2 /* 3 3 * HiSilicon SoC Hardware event counters support 4 4 * 5 - * Copyright (C) 2017 Hisilicon Limited 5 + * Copyright (C) 2017 HiSilicon Limited 6 6 * Author: Anurup M <anurup.m@huawei.com> 7 7 * Shaokun Zhang <zhangshaokun@hisilicon.com> 8 8 * ··· 488 488 hisi_pmu->on_cpu = cpu; 489 489 490 490 /* Overflow interrupt also should use the same CPU */ 491 - WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu))); 491 + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); 492 492 493 493 return 0; 494 494 } ··· 521 521 perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); 522 522 /* Use this CPU for event counting */ 523 523 hisi_pmu->on_cpu = target; 524 - WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target))); 524 + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); 525 525 526 526 return 0; 527 527 }
+1 -1
drivers/perf/hisilicon/hisi_uncore_pmu.h
··· 2 2 /* 3 3 * HiSilicon SoC Hardware event counters support 4 4 * 5 - * Copyright (C) 2017 Hisilicon Limited 5 + * Copyright (C) 2017 HiSilicon Limited 6 6 * Author: Anurup M <anurup.m@huawei.com> 7 7 * Shaokun Zhang <zhangshaokun@hisilicon.com> 8 8 *
+2 -5
drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
··· 2 2 /* 3 3 * HiSilicon SLLC uncore Hardware event counters support 4 4 * 5 - * Copyright (C) 2020 Hisilicon Limited 5 + * Copyright (C) 2020 HiSilicon Limited 6 6 * Author: Shaokun Zhang <zhangshaokun@hisilicon.com> 7 7 * 8 8 * This code is based on the uncore PMUs like arm-cci and arm-ccn. ··· 366 366 NULL 367 367 }; 368 368 369 - static struct attribute_group hisi_sllc_pmu_identifier_group = { 369 + static const struct attribute_group hisi_sllc_pmu_identifier_group = { 370 370 .attrs = hisi_sllc_pmu_identifier_attrs, 371 371 }; 372 372 ··· 465 465 dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret); 466 466 cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, 467 467 &sllc_pmu->node); 468 - irq_set_affinity_hint(sllc_pmu->irq, NULL); 469 468 return ret; 470 469 } 471 470 ··· 480 481 perf_pmu_unregister(&sllc_pmu->pmu); 481 482 cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, 482 483 &sllc_pmu->node); 483 - irq_set_affinity_hint(sllc_pmu->irq, NULL); 484 - 485 484 return 0; 486 485 } 487 486
+4 -7
drivers/perf/qcom_l2_pmu.c
··· 679 679 return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); 680 680 } 681 681 682 - #define L2CACHE_EVENT_ATTR(_name, _id) \ 683 - (&((struct perf_pmu_events_attr[]) { \ 684 - { .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \ 685 - .id = _id, } \ 686 - })[0].attr.attr) 682 + #define L2CACHE_EVENT_ATTR(_name, _id) \ 683 + PMU_EVENT_ATTR_ID(_name, l2cache_pmu_event_show, _id) 687 684 688 685 static struct attribute *l2_cache_pmu_events[] = { 689 686 L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES), ··· 866 869 irq = platform_get_irq(sdev, 0); 867 870 if (irq < 0) 868 871 return irq; 869 - irq_set_status_flags(irq, IRQ_NOAUTOEN); 870 872 cluster->irq = irq; 871 873 872 874 cluster->l2cache_pmu = l2cache_pmu; 873 875 cluster->on_cpu = -1; 874 876 875 877 err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq, 876 - IRQF_NOBALANCING | IRQF_NO_THREAD, 878 + IRQF_NOBALANCING | IRQF_NO_THREAD | 879 + IRQF_NO_AUTOEN, 877 880 "l2-cache-pmu", cluster); 878 881 if (err) { 879 882 dev_err(&pdev->dev,
+5 -10
drivers/perf/qcom_l3_pmu.c
··· 647 647 } 648 648 649 649 #define L3CACHE_EVENT_ATTR(_name, _id) \ 650 - (&((struct perf_pmu_events_attr[]) { \ 651 - { .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \ 652 - .id = _id, } \ 653 - })[0].attr.attr) 650 + PMU_EVENT_ATTR_ID(_name, l3cache_pmu_event_show, _id) 654 651 655 652 static struct attribute *qcom_l3_cache_pmu_events[] = { 656 653 L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES), ··· 667 670 668 671 /* cpumask */ 669 672 670 - static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev, 671 - struct device_attribute *attr, char *buf) 673 + static ssize_t cpumask_show(struct device *dev, 674 + struct device_attribute *attr, char *buf) 672 675 { 673 676 struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev)); 674 677 675 678 return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask); 676 679 } 677 680 678 - static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL); 681 + static DEVICE_ATTR_RO(cpumask); 679 682 680 683 static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = { 681 684 &dev_attr_cpumask.attr, ··· 764 767 765 768 memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0); 766 769 l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc); 767 - if (IS_ERR(l3pmu->regs)) { 768 - dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start); 770 + if (IS_ERR(l3pmu->regs)) 769 771 return PTR_ERR(l3pmu->regs); 770 - } 771 772 772 773 qcom_l3_cache__init(l3pmu); 773 774
+1 -3
drivers/perf/thunderx2_pmu.c
··· 817 817 } 818 818 819 819 base = devm_ioremap_resource(dev, &res); 820 - if (IS_ERR(base)) { 821 - dev_err(dev, "PMU type %d: Fail to map resource\n", type); 820 + if (IS_ERR(base)) 822 821 return NULL; 823 - } 824 822 825 823 tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL); 826 824 if (!tx2_pmu)
+7 -10
drivers/perf/xgene_pmu.c
··· 278 278 static ssize_t xgene_pmu_event_show(struct device *dev, 279 279 struct device_attribute *attr, char *buf) 280 280 { 281 - struct dev_ext_attribute *eattr; 281 + struct perf_pmu_events_attr *pmu_attr = 282 + container_of(attr, struct perf_pmu_events_attr, attr); 282 283 283 - eattr = container_of(attr, struct dev_ext_attribute, attr); 284 - return sysfs_emit(buf, "config=0x%lx\n", (unsigned long) eattr->var); 284 + return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id); 285 285 } 286 286 287 287 #define XGENE_PMU_EVENT_ATTR(_name, _config) \ 288 - (&((struct dev_ext_attribute[]) { \ 289 - { .attr = __ATTR(_name, S_IRUGO, xgene_pmu_event_show, NULL), \ 290 - .var = (void *) _config, } \ 291 - })[0].attr.attr) 288 + PMU_EVENT_ATTR_ID(_name, xgene_pmu_event_show, _config) 292 289 293 290 static struct attribute *l3c_pmu_events_attrs[] = { 294 291 XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), ··· 601 604 /* 602 605 * sysfs cpumask attributes 603 606 */ 604 - static ssize_t xgene_pmu_cpumask_show(struct device *dev, 605 - struct device_attribute *attr, char *buf) 607 + static ssize_t cpumask_show(struct device *dev, 608 + struct device_attribute *attr, char *buf) 606 609 { 607 610 struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev)); 608 611 609 612 return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu); 610 613 } 611 614 612 - static DEVICE_ATTR(cpumask, S_IRUGO, xgene_pmu_cpumask_show, NULL); 615 + static DEVICE_ATTR_RO(cpumask); 613 616 614 617 static struct attribute *xgene_pmu_cpumask_attrs[] = { 615 618 &dev_attr_cpumask.attr,
+86 -2
include/linux/arm-smccc.h
··· 63 63 #define ARM_SMCCC_VERSION_1_0 0x10000 64 64 #define ARM_SMCCC_VERSION_1_1 0x10001 65 65 #define ARM_SMCCC_VERSION_1_2 0x10002 66 + #define ARM_SMCCC_VERSION_1_3 0x10003 67 + 68 + #define ARM_SMCCC_1_3_SVE_HINT 0x10000 66 69 67 70 #define ARM_SMCCC_VERSION_FUNC_ID \ 68 71 ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ··· 219 216 220 217 void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); 221 218 219 + extern u64 smccc_has_sve_hint; 220 + 222 221 /** 223 222 * struct arm_smccc_res - Result from SMC/HVC call 224 223 * @a0-a3 result values from registers 0 to 3 ··· 231 226 unsigned long a2; 232 227 unsigned long a3; 233 228 }; 229 + 230 + #ifdef CONFIG_ARM64 231 + /** 232 + * struct arm_smccc_1_2_regs - Arguments for or Results from SMC/HVC call 233 + * @a0-a17 argument values from registers 0 to 17 234 + */ 235 + struct arm_smccc_1_2_regs { 236 + unsigned long a0; 237 + unsigned long a1; 238 + unsigned long a2; 239 + unsigned long a3; 240 + unsigned long a4; 241 + unsigned long a5; 242 + unsigned long a6; 243 + unsigned long a7; 244 + unsigned long a8; 245 + unsigned long a9; 246 + unsigned long a10; 247 + unsigned long a11; 248 + unsigned long a12; 249 + unsigned long a13; 250 + unsigned long a14; 251 + unsigned long a15; 252 + unsigned long a16; 253 + unsigned long a17; 254 + }; 255 + 256 + /** 257 + * arm_smccc_1_2_hvc() - make HVC calls 258 + * @args: arguments passed via struct arm_smccc_1_2_regs 259 + * @res: result values via struct arm_smccc_1_2_regs 260 + * 261 + * This function is used to make HVC calls following SMC Calling Convention 262 + * v1.2 or above. The content of the supplied param are copied from the 263 + * structure to registers prior to the HVC instruction. The return values 264 + * are updated with the content from registers on return from the HVC 265 + * instruction. 
266 + */ 267 + asmlinkage void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args, 268 + struct arm_smccc_1_2_regs *res); 269 + 270 + /** 271 + * arm_smccc_1_2_smc() - make SMC calls 272 + * @args: arguments passed via struct arm_smccc_1_2_regs 273 + * @res: result values via struct arm_smccc_1_2_regs 274 + * 275 + * This function is used to make SMC calls following SMC Calling Convention 276 + * v1.2 or above. The content of the supplied param are copied from the 277 + * structure to registers prior to the SMC instruction. The return values 278 + * are updated with the content from registers on return from the SMC 279 + * instruction. 280 + */ 281 + asmlinkage void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args, 282 + struct arm_smccc_1_2_regs *res); 283 + #endif 234 284 235 285 /** 236 286 * struct arm_smccc_quirk - Contains quirk information ··· 299 239 unsigned long a6; 300 240 } state; 301 241 }; 242 + 243 + /** 244 + * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls 245 + * 246 + * Sets the SMCCC hint bit to indicate if there is live state in the SVE 247 + * registers, this modifies x0 in place and should never be called from C 248 + * code. 
249 + */ 250 + asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); 302 251 303 252 /** 304 253 * __arm_smccc_smc() - make SMC calls ··· 363 294 364 295 #define SMCCC_SMC_INST __SMC(0) 365 296 #define SMCCC_HVC_INST __HVC(0) 297 + 298 + #endif 299 + 300 + /* nVHE hypervisor doesn't have a current thread so needs separate checks */ 301 + #if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__) 302 + 303 + #define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \ 304 + ARM64_SVE) 305 + #define smccc_sve_clobbers "x16", "x30", "cc", 306 + 307 + #else 308 + 309 + #define SMCCC_SVE_CHECK 310 + #define smccc_sve_clobbers 366 311 367 312 #endif 368 313 ··· 447 364 448 365 #define ___constraints(count) \ 449 366 : __constraint_read_ ## count \ 450 - : "memory" 367 + : smccc_sve_clobbers "memory" 451 368 #define __constraints(count) ___constraints(count) 452 369 453 370 /* ··· 462 379 register unsigned long r2 asm("r2"); \ 463 380 register unsigned long r3 asm("r3"); \ 464 381 __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \ 465 - asm volatile(inst "\n" : \ 382 + asm volatile(SMCCC_SVE_CHECK \ 383 + inst "\n" : \ 466 384 "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ 467 385 __constraints(__count_args(__VA_ARGS__))); \ 468 386 if (___res) \
+15 -3
include/linux/gfp.h
··· 53 53 #define ___GFP_HARDWALL 0x100000u 54 54 #define ___GFP_THISNODE 0x200000u 55 55 #define ___GFP_ACCOUNT 0x400000u 56 + #define ___GFP_ZEROTAGS 0x800000u 57 + #define ___GFP_SKIP_KASAN_POISON 0x1000000u 56 58 #ifdef CONFIG_LOCKDEP 57 - #define ___GFP_NOLOCKDEP 0x800000u 59 + #define ___GFP_NOLOCKDEP 0x2000000u 58 60 #else 59 61 #define ___GFP_NOLOCKDEP 0 60 62 #endif ··· 231 229 * %__GFP_COMP address compound page metadata. 232 230 * 233 231 * %__GFP_ZERO returns a zeroed page on success. 232 + * 233 + * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if 234 + * __GFP_ZERO is set. 235 + * 236 + * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned 237 + * on deallocation. Typically used for userspace pages. Currently only has an 238 + * effect in HW tags mode. 234 239 */ 235 240 #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) 236 241 #define __GFP_COMP ((__force gfp_t)___GFP_COMP) 237 242 #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) 243 + #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) 244 + #define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) 238 245 239 246 /* Disable lockdep for GFP context tracking */ 240 247 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) 241 248 242 249 /* Room for N __GFP_FOO bits */ 243 - #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) 250 + #define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) 244 251 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) 245 252 246 253 /** ··· 330 319 #define GFP_DMA __GFP_DMA 331 320 #define GFP_DMA32 __GFP_DMA32 332 321 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) 333 - #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) 322 + #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \ 323 + __GFP_SKIP_KASAN_POISON) 334 324 #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ 335 325 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) 336 326 
#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+16 -27
include/linux/highmem.h
··· 152 152 } 153 153 #endif 154 154 155 - #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE 155 + #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE 156 156 /** 157 - * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags 158 - * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE 157 + * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move 159 158 * @vma: The VMA the page is to be allocated for 160 159 * @vaddr: The virtual address the page will be inserted into 161 160 * 162 - * This function will allocate a page for a VMA but the caller is expected 163 - * to specify via movableflags whether the page will be movable in the 164 - * future or not 161 + * This function will allocate a page for a VMA that the caller knows will 162 + * be able to migrate in the future using move_pages() or reclaimed 165 163 * 166 164 * An architecture may override this function by defining 167 - * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own 165 + * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own 168 166 * implementation. 
169 167 */ 170 168 static inline struct page * 171 - __alloc_zeroed_user_highpage(gfp_t movableflags, 172 - struct vm_area_struct *vma, 173 - unsigned long vaddr) 169 + alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, 170 + unsigned long vaddr) 174 171 { 175 - struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, 176 - vma, vaddr); 172 + struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); 177 173 178 174 if (page) 179 175 clear_user_highpage(page, vaddr); ··· 178 182 } 179 183 #endif 180 184 181 - /** 182 - * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move 183 - * @vma: The VMA the page is to be allocated for 184 - * @vaddr: The virtual address the page will be inserted into 185 - * 186 - * This function will allocate a page for a VMA that the caller knows will 187 - * be able to migrate in the future using move_pages() or reclaimed 188 - */ 189 - static inline struct page * 190 - alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, 191 - unsigned long vaddr) 192 - { 193 - return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); 194 - } 195 - 196 185 static inline void clear_highpage(struct page *page) 197 186 { 198 187 void *kaddr = kmap_atomic(page); 199 188 clear_page(kaddr); 200 189 kunmap_atomic(kaddr); 201 190 } 191 + 192 + #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE 193 + 194 + static inline void tag_clear_highpage(struct page *page) 195 + { 196 + } 197 + 198 + #endif 202 199 203 200 /* 204 201 * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
+2 -33
include/linux/interrupt.h
··· 319 319 320 320 extern cpumask_var_t irq_default_affinity; 321 321 322 - /* Internal implementation. Use the helpers below */ 323 - extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask, 324 - bool force); 325 - 326 - /** 327 - * irq_set_affinity - Set the irq affinity of a given irq 328 - * @irq: Interrupt to set affinity 329 - * @cpumask: cpumask 330 - * 331 - * Fails if cpumask does not contain an online CPU 332 - */ 333 - static inline int 334 - irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) 335 - { 336 - return __irq_set_affinity(irq, cpumask, false); 337 - } 338 - 339 - /** 340 - * irq_force_affinity - Force the irq affinity of a given irq 341 - * @irq: Interrupt to set affinity 342 - * @cpumask: cpumask 343 - * 344 - * Same as irq_set_affinity, but without checking the mask against 345 - * online cpus. 346 - * 347 - * Solely for low level cpu hotplug code, where we need to make per 348 - * cpu interrupts affine before the cpu becomes online. 349 - */ 350 - static inline int 351 - irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) 352 - { 353 - return __irq_set_affinity(irq, cpumask, true); 354 - } 322 + extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); 323 + extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask); 355 324 356 325 extern int irq_can_set_affinity(unsigned int irq); 357 326 extern int irq_select_affinity(unsigned int irq);
+38 -26
include/linux/kasan.h
··· 2 2 #ifndef _LINUX_KASAN_H 3 3 #define _LINUX_KASAN_H 4 4 5 + #include <linux/bug.h> 5 6 #include <linux/static_key.h> 6 7 #include <linux/types.h> 7 8 ··· 80 79 81 80 #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ 82 81 83 - #ifdef CONFIG_KASAN 84 - 85 - struct kasan_cache { 86 - int alloc_meta_offset; 87 - int free_meta_offset; 88 - bool is_kmalloc; 89 - }; 90 - 91 82 #ifdef CONFIG_KASAN_HW_TAGS 92 83 93 84 DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled); ··· 94 101 return kasan_enabled(); 95 102 } 96 103 104 + void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); 105 + void kasan_free_pages(struct page *page, unsigned int order); 106 + 97 107 #else /* CONFIG_KASAN_HW_TAGS */ 98 108 99 109 static inline bool kasan_enabled(void) 100 110 { 101 - return true; 111 + return IS_ENABLED(CONFIG_KASAN); 102 112 } 103 113 104 114 static inline bool kasan_has_integrated_init(void) ··· 109 113 return false; 110 114 } 111 115 116 + static __always_inline void kasan_alloc_pages(struct page *page, 117 + unsigned int order, gfp_t flags) 118 + { 119 + /* Only available for integrated init. */ 120 + BUILD_BUG(); 121 + } 122 + 123 + static __always_inline void kasan_free_pages(struct page *page, 124 + unsigned int order) 125 + { 126 + /* Only available for integrated init. 
*/ 127 + BUILD_BUG(); 128 + } 129 + 112 130 #endif /* CONFIG_KASAN_HW_TAGS */ 131 + 132 + #ifdef CONFIG_KASAN 133 + 134 + struct kasan_cache { 135 + int alloc_meta_offset; 136 + int free_meta_offset; 137 + bool is_kmalloc; 138 + }; 113 139 114 140 slab_flags_t __kasan_never_merge(void); 115 141 static __always_inline slab_flags_t kasan_never_merge(void) ··· 148 130 __kasan_unpoison_range(addr, size); 149 131 } 150 132 151 - void __kasan_alloc_pages(struct page *page, unsigned int order, bool init); 152 - static __always_inline void kasan_alloc_pages(struct page *page, 133 + void __kasan_poison_pages(struct page *page, unsigned int order, bool init); 134 + static __always_inline void kasan_poison_pages(struct page *page, 153 135 unsigned int order, bool init) 154 136 { 155 137 if (kasan_enabled()) 156 - __kasan_alloc_pages(page, order, init); 138 + __kasan_poison_pages(page, order, init); 157 139 } 158 140 159 - void __kasan_free_pages(struct page *page, unsigned int order, bool init); 160 - static __always_inline void kasan_free_pages(struct page *page, 161 - unsigned int order, bool init) 141 + void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); 142 + static __always_inline void kasan_unpoison_pages(struct page *page, 143 + unsigned int order, bool init) 162 144 { 163 145 if (kasan_enabled()) 164 - __kasan_free_pages(page, order, init); 146 + __kasan_unpoison_pages(page, order, init); 165 147 } 166 148 167 149 void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size, ··· 303 285 304 286 #else /* CONFIG_KASAN */ 305 287 306 - static inline bool kasan_enabled(void) 307 - { 308 - return false; 309 - } 310 - static inline bool kasan_has_integrated_init(void) 311 - { 312 - return false; 313 - } 314 288 static inline slab_flags_t kasan_never_merge(void) 315 289 { 316 290 return 0; 317 291 } 318 292 static inline void kasan_unpoison_range(const void *address, size_t size) {} 319 - static inline void kasan_alloc_pages(struct page 
*page, unsigned int order, bool init) {} 320 - static inline void kasan_free_pages(struct page *page, unsigned int order, bool init) {} 293 + static inline void kasan_poison_pages(struct page *page, unsigned int order, 294 + bool init) {} 295 + static inline void kasan_unpoison_pages(struct page *page, unsigned int order, 296 + bool init) {} 321 297 static inline void kasan_cache_create(struct kmem_cache *cache, 322 298 unsigned int *size, 323 299 slab_flags_t *flags) {}
+9
include/linux/page-flags.h
··· 138 138 #ifdef CONFIG_64BIT 139 139 PG_arch_2, 140 140 #endif 141 + #ifdef CONFIG_KASAN_HW_TAGS 142 + PG_skip_kasan_poison, 143 + #endif 141 144 __NR_PAGEFLAGS, 142 145 143 146 /* Filesystems */ ··· 444 441 SETPAGEFLAG(Young, young, PF_ANY) 445 442 TESTCLEARFLAG(Young, young, PF_ANY) 446 443 PAGEFLAG(Idle, idle, PF_ANY) 444 + #endif 445 + 446 + #ifdef CONFIG_KASAN_HW_TAGS 447 + PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) 448 + #else 449 + PAGEFLAG_FALSE(SkipKASanPoison) 447 450 #endif 448 451 449 452 /*
+6
include/linux/perf_event.h
··· 1576 1576 .event_str = _str, \ 1577 1577 }; 1578 1578 1579 + #define PMU_EVENT_ATTR_ID(_name, _show, _id) \ 1580 + (&((struct perf_pmu_events_attr[]) { \ 1581 + { .attr = __ATTR(_name, 0444, _show, NULL), \ 1582 + .id = _id, } \ 1583 + })[0].attr.attr) 1584 + 1579 1585 #define PMU_FORMAT_ATTR(_name, _format) \ 1580 1586 static ssize_t \ 1581 1587 _name##_show(struct device *dev, \
+8 -1
include/trace/events/mmflags.h
··· 85 85 #define IF_HAVE_PG_ARCH_2(flag,string) 86 86 #endif 87 87 88 + #ifdef CONFIG_KASAN_HW_TAGS 89 + #define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) ,{1UL << flag, string} 90 + #else 91 + #define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) 92 + #endif 93 + 88 94 #define __def_pageflag_names \ 89 95 {1UL << PG_locked, "locked" }, \ 90 96 {1UL << PG_waiters, "waiters" }, \ ··· 118 112 IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ 119 113 IF_HAVE_PG_IDLE(PG_young, "young" ) \ 120 114 IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ 121 - IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) 115 + IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \ 116 + IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") 122 117 123 118 #define show_page_flags(flags) \ 124 119 (flags) ? __print_flags(flags, "|", \
+32 -1
kernel/irq/manage.c
··· 441 441 return ret; 442 442 } 443 443 444 - int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force) 444 + static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, 445 + bool force) 445 446 { 446 447 struct irq_desc *desc = irq_to_desc(irq); 447 448 unsigned long flags; ··· 456 455 raw_spin_unlock_irqrestore(&desc->lock, flags); 457 456 return ret; 458 457 } 458 + 459 + /** 460 + * irq_set_affinity - Set the irq affinity of a given irq 461 + * @irq: Interrupt to set affinity 462 + * @cpumask: cpumask 463 + * 464 + * Fails if cpumask does not contain an online CPU 465 + */ 466 + int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) 467 + { 468 + return __irq_set_affinity(irq, cpumask, false); 469 + } 470 + EXPORT_SYMBOL_GPL(irq_set_affinity); 471 + 472 + /** 473 + * irq_force_affinity - Force the irq affinity of a given irq 474 + * @irq: Interrupt to set affinity 475 + * @cpumask: cpumask 476 + * 477 + * Same as irq_set_affinity, but without checking the mask against 478 + * online cpus. 479 + * 480 + * Solely for low level cpu hotplug code, where we need to make per 481 + * cpu interrupts affine before the cpu becomes online. 482 + */ 483 + int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask) 484 + { 485 + return __irq_set_affinity(irq, cpumask, true); 486 + } 487 + EXPORT_SYMBOL_GPL(irq_force_affinity); 459 488 460 489 int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) 461 490 {
+2 -2
mm/kasan/common.c
··· 97 97 return 0; 98 98 } 99 99 100 - void __kasan_alloc_pages(struct page *page, unsigned int order, bool init) 100 + void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init) 101 101 { 102 102 u8 tag; 103 103 unsigned long i; ··· 111 111 kasan_unpoison(page_address(page), PAGE_SIZE << order, init); 112 112 } 113 113 114 - void __kasan_free_pages(struct page *page, unsigned int order, bool init) 114 + void __kasan_poison_pages(struct page *page, unsigned int order, bool init) 115 115 { 116 116 if (likely(!PageHighMem(page))) 117 117 kasan_poison(page_address(page), PAGE_SIZE << order,
+32
mm/kasan/hw_tags.c
··· 238 238 return &alloc_meta->free_track[0]; 239 239 } 240 240 241 + void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) 242 + { 243 + /* 244 + * This condition should match the one in post_alloc_hook() in 245 + * page_alloc.c. 246 + */ 247 + bool init = !want_init_on_free() && want_init_on_alloc(flags); 248 + 249 + if (flags & __GFP_SKIP_KASAN_POISON) 250 + SetPageSkipKASanPoison(page); 251 + 252 + if (flags & __GFP_ZEROTAGS) { 253 + int i; 254 + 255 + for (i = 0; i != 1 << order; ++i) 256 + tag_clear_highpage(page + i); 257 + } else { 258 + kasan_unpoison_pages(page, order, init); 259 + } 260 + } 261 + 262 + void kasan_free_pages(struct page *page, unsigned int order) 263 + { 264 + /* 265 + * This condition should match the one in free_pages_prepare() in 266 + * page_alloc.c. 267 + */ 268 + bool init = want_init_on_free(); 269 + 270 + kasan_poison_pages(page, order, init); 271 + } 272 + 241 273 #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) 242 274 243 275 void kasan_set_tagging_report_once(bool state)
+7
mm/kasan/sw_tags.c
··· 207 207 208 208 return &alloc_meta->free_track[i]; 209 209 } 210 + 211 + void kasan_tag_mismatch(unsigned long addr, unsigned long access_info, 212 + unsigned long ret_ip) 213 + { 214 + kasan_report(addr, 1 << (access_info & 0xf), access_info & 0x10, 215 + ret_ip); 216 + }
+4 -2
mm/mempool.c
··· 106 106 if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) 107 107 kasan_slab_free_mempool(element); 108 108 else if (pool->alloc == mempool_alloc_pages) 109 - kasan_free_pages(element, (unsigned long)pool->pool_data, false); 109 + kasan_poison_pages(element, (unsigned long)pool->pool_data, 110 + false); 110 111 } 111 112 112 113 static void kasan_unpoison_element(mempool_t *pool, void *element) ··· 115 114 if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) 116 115 kasan_unpoison_range(element, __ksize(element)); 117 116 else if (pool->alloc == mempool_alloc_pages) 118 - kasan_alloc_pages(element, (unsigned long)pool->pool_data, false); 117 + kasan_unpoison_pages(element, (unsigned long)pool->pool_data, 118 + false); 119 119 } 120 120 121 121 static __always_inline void add_element(mempool_t *pool, void *element)
+39 -27
mm/page_alloc.c
··· 382 382 static DEFINE_STATIC_KEY_TRUE(deferred_pages); 383 383 384 384 /* 385 - * Calling kasan_free_pages() only after deferred memory initialization 385 + * Calling kasan_poison_pages() only after deferred memory initialization 386 386 * has completed. Poisoning pages during deferred memory init will greatly 387 387 * lengthen the process and cause problem in large memory systems as the 388 388 * deferred pages initialization is done with interrupt disabled. ··· 394 394 * on-demand allocation and then freed again before the deferred pages 395 395 * initialization is done, but this is not likely to happen. 396 396 */ 397 - static inline void kasan_free_nondeferred_pages(struct page *page, int order, 398 - bool init, fpi_t fpi_flags) 397 + static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) 399 398 { 400 - if (static_branch_unlikely(&deferred_pages)) 401 - return; 402 - if (!IS_ENABLED(CONFIG_KASAN_GENERIC) && 403 - (fpi_flags & FPI_SKIP_KASAN_POISON)) 404 - return; 405 - kasan_free_pages(page, order, init); 399 + return static_branch_unlikely(&deferred_pages) || 400 + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && 401 + (fpi_flags & FPI_SKIP_KASAN_POISON)) || 402 + PageSkipKASanPoison(page); 406 403 } 407 404 408 405 /* Returns true if the struct page for the pfn is uninitialised */ ··· 450 453 return false; 451 454 } 452 455 #else 453 - static inline void kasan_free_nondeferred_pages(struct page *page, int order, 454 - bool init, fpi_t fpi_flags) 456 + static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) 455 457 { 456 - if (!IS_ENABLED(CONFIG_KASAN_GENERIC) && 457 - (fpi_flags & FPI_SKIP_KASAN_POISON)) 458 - return; 459 - kasan_free_pages(page, order, init); 458 + return (!IS_ENABLED(CONFIG_KASAN_GENERIC) && 459 + (fpi_flags & FPI_SKIP_KASAN_POISON)) || 460 + PageSkipKASanPoison(page); 460 461 } 461 462 462 463 static inline bool early_page_uninitialised(unsigned long pfn) ··· 1221 1226 return ret; 1222 1227 } 
1223 1228 1224 - static void kernel_init_free_pages(struct page *page, int numpages) 1229 + static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags) 1225 1230 { 1226 1231 int i; 1232 + 1233 + if (zero_tags) { 1234 + for (i = 0; i < numpages; i++) 1235 + tag_clear_highpage(page + i); 1236 + return; 1237 + } 1227 1238 1228 1239 /* s390's use of memset() could override KASAN redzones. */ 1229 1240 kasan_disable_current(); ··· 1246 1245 unsigned int order, bool check_free, fpi_t fpi_flags) 1247 1246 { 1248 1247 int bad = 0; 1249 - bool init; 1248 + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); 1250 1249 1251 1250 VM_BUG_ON_PAGE(PageTail(page), page); 1252 1251 ··· 1315 1314 * With hardware tag-based KASAN, memory tags must be set before the 1316 1315 * page becomes unavailable via debug_pagealloc or arch_free_page. 1317 1316 */ 1318 - init = want_init_on_free(); 1319 - if (init && !kasan_has_integrated_init()) 1320 - kernel_init_free_pages(page, 1 << order); 1321 - kasan_free_nondeferred_pages(page, order, init, fpi_flags); 1317 + if (kasan_has_integrated_init()) { 1318 + if (!skip_kasan_poison) 1319 + kasan_free_pages(page, order); 1320 + } else { 1321 + bool init = want_init_on_free(); 1322 + 1323 + if (init) 1324 + kernel_init_free_pages(page, 1 << order, false); 1325 + if (!skip_kasan_poison) 1326 + kasan_poison_pages(page, order, init); 1327 + } 1322 1328 1323 1329 /* 1324 1330 * arch_free_page() can make the page's contents inaccessible. s390 ··· 2332 2324 inline void post_alloc_hook(struct page *page, unsigned int order, 2333 2325 gfp_t gfp_flags) 2334 2326 { 2335 - bool init; 2336 - 2337 2327 set_page_private(page, 0); 2338 2328 set_page_refcounted(page); 2339 2329 ··· 2350 2344 * kasan_alloc_pages and kernel_init_free_pages must be 2351 2345 * kept together to avoid discrepancies in behavior. 
2352 2346 */ 2353 - init = !want_init_on_free() && want_init_on_alloc(gfp_flags); 2354 - kasan_alloc_pages(page, order, init); 2355 - if (init && !kasan_has_integrated_init()) 2356 - kernel_init_free_pages(page, 1 << order); 2347 + if (kasan_has_integrated_init()) { 2348 + kasan_alloc_pages(page, order, gfp_flags); 2349 + } else { 2350 + bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags); 2351 + 2352 + kasan_unpoison_pages(page, order, init); 2353 + if (init) 2354 + kernel_init_free_pages(page, 1 << order, 2355 + gfp_flags & __GFP_ZEROTAGS); 2356 + } 2357 2357 2358 2358 set_page_owner(page, order, gfp_flags); 2359 2359 }
+1
scripts/Makefile.kasan
··· 50 50 CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ 51 51 $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \ 52 52 $(call cc-param,hwasan-use-short-granules=0) \ 53 + $(call cc-param,hwasan-inline-all-checks=0) \ 53 54 $(instrumentation_flags) 54 55 55 56 endif # CONFIG_KASAN_SW_TAGS
+2 -1
scripts/tools-support-relr.sh
··· 7 7 cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1 8 8 void *p = &p; 9 9 END 10 - $LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file 10 + $LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr \ 11 + --use-android-relr-tags -o $tmp_file 11 12 12 13 # Despite printing an error message, GNU nm still exits with exit code 0 if it 13 14 # sees a relr section. So we need to check that nothing is printed to stderr.
+1 -1
tools/testing/selftests/arm64/fp/sve-probe-vls.c
··· 25 25 ksft_set_plan(2); 26 26 27 27 if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) 28 - ksft_exit_skip("SVE not available"); 28 + ksft_exit_skip("SVE not available\n"); 29 29 30 30 /* 31 31 * Enumerate up to SVE_VQ_MAX vector lengths