Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Catalin Marinas:
"Two left-over updates that could not go into -rc1 due to conflicts
with other series:

- Simplify checks in arch_kfence_init_pool() since
force_pte_mapping() already takes BBML2-noabort (break-before-make
Level 2 with no aborts generated) into account

- Remove unneeded SVE/SME fallback preserve/store handling in the
arm64 EFI. With the recent updates, the fallback path is only taken
for EFI runtime calls from hardirq or NMI contexts. In practice,
this only happens under panic/oops/emergency_restart() and no
restoring of the user state is expected.

There's a corresponding lkdtm update to trigger a BUG() or panic()
from hardirq context, together with a fixup to avoid confusing
clang/objtool about the control flow.

- GCS (guarded control stacks) fix: flush the GCS locking state on
exec, otherwise the new task will not be able to enable GCS (locked
as disabled)"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
lkdtm/bugs: Do not confuse the clang/objtool with busy wait loop
arm64/gcs: Flush the GCS locking state on exec
arm64/efi: Remove unneeded SVE/SME fallback preserve/store handling
lkdtm/bugs: Add cases for BUG and PANIC occurring in hardirq context
arm64: mm: Simplify check in arch_kfence_init_pool()

+92 -127
+20 -110
arch/arm64/kernel/fpsimd.c
··· 180 180 set_default_vl(ARM64_VEC_SVE, val); 181 181 } 182 182 183 - static u8 *efi_sve_state; 184 - 185 - #else /* ! CONFIG_ARM64_SVE */ 186 - 187 - /* Dummy declaration for code that will be optimised out: */ 188 - extern u8 *efi_sve_state; 189 - 190 183 #endif /* ! CONFIG_ARM64_SVE */ 191 184 192 185 #ifdef CONFIG_ARM64_SME ··· 1088 1095 return 0; 1089 1096 } 1090 1097 1091 - static void __init sve_efi_setup(void) 1092 - { 1093 - int max_vl = 0; 1094 - int i; 1095 - 1096 - if (!IS_ENABLED(CONFIG_EFI)) 1097 - return; 1098 - 1099 - for (i = 0; i < ARRAY_SIZE(vl_info); i++) 1100 - max_vl = max(vl_info[i].max_vl, max_vl); 1101 - 1102 - /* 1103 - * alloc_percpu() warns and prints a backtrace if this goes wrong. 1104 - * This is evidence of a crippled system and we are returning void, 1105 - * so no attempt is made to handle this situation here. 1106 - */ 1107 - if (!sve_vl_valid(max_vl)) 1108 - goto fail; 1109 - 1110 - efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), 1111 - GFP_KERNEL); 1112 - if (!efi_sve_state) 1113 - goto fail; 1114 - 1115 - return; 1116 - 1117 - fail: 1118 - panic("Cannot allocate memory for EFI SVE save/restore"); 1119 - } 1120 - 1121 1098 void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p) 1122 1099 { 1123 1100 write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1); ··· 1148 1185 if (sve_max_virtualisable_vl() < sve_max_vl()) 1149 1186 pr_warn("%s: unvirtualisable vector lengths present\n", 1150 1187 info->name); 1151 - 1152 - sve_efi_setup(); 1153 1188 } 1154 1189 1155 1190 /* ··· 1908 1947 #ifdef CONFIG_EFI 1909 1948 1910 1949 static struct user_fpsimd_state efi_fpsimd_state; 1911 - static bool efi_fpsimd_state_used; 1912 - static bool efi_sve_state_used; 1913 - static bool efi_sm_state; 1914 1950 1915 1951 /* 1916 1952 * EFI runtime services support functions ··· 1934 1976 if (may_use_simd()) { 1935 1977 kernel_neon_begin(&efi_fpsimd_state); 1936 1978 } else { 1937 - 
WARN_ON(preemptible()); 1938 - 1939 1979 /* 1940 - * If !efi_sve_state, SVE can't be in use yet and doesn't need 1941 - * preserving: 1980 + * We are running in hardirq or NMI context, and the only 1981 + * legitimate case where this might happen is when EFI pstore 1982 + * is attempting to record the system's dying gasps into EFI 1983 + * variables. This could be due to an oops, a panic or a call 1984 + * to emergency_restart(), and in none of those cases, we can 1985 + * expect the current task to ever return to user space again, 1986 + * or for the kernel to resume any normal execution, for that 1987 + * matter (an oops in hardirq context triggers a panic too). 1988 + * 1989 + * Therefore, there is no point in attempting to preserve any 1990 + * SVE/SME state here. On the off chance that we might have 1991 + * ended up here for a different reason inadvertently, kill the 1992 + * task and preserve/restore the base FP/SIMD state, which 1993 + * might belong to kernel mode FP/SIMD. 1942 1994 */ 1943 - if (system_supports_sve() && efi_sve_state != NULL) { 1944 - bool ffr = true; 1945 - u64 svcr; 1946 - 1947 - efi_sve_state_used = true; 1948 - 1949 - if (system_supports_sme()) { 1950 - svcr = read_sysreg_s(SYS_SVCR); 1951 - 1952 - efi_sm_state = svcr & SVCR_SM_MASK; 1953 - 1954 - /* 1955 - * Unless we have FA64 FFR does not 1956 - * exist in streaming mode. 1957 - */ 1958 - if (!system_supports_fa64()) 1959 - ffr = !(svcr & SVCR_SM_MASK); 1960 - } 1961 - 1962 - sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()), 1963 - &efi_fpsimd_state.fpsr, ffr); 1964 - 1965 - if (system_supports_sme()) 1966 - sysreg_clear_set_s(SYS_SVCR, 1967 - SVCR_SM_MASK, 0); 1968 - 1969 - } else { 1970 - fpsimd_save_state(&efi_fpsimd_state); 1971 - } 1972 - 1973 - efi_fpsimd_state_used = true; 1995 + pr_warn_ratelimited("Calling EFI runtime from %s context\n", 1996 + in_nmi() ? 
"NMI" : "hardirq"); 1997 + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); 1998 + fpsimd_save_state(&efi_fpsimd_state); 1974 1999 } 1975 2000 } 1976 2001 ··· 1965 2024 if (!system_supports_fpsimd()) 1966 2025 return; 1967 2026 1968 - if (!efi_fpsimd_state_used) { 2027 + if (may_use_simd()) { 1969 2028 kernel_neon_end(&efi_fpsimd_state); 1970 2029 } else { 1971 - if (system_supports_sve() && efi_sve_state_used) { 1972 - bool ffr = true; 1973 - 1974 - /* 1975 - * Restore streaming mode; EFI calls are 1976 - * normal function calls so should not return in 1977 - * streaming mode. 1978 - */ 1979 - if (system_supports_sme()) { 1980 - if (efi_sm_state) { 1981 - sysreg_clear_set_s(SYS_SVCR, 1982 - 0, 1983 - SVCR_SM_MASK); 1984 - 1985 - /* 1986 - * Unless we have FA64 FFR does not 1987 - * exist in streaming mode. 1988 - */ 1989 - if (!system_supports_fa64()) 1990 - ffr = false; 1991 - } 1992 - } 1993 - 1994 - sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()), 1995 - &efi_fpsimd_state.fpsr, ffr); 1996 - 1997 - efi_sve_state_used = false; 1998 - } else { 1999 - fpsimd_load_state(&efi_fpsimd_state); 2000 - } 2001 - 2002 - efi_fpsimd_state_used = false; 2030 + fpsimd_load_state(&efi_fpsimd_state); 2003 2031 } 2004 2032 } 2005 2033
+1
arch/arm64/kernel/process.c
··· 292 292 current->thread.gcs_base = 0; 293 293 current->thread.gcs_size = 0; 294 294 current->thread.gcs_el0_mode = 0; 295 + current->thread.gcs_el0_locked = 0; 295 296 write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); 296 297 write_sysreg_s(0, SYS_GCSPR_EL0); 297 298 }
+16 -17
arch/arm64/mm/mmu.c
··· 767 767 return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); 768 768 } 769 769 770 - static inline bool split_leaf_mapping_possible(void) 771 - { 772 - /* 773 - * !BBML2_NOABORT systems should never run into scenarios where we would 774 - * have to split. So exit early and let calling code detect it and raise 775 - * a warning. 776 - */ 777 - if (!system_supports_bbml2_noabort()) 778 - return false; 779 - return !force_pte_mapping(); 780 - } 781 - 782 770 static DEFINE_MUTEX(pgtable_split_lock); 783 771 784 772 int split_kernel_leaf_mapping(unsigned long start, unsigned long end) ··· 774 786 int ret; 775 787 776 788 /* 777 - * Exit early if the region is within a pte-mapped area or if we can't 778 - * split. For the latter case, the permission change code will raise a 779 - * warning if not already pte-mapped. 789 + * !BBML2_NOABORT systems should not be trying to change permissions on 790 + * anything that is not pte-mapped in the first place. Just return early 791 + * and let the permission change code raise a warning if not already 792 + * pte-mapped. 780 793 */ 781 - if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) 794 + if (!system_supports_bbml2_noabort()) 795 + return 0; 796 + 797 + /* 798 + * If the region is within a pte-mapped area, there is no need to try to 799 + * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may 800 + * change permissions from atomic context so for those cases (which are 801 + * always pte-mapped), we must not go any further because taking the 802 + * mutex below may sleep. 803 + */ 804 + if (force_pte_mapping() || is_kfence_address((void *)start)) 782 805 return 0; 783 806 784 807 /* ··· 1088 1089 int ret; 1089 1090 1090 1091 /* Exit early if we know the linear map is already pte-mapped. */ 1091 - if (!split_leaf_mapping_possible()) 1092 + if (force_pte_mapping()) 1092 1093 return true; 1093 1094 1094 1095 /* Kfence pool is already pte-mapped for the early init case. 
*/
+53
drivers/misc/lkdtm/bugs.c
··· 8 8 #include "lkdtm.h" 9 9 #include <linux/cpu.h> 10 10 #include <linux/list.h> 11 + #include <linux/hrtimer.h> 11 12 #include <linux/sched.h> 12 13 #include <linux/sched/signal.h> 13 14 #include <linux/sched/task_stack.h> ··· 101 100 stop_machine(panic_stop_irqoff_fn, &v, cpu_online_mask); 102 101 } 103 102 103 + static bool wait_for_panic; 104 + 105 + static enum hrtimer_restart panic_in_hardirq(struct hrtimer *timer) 106 + { 107 + panic("from hard IRQ context"); 108 + 109 + wait_for_panic = false; 110 + return HRTIMER_NORESTART; 111 + } 112 + 113 + static void lkdtm_PANIC_IN_HARDIRQ(void) 114 + { 115 + struct hrtimer timer; 116 + 117 + wait_for_panic = true; 118 + hrtimer_setup_on_stack(&timer, panic_in_hardirq, 119 + CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 120 + hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD); 121 + 122 + while (READ_ONCE(wait_for_panic)) 123 + cpu_relax(); 124 + 125 + hrtimer_cancel(&timer); 126 + } 127 + 104 128 static void lkdtm_BUG(void) 105 129 { 106 130 BUG(); 131 + } 132 + 133 + static bool wait_for_bug; 134 + 135 + static enum hrtimer_restart bug_in_hardirq(struct hrtimer *timer) 136 + { 137 + BUG(); 138 + 139 + wait_for_bug = false; 140 + return HRTIMER_NORESTART; 141 + } 142 + 143 + static void lkdtm_BUG_IN_HARDIRQ(void) 144 + { 145 + struct hrtimer timer; 146 + 147 + wait_for_bug = true; 148 + hrtimer_setup_on_stack(&timer, bug_in_hardirq, 149 + CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 150 + hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD); 151 + 152 + while (READ_ONCE(wait_for_bug)) 153 + cpu_relax(); 154 + 155 + hrtimer_cancel(&timer); 107 156 } 108 157 109 158 static int warn_counter; ··· 747 696 static struct crashtype crashtypes[] = { 748 697 CRASHTYPE(PANIC), 749 698 CRASHTYPE(PANIC_STOP_IRQOFF), 699 + CRASHTYPE(PANIC_IN_HARDIRQ), 750 700 CRASHTYPE(BUG), 701 + CRASHTYPE(BUG_IN_HARDIRQ), 751 702 CRASHTYPE(WARNING), 752 703 CRASHTYPE(WARNING_MESSAGE), 753 704 CRASHTYPE(EXCEPTION),
+2
tools/testing/selftests/lkdtm/tests.txt
··· 1 1 #PANIC 2 2 #PANIC_STOP_IRQOFF Crashes entire system 3 + #PANIC_IN_HARDIRQ Crashes entire system 3 4 BUG kernel BUG at 5 + #BUG_IN_HARDIRQ Crashes entire system 4 6 WARNING WARNING: 5 7 WARNING_MESSAGE message trigger 6 8 EXCEPTION