Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: sdei: abort running SDEI handlers during crash

Interrupts are blocked in SDEI context, per the SDEI spec: "The client
interrupts cannot preempt the event handler." If we crashed in the SDEI
handler-running context (as with ACPI's AGDI) then we need to clean up the
SDEI state before proceeding to the crash kernel so that the crash kernel
can have working interrupts.

Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME
the handler, discarding the interrupted context.

Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking")
Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
Cc: stable@vger.kernel.org
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: Mihai Carabas <mihai.carabas@oracle.com>
Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com
Signed-off-by: Will Deacon <will@kernel.org>

Authored by D Scott Phillips and committed by Will Deacon
5cd474e5 b9d60124

+59 -6
+6
arch/arm64/include/asm/sdei.h
··· 17 17 18 18 #include <asm/virt.h> 19 19 20 + DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); 21 + DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); 22 + 20 23 extern unsigned long sdei_exit_mode; 21 24 22 25 /* Software Delegated Exception entry point from firmware*/ ··· 31 28 unsigned long arg, 32 29 unsigned long pc, 33 30 unsigned long pstate); 31 + 32 + /* Abort a running handler. Context is discarded. */ 33 + void __sdei_handler_abort(void); 34 34 35 35 /* 36 36 * The above entry point does the minimum to call C code. This function does
+25 -2
arch/arm64/kernel/entry.S
··· 986 986 987 987 mov x19, x1 988 988 989 - #if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) 989 + /* Store the registered-event for crash_smp_send_stop() */ 990 990 ldrb w4, [x19, #SDEI_EVENT_PRIORITY] 991 - #endif 991 + cbnz w4, 1f 992 + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 993 + b 2f 994 + 1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 995 + 2: str x19, [x5] 992 996 993 997 #ifdef CONFIG_VMAP_STACK 994 998 /* ··· 1059 1055 1060 1056 ldr_l x2, sdei_exit_mode 1061 1057 1058 + /* Clear the registered-event seen by crash_smp_send_stop() */ 1059 + ldrb w3, [x4, #SDEI_EVENT_PRIORITY] 1060 + cbnz w3, 1f 1061 + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 1062 + b 2f 1063 + 1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 1064 + 2: str xzr, [x5] 1065 + 1062 1066 alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 1063 1067 sdei_handler_exit exit_mode=x2 1064 1068 alternative_else_nop_endif ··· 1077 1065 #endif 1078 1066 SYM_CODE_END(__sdei_asm_handler) 1079 1067 NOKPROBE(__sdei_asm_handler) 1068 + 1069 + SYM_CODE_START(__sdei_handler_abort) 1070 + mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME 1071 + adr x1, 1f 1072 + ldr_l x2, sdei_exit_mode 1073 + sdei_handler_exit exit_mode=x2 1074 + // exit the handler and jump to the next instruction. 1075 + // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx. 1076 + 1: ret 1077 + SYM_CODE_END(__sdei_handler_abort) 1078 + NOKPROBE(__sdei_handler_abort) 1080 1079 #endif /* CONFIG_ARM_SDE_INTERFACE */
+3
arch/arm64/kernel/sdei.c
··· 47 47 DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); 48 48 #endif 49 49 50 + DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); 51 + DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); 52 + 50 53 static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) 51 54 { 52 55 unsigned long *p;
+4 -4
arch/arm64/kernel/smp.c
··· 1044 1044 * If this cpu is the only one alive at this point in time, online or 1045 1045 * not, there are no stop messages to be sent around, so just back out. 1046 1046 */ 1047 - if (num_other_online_cpus() == 0) { 1048 - sdei_mask_local_cpu(); 1049 - return; 1050 - } 1047 + if (num_other_online_cpus() == 0) 1048 + goto skip_ipi; 1051 1049 1052 1050 cpumask_copy(&mask, cpu_online_mask); 1053 1051 cpumask_clear_cpu(smp_processor_id(), &mask); ··· 1064 1066 pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", 1065 1067 cpumask_pr_args(&mask)); 1066 1068 1069 + skip_ipi: 1067 1070 sdei_mask_local_cpu(); 1071 + sdei_handler_abort(); 1068 1072 } 1069 1073 1070 1074 bool smp_crash_stop_failed(void)
+19
drivers/firmware/arm_sdei.c
··· 1095 1095 return err; 1096 1096 } 1097 1097 NOKPROBE_SYMBOL(sdei_event_handler); 1098 + 1099 + void sdei_handler_abort(void) 1100 + { 1101 + /* 1102 + * If the crash happened in an SDEI event handler then we need to 1103 + * finish the handler with the firmware so that we can have working 1104 + * interrupts in the crash kernel. 1105 + */ 1106 + if (__this_cpu_read(sdei_active_critical_event)) { 1107 + pr_warn("still in SDEI critical event context, attempting to finish handler.\n"); 1108 + __sdei_handler_abort(); 1109 + __this_cpu_write(sdei_active_critical_event, NULL); 1110 + } 1111 + if (__this_cpu_read(sdei_active_normal_event)) { 1112 + pr_warn("still in SDEI normal event context, attempting to finish handler.\n"); 1113 + __sdei_handler_abort(); 1114 + __this_cpu_write(sdei_active_normal_event, NULL); 1115 + } 1116 + }
+2
include/linux/arm_sdei.h
··· 47 47 int sdei_mask_local_cpu(void); 48 48 int sdei_unmask_local_cpu(void); 49 49 void __init sdei_init(void); 50 + void sdei_handler_abort(void); 50 51 #else 51 52 static inline int sdei_mask_local_cpu(void) { return 0; } 52 53 static inline int sdei_unmask_local_cpu(void) { return 0; } 53 54 static inline void sdei_init(void) { } 55 + static inline void sdei_handler_abort(void) { } 54 56 #endif /* CONFIG_ARM_SDE_INTERFACE */ 55 57 56 58