Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/mce: Avoid using irq_work_queue() in realmode

In the realmode MCE handler we use irq_work_queue() to defer
the processing of MCE events. irq_work_queue() can only
be called when translation is enabled, because it touches
memory outside the RMA. Hence we enable translation before
calling irq_work_queue() and disable it on return, even though
enabling translation is not safe to do in realmode.

To avoid this, program the decrementer and call the event
processing functions from the timer handler.

Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220120121931.517974-1-ganeshgr@linux.ibm.com

authored by

Ganesh Goudar and committed by
Michael Ellerman
cc15ff32 0a182611

+53 -59
+2
arch/powerpc/include/asm/machdep.h
··· 94 94 /* Called during machine check exception to retrive fixup address. */ 95 95 bool (*mce_check_early_recovery)(struct pt_regs *regs); 96 96 97 + void (*machine_check_log_err)(void); 98 + 97 99 /* Motherboard/chipset features. This is a kind of general purpose 98 100 * hook used to control some machine specific features (like reset 99 101 * lines, chip power control, etc...).
+13
arch/powerpc/include/asm/mce.h
··· 235 235 unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); 236 236 extern void mce_common_process_ue(struct pt_regs *regs, 237 237 struct mce_error_info *mce_err); 238 + void mce_irq_work_queue(void); 238 239 int mce_register_notifier(struct notifier_block *nb); 239 240 int mce_unregister_notifier(struct notifier_block *nb); 241 + 242 + #ifdef CONFIG_PPC_BOOK3S_64 243 + void mce_run_irq_context_handlers(void); 244 + #else 245 + static inline void mce_run_irq_context_handlers(void) { }; 246 + #endif /* CONFIG_PPC_BOOK3S_64 */ 247 + 248 + #ifdef CONFIG_PPC_BOOK3S_64 249 + void set_mce_pending_irq_work(void); 250 + void clear_mce_pending_irq_work(void); 251 + #endif /* CONFIG_PPC_BOOK3S_64 */ 252 + 240 253 #ifdef CONFIG_PPC_BOOK3S_64 241 254 void flush_and_reload_slb(void); 242 255 void flush_erat(void);
+1
arch/powerpc/include/asm/paca.h
··· 288 288 #endif 289 289 #ifdef CONFIG_PPC_BOOK3S_64 290 290 struct mce_info *mce_info; 291 + u8 mce_pending_irq_work; 291 292 #endif /* CONFIG_PPC_BOOK3S_64 */ 292 293 } ____cacheline_aligned; 293 294
+32 -28
arch/powerpc/kernel/mce.c
··· 28 28 29 29 #include "setup.h" 30 30 31 - static void machine_check_process_queued_event(struct irq_work *work); 32 - static void machine_check_ue_irq_work(struct irq_work *work); 33 31 static void machine_check_ue_event(struct machine_check_event *evt); 34 32 static void machine_process_ue_event(struct work_struct *work); 35 - 36 - static struct irq_work mce_event_process_work = { 37 - .func = machine_check_process_queued_event, 38 - }; 39 - 40 - static struct irq_work mce_ue_event_irq_work = { 41 - .func = machine_check_ue_irq_work, 42 - }; 43 33 44 34 static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event); 45 35 ··· 77 87 default: 78 88 break; 79 89 } 90 + } 91 + 92 + void mce_irq_work_queue(void) 93 + { 94 + /* Raise decrementer interrupt */ 95 + arch_irq_work_raise(); 96 + set_mce_pending_irq_work(); 80 97 } 81 98 82 99 /* ··· 214 217 get_mce_event(NULL, true); 215 218 } 216 219 217 - static void machine_check_ue_irq_work(struct irq_work *work) 220 + static void machine_check_ue_work(void) 218 221 { 219 222 schedule_work(&mce_ue_event_work); 220 223 } ··· 236 239 evt, sizeof(*evt)); 237 240 238 241 /* Queue work to process this event later. */ 239 - irq_work_queue(&mce_ue_event_irq_work); 242 + mce_irq_work_queue(); 240 243 } 241 244 242 245 /* ··· 246 249 { 247 250 int index; 248 251 struct machine_check_event evt; 249 - unsigned long msr; 250 252 251 253 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 252 254 return; ··· 259 263 memcpy(&local_paca->mce_info->mce_event_queue[index], 260 264 &evt, sizeof(evt)); 261 265 262 - /* 263 - * Queue irq work to process this event later. Before 264 - * queuing the work enable translation for non radix LPAR, 265 - * as irq_work_queue may try to access memory outside RMO 266 - * region. 
267 - */ 268 - if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) { 269 - msr = mfmsr(); 270 - mtmsr(msr | MSR_IR | MSR_DR); 271 - irq_work_queue(&mce_event_process_work); 272 - mtmsr(msr); 273 - } else { 274 - irq_work_queue(&mce_event_process_work); 275 - } 266 + mce_irq_work_queue(); 276 267 } 277 268 278 269 void mce_common_process_ue(struct pt_regs *regs, ··· 321 338 * process pending MCE event from the mce event queue. This function will be 322 339 * called during syscall exit. 323 340 */ 324 - static void machine_check_process_queued_event(struct irq_work *work) 341 + static void machine_check_process_queued_event(void) 325 342 { 326 343 int index; 327 344 struct machine_check_event *evt; ··· 343 360 } 344 361 machine_check_print_event_info(evt, false, false); 345 362 local_paca->mce_info->mce_queue_count--; 363 + } 364 + } 365 + 366 + void set_mce_pending_irq_work(void) 367 + { 368 + local_paca->mce_pending_irq_work = 1; 369 + } 370 + 371 + void clear_mce_pending_irq_work(void) 372 + { 373 + local_paca->mce_pending_irq_work = 0; 374 + } 375 + 376 + void mce_run_irq_context_handlers(void) 377 + { 378 + if (unlikely(local_paca->mce_pending_irq_work)) { 379 + if (ppc_md.machine_check_log_err) 380 + ppc_md.machine_check_log_err(); 381 + machine_check_process_queued_event(); 382 + machine_check_ue_work(); 383 + clear_mce_pending_irq_work(); 346 384 } 347 385 } 348 386
+2
arch/powerpc/kernel/time.c
··· 70 70 #include <asm/vdso_datapage.h> 71 71 #include <asm/firmware.h> 72 72 #include <asm/asm-prototypes.h> 73 + #include <asm/mce.h> 73 74 74 75 /* powerpc clocksource/clockevent code */ 75 76 ··· 639 638 640 639 if (test_irq_work_pending()) { 641 640 clear_irq_work_pending(); 641 + mce_run_irq_context_handlers(); 642 642 irq_work_run(); 643 643 } 644 644
+1
arch/powerpc/platforms/pseries/pseries.h
··· 21 21 extern int pSeries_system_reset_exception(struct pt_regs *regs); 22 22 extern int pSeries_machine_check_exception(struct pt_regs *regs); 23 23 extern long pseries_machine_check_realmode(struct pt_regs *regs); 24 + void pSeries_machine_check_log_err(void); 24 25 25 26 #ifdef CONFIG_SMP 26 27 extern void smp_init_pseries(void);
+1 -31
arch/powerpc/platforms/pseries/ras.c
··· 23 23 24 24 static int ras_check_exception_token; 25 25 26 - static void mce_process_errlog_event(struct irq_work *work); 27 - static struct irq_work mce_errlog_process_work = { 28 - .func = mce_process_errlog_event, 29 - }; 30 - 31 26 #define EPOW_SENSOR_TOKEN 9 32 27 #define EPOW_SENSOR_INDEX 0 33 28 ··· 740 745 struct pseries_errorlog *pseries_log; 741 746 struct pseries_mc_errorlog *mce_log = NULL; 742 747 int disposition = rtas_error_disposition(errp); 743 - unsigned long msr; 744 748 u8 error_type; 745 749 746 750 if (!rtas_error_extended(errp)) ··· 753 759 error_type = mce_log->error_type; 754 760 755 761 disposition = mce_handle_err_realmode(disposition, error_type); 756 - 757 - /* 758 - * Enable translation as we will be accessing per-cpu variables 759 - * in save_mce_event() which may fall outside RMO region, also 760 - * leave it enabled because subsequently we will be queuing work 761 - * to workqueues where again per-cpu variables accessed, besides 762 - * fwnmi_release_errinfo() crashes when called in realmode on 763 - * pseries. 764 - * Note: All the realmode handling like flushing SLB entries for 765 - * SLB multihit is done by now. 766 - */ 767 762 out: 768 - msr = mfmsr(); 769 - mtmsr(msr | MSR_IR | MSR_DR); 770 - 771 763 disposition = mce_handle_err_virtmode(regs, errp, mce_log, 772 764 disposition); 773 - 774 - /* 775 - * Queue irq work to log this rtas event later. 776 - * irq_work_queue uses per-cpu variables, so do this in virt 777 - * mode as well. 778 - */ 779 - irq_work_queue(&mce_errlog_process_work); 780 - 781 - mtmsr(msr); 782 - 783 765 return disposition; 784 766 } 785 767 786 768 /* 787 769 * Process MCE rtas errlog event. 788 770 */ 789 - static void mce_process_errlog_event(struct irq_work *work) 771 + void pSeries_machine_check_log_err(void) 790 772 { 791 773 struct rtas_error_log *err; 792 774
+1
arch/powerpc/platforms/pseries/setup.c
··· 1086 1086 .system_reset_exception = pSeries_system_reset_exception, 1087 1087 .machine_check_early = pseries_machine_check_realmode, 1088 1088 .machine_check_exception = pSeries_machine_check_exception, 1089 + .machine_check_log_err = pSeries_machine_check_log_err, 1089 1090 #ifdef CONFIG_KEXEC_CORE 1090 1091 .machine_kexec = pSeries_machine_kexec, 1091 1092 .kexec_cpu_down = pseries_kexec_cpu_down,