arch/powerpc/platforms/pseries/ras.c at v5.8-rc4

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / arch / powerpc / platforms / pseries / ras.c
at v5.8-rc4 856 lines 24 kB view raw
wrap content
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * Copyright (C) 2001 Dave Engebretsen IBM Corporation
  4 */
  5
  6#include <linux/sched.h>
  7#include <linux/interrupt.h>
  8#include <linux/irq.h>
  9#include <linux/of.h>
 10#include <linux/fs.h>
 11#include <linux/reboot.h>
 12#include <linux/irq_work.h>
 13
 14#include <asm/machdep.h>
 15#include <asm/rtas.h>
 16#include <asm/firmware.h>
 17#include <asm/mce.h>
 18
 19#include "pseries.h"
 20
 21static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
 22static DEFINE_SPINLOCK(ras_log_buf_lock);
 23
 24static int ras_check_exception_token;
 25
 26static void mce_process_errlog_event(struct irq_work *work);
 27static struct irq_work mce_errlog_process_work = {
 28	.func = mce_process_errlog_event,
 29};
 30
 31#define EPOW_SENSOR_TOKEN	9
 32#define EPOW_SENSOR_INDEX	0
 33
 34/* EPOW events counter variable */
 35static int num_epow_events;
 36
 37static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
 38static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 39static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 40
 41/* RTAS pseries MCE errorlog section. */
 42struct pseries_mc_errorlog {
 43	__be32	fru_id;
 44	__be32	proc_id;
 45	u8	error_type;
 46	/*
 47	 * sub_err_type (1 byte). Bit fields depends on error_type
 48	 *
 49	 *   MSB0
 50	 *   |
 51	 *   V
 52	 *   01234567
 53	 *   XXXXXXXX
 54	 *
 55	 * For error_type == MC_ERROR_TYPE_UE
 56	 *   XXXXXXXX
 57	 *   X		1: Permanent or Transient UE.
 58	 *    X		1: Effective address provided.
 59	 *     X	1: Logical address provided.
 60	 *      XX	2: Reserved.
 61	 *        XXX	3: Type of UE error.
 62	 *
 63	 * For error_type != MC_ERROR_TYPE_UE
 64	 *   XXXXXXXX
 65	 *   X		1: Effective address provided.
 66	 *    XXXXX	5: Reserved.
 67	 *         XX	2: Type of SLB/ERAT/TLB error.
 68	 */
 69	u8	sub_err_type;
 70	u8	reserved_1[6];
 71	__be64	effective_address;
 72	__be64	logical_address;
 73} __packed;
 74
 75/* RTAS pseries MCE error types */
 76#define MC_ERROR_TYPE_UE		0x00
 77#define MC_ERROR_TYPE_SLB		0x01
 78#define MC_ERROR_TYPE_ERAT		0x02
 79#define MC_ERROR_TYPE_UNKNOWN		0x03
 80#define MC_ERROR_TYPE_TLB		0x04
 81#define MC_ERROR_TYPE_D_CACHE		0x05
 82#define MC_ERROR_TYPE_I_CACHE		0x07
 83
 84/* RTAS pseries MCE error sub types */
 85#define MC_ERROR_UE_INDETERMINATE		0
 86#define MC_ERROR_UE_IFETCH			1
 87#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
 88#define MC_ERROR_UE_LOAD_STORE			3
 89#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4
 90
 91#define UE_EFFECTIVE_ADDR_PROVIDED		0x40
 92#define UE_LOGICAL_ADDR_PROVIDED		0x20
 93
 94#define MC_ERROR_SLB_PARITY		0
 95#define MC_ERROR_SLB_MULTIHIT		1
 96#define MC_ERROR_SLB_INDETERMINATE	2
 97
 98#define MC_ERROR_ERAT_PARITY		1
 99#define MC_ERROR_ERAT_MULTIHIT		2
100#define MC_ERROR_ERAT_INDETERMINATE	3
101
102#define MC_ERROR_TLB_PARITY		1
103#define MC_ERROR_TLB_MULTIHIT		2
104#define MC_ERROR_TLB_INDETERMINATE	3
105
106static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
107{
108	switch (mlog->error_type) {
109	case	MC_ERROR_TYPE_UE:
110		return (mlog->sub_err_type & 0x07);
111	case	MC_ERROR_TYPE_SLB:
112	case	MC_ERROR_TYPE_ERAT:
113	case	MC_ERROR_TYPE_TLB:
114		return (mlog->sub_err_type & 0x03);
115	default:
116		return 0;
117	}
118}
119
120/*
121 * Enable the hotplug interrupt late because processing them may touch other
122 * devices or systems (e.g. hugepages) that have not been initialized at the
123 * subsys stage.
124 */
125int __init init_ras_hotplug_IRQ(void)
126{
127	struct device_node *np;
128
129	/* Hotplug Events */
130	np = of_find_node_by_path("/event-sources/hot-plug-events");
131	if (np != NULL) {
132		if (dlpar_workqueue_init() == 0)
133			request_event_sources_irqs(np, ras_hotplug_interrupt,
134						   "RAS_HOTPLUG");
135		of_node_put(np);
136	}
137
138	return 0;
139}
140machine_late_initcall(pseries, init_ras_hotplug_IRQ);
141
142/*
143 * Initialize handlers for the set of interrupts caused by hardware errors
144 * and power system events.
145 */
146static int __init init_ras_IRQ(void)
147{
148	struct device_node *np;
149
150	ras_check_exception_token = rtas_token("check-exception");
151
152	/* Internal Errors */
153	np = of_find_node_by_path("/event-sources/internal-errors");
154	if (np != NULL) {
155		request_event_sources_irqs(np, ras_error_interrupt,
156					   "RAS_ERROR");
157		of_node_put(np);
158	}
159
160	/* EPOW Events */
161	np = of_find_node_by_path("/event-sources/epow-events");
162	if (np != NULL) {
163		request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
164		of_node_put(np);
165	}
166
167	return 0;
168}
169machine_subsys_initcall(pseries, init_ras_IRQ);
170
171#define EPOW_SHUTDOWN_NORMAL				1
172#define EPOW_SHUTDOWN_ON_UPS				2
173#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
174#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
175
176static void handle_system_shutdown(char event_modifier)
177{
178	switch (event_modifier) {
179	case EPOW_SHUTDOWN_NORMAL:
180		pr_emerg("Power off requested\n");
181		orderly_poweroff(true);
182		break;
183
184	case EPOW_SHUTDOWN_ON_UPS:
185		pr_emerg("Loss of system power detected. System is running on"
186			 " UPS/battery. Check RTAS error log for details\n");
187		orderly_poweroff(true);
188		break;
189
190	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
191		pr_emerg("Loss of system critical functions detected. Check"
192			 " RTAS error log for details\n");
193		orderly_poweroff(true);
194		break;
195
196	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
197		pr_emerg("High ambient temperature detected. Check RTAS"
198			 " error log for details\n");
199		orderly_poweroff(true);
200		break;
201
202	default:
203		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
204			event_modifier);
205	}
206}
207
208struct epow_errorlog {
209	unsigned char sensor_value;
210	unsigned char event_modifier;
211	unsigned char extended_modifier;
212	unsigned char reserved;
213	unsigned char platform_reason;
214};
215
216#define EPOW_RESET			0
217#define EPOW_WARN_COOLING		1
218#define EPOW_WARN_POWER			2
219#define EPOW_SYSTEM_SHUTDOWN		3
220#define EPOW_SYSTEM_HALT		4
221#define EPOW_MAIN_ENCLOSURE		5
222#define EPOW_POWER_OFF			7
223
224static void rtas_parse_epow_errlog(struct rtas_error_log *log)
225{
226	struct pseries_errorlog *pseries_log;
227	struct epow_errorlog *epow_log;
228	char action_code;
229	char modifier;
230
231	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
232	if (pseries_log == NULL)
233		return;
234
235	epow_log = (struct epow_errorlog *)pseries_log->data;
236	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
237	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */
238
239	switch (action_code) {
240	case EPOW_RESET:
241		if (num_epow_events) {
242			pr_info("Non critical power/cooling issue cleared\n");
243			num_epow_events--;
244		}
245		break;
246
247	case EPOW_WARN_COOLING:
248		pr_info("Non-critical cooling issue detected. Check RTAS error"
249			" log for details\n");
250		break;
251
252	case EPOW_WARN_POWER:
253		pr_info("Non-critical power issue detected. Check RTAS error"
254			" log for details\n");
255		break;
256
257	case EPOW_SYSTEM_SHUTDOWN:
258		handle_system_shutdown(modifier);
259		break;
260
261	case EPOW_SYSTEM_HALT:
262		pr_emerg("Critical power/cooling issue detected. Check RTAS"
263			 " error log for details. Powering off.\n");
264		orderly_poweroff(true);
265		break;
266
267	case EPOW_MAIN_ENCLOSURE:
268	case EPOW_POWER_OFF:
269		pr_emerg("System about to lose power. Check RTAS error log "
270			 " for details. Powering off immediately.\n");
271		emergency_sync();
272		kernel_power_off();
273		break;
274
275	default:
276		pr_err("Unknown power/cooling event (action code  = %d)\n",
277			action_code);
278	}
279
280	/* Increment epow events counter variable */
281	if (action_code != EPOW_RESET)
282		num_epow_events++;
283}
284
285static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
286{
287	struct pseries_errorlog *pseries_log;
288	struct pseries_hp_errorlog *hp_elog;
289
290	spin_lock(&ras_log_buf_lock);
291
292	rtas_call(ras_check_exception_token, 6, 1, NULL,
293		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
294		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
295		  rtas_get_error_log_max());
296
297	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
298					   PSERIES_ELOG_SECT_ID_HOTPLUG);
299	hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
300
301	/*
302	 * Since PCI hotplug is not currently supported on pseries, put PCI
303	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
304	 */
305	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
306	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
307	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
308		queue_hotplug_event(hp_elog);
309	else
310		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
311
312	spin_unlock(&ras_log_buf_lock);
313	return IRQ_HANDLED;
314}
315
316/* Handle environmental and power warning (EPOW) interrupts. */
317static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
318{
319	int status;
320	int state;
321	int critical;
322
323	status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
324				      &state);
325
326	if (state > 3)
327		critical = 1;		/* Time Critical */
328	else
329		critical = 0;
330
331	spin_lock(&ras_log_buf_lock);
332
333	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
334			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
335			   virq_to_hw(irq),
336			   RTAS_EPOW_WARNING,
337			   critical, __pa(&ras_log_buf),
338				rtas_get_error_log_max());
339
340	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
341
342	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
343
344	spin_unlock(&ras_log_buf_lock);
345	return IRQ_HANDLED;
346}
347
348/*
349 * Handle hardware error interrupts.
350 *
351 * RTAS check-exception is called to collect data on the exception.  If
352 * the error is deemed recoverable, we log a warning and return.
353 * For nonrecoverable errors, an error is logged and we stop all processing
354 * as quickly as possible in order to prevent propagation of the failure.
355 */
356static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
357{
358	struct rtas_error_log *rtas_elog;
359	int status;
360	int fatal;
361
362	spin_lock(&ras_log_buf_lock);
363
364	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
365			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
366			   virq_to_hw(irq),
367			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
368			   __pa(&ras_log_buf),
369				rtas_get_error_log_max());
370
371	rtas_elog = (struct rtas_error_log *)ras_log_buf;
372
373	if (status == 0 &&
374	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
375		fatal = 1;
376	else
377		fatal = 0;
378
379	/* format and print the extended information */
380	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
381
382	if (fatal) {
383		pr_emerg("Fatal hardware error detected. Check RTAS error"
384			 " log for details. Powering off immediately\n");
385		emergency_sync();
386		kernel_power_off();
387	} else {
388		pr_err("Recoverable hardware error detected\n");
389	}
390
391	spin_unlock(&ras_log_buf_lock);
392	return IRQ_HANDLED;
393}
394
395/*
396 * Some versions of FWNMI place the buffer inside the 4kB page starting at
397 * 0x7000. Other versions place it inside the rtas buffer. We check both.
398 * Minimum size of the buffer is 16 bytes.
399 */
400#define VALID_FWNMI_BUFFER(A) \
401	((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
402	(((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
403
404static inline struct rtas_error_log *fwnmi_get_errlog(void)
405{
406	return (struct rtas_error_log *)local_paca->mce_data_buf;
407}
408
409static __be64 *fwnmi_get_savep(struct pt_regs *regs)
410{
411	unsigned long savep_ra;
412
413	/* Mask top two bits */
414	savep_ra = regs->gpr[3] & ~(0x3UL << 62);
415	if (!VALID_FWNMI_BUFFER(savep_ra)) {
416		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
417		return NULL;
418	}
419
420	return __va(savep_ra);
421}
422
423/*
424 * Get the error information for errors coming through the
425 * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
426 * the actual r3 if possible, and a ptr to the error log entry
427 * will be returned if found.
428 *
429 * Use one buffer mce_data_buf per cpu to store RTAS error.
430 *
431 * The mce_data_buf does not have any locks or protection around it,
432 * if a second machine check comes in, or a system reset is done
433 * before we have logged the error, then we will get corruption in the
434 * error log.  This is preferable over holding off on calling
435 * ibm,nmi-interlock which would result in us checkstopping if a
436 * second machine check did come in.
437 */
438static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
439{
440	struct rtas_error_log *h;
441	__be64 *savep;
442
443	savep = fwnmi_get_savep(regs);
444	if (!savep)
445		return NULL;
446
447	regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
448
449	h = (struct rtas_error_log *)&savep[1];
450	/* Use the per cpu buffer from paca to store rtas error log */
451	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
452	if (!rtas_error_extended(h)) {
453		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
454	} else {
455		int len, error_log_length;
456
457		error_log_length = 8 + rtas_error_extended_log_length(h);
458		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
459		memcpy(local_paca->mce_data_buf, h, len);
460	}
461
462	return (struct rtas_error_log *)local_paca->mce_data_buf;
463}
464
465/* Call this when done with the data returned by FWNMI_get_errinfo.
466 * It will release the saved data area for other CPUs in the
467 * partition to receive FWNMI errors.
468 */
469static void fwnmi_release_errinfo(void)
470{
471	struct rtas_args rtas_args;
472	int ret;
473
474	/*
475	 * On pseries, the machine check stack is limited to under 4GB, so
476	 * args can be on-stack.
477	 */
478	rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL);
479	ret = be32_to_cpu(rtas_args.rets[0]);
480	if (ret != 0)
481		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
482}
483
484int pSeries_system_reset_exception(struct pt_regs *regs)
485{
486#ifdef __LITTLE_ENDIAN__
487	/*
488	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
489	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
490	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
491	 * so clear it. It will be missing MSR_RI so we won't try to recover.
492	 */
493	if ((be64_to_cpu(regs->msr) &
494			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
495			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
496		regs->nip = be64_to_cpu((__be64)regs->nip);
497		regs->msr = 0;
498	}
499#endif
500
501	if (fwnmi_active) {
502		__be64 *savep;
503
504		/*
505		 * Firmware (PowerVM and KVM) saves r3 to a save area like
506		 * machine check, which is not exactly what PAPR (2.9)
507		 * suggests but there is no way to detect otherwise, so this
508		 * is the interface now.
509		 *
510		 * System resets do not save any error log or require an
511		 * "ibm,nmi-interlock" rtas call to release.
512		 */
513
514		savep = fwnmi_get_savep(regs);
515		if (savep)
516			regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
517	}
518
519	if (smp_handle_nmi_ipi(regs))
520		return 1;
521
522	return 0; /* need to perform reset */
523}
524
525
526static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
527{
528	struct mce_error_info mce_err = { 0 };
529	unsigned long eaddr = 0, paddr = 0;
530	struct pseries_errorlog *pseries_log;
531	struct pseries_mc_errorlog *mce_log;
532	int disposition = rtas_error_disposition(errp);
533	int initiator = rtas_error_initiator(errp);
534	int severity = rtas_error_severity(errp);
535	u8 error_type, err_sub_type;
536
537	if (initiator == RTAS_INITIATOR_UNKNOWN)
538		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
539	else if (initiator == RTAS_INITIATOR_CPU)
540		mce_err.initiator = MCE_INITIATOR_CPU;
541	else if (initiator == RTAS_INITIATOR_PCI)
542		mce_err.initiator = MCE_INITIATOR_PCI;
543	else if (initiator == RTAS_INITIATOR_ISA)
544		mce_err.initiator = MCE_INITIATOR_ISA;
545	else if (initiator == RTAS_INITIATOR_MEMORY)
546		mce_err.initiator = MCE_INITIATOR_MEMORY;
547	else if (initiator == RTAS_INITIATOR_POWERMGM)
548		mce_err.initiator = MCE_INITIATOR_POWERMGM;
549	else
550		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
551
552	if (severity == RTAS_SEVERITY_NO_ERROR)
553		mce_err.severity = MCE_SEV_NO_ERROR;
554	else if (severity == RTAS_SEVERITY_EVENT)
555		mce_err.severity = MCE_SEV_WARNING;
556	else if (severity == RTAS_SEVERITY_WARNING)
557		mce_err.severity = MCE_SEV_WARNING;
558	else if (severity == RTAS_SEVERITY_ERROR_SYNC)
559		mce_err.severity = MCE_SEV_SEVERE;
560	else if (severity == RTAS_SEVERITY_ERROR)
561		mce_err.severity = MCE_SEV_SEVERE;
562	else if (severity == RTAS_SEVERITY_FATAL)
563		mce_err.severity = MCE_SEV_FATAL;
564	else
565		mce_err.severity = MCE_SEV_FATAL;
566
567	if (severity <= RTAS_SEVERITY_ERROR_SYNC)
568		mce_err.sync_error = true;
569	else
570		mce_err.sync_error = false;
571
572	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
573	mce_err.error_class = MCE_ECLASS_UNKNOWN;
574
575	if (!rtas_error_extended(errp))
576		goto out;
577
578	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
579	if (pseries_log == NULL)
580		goto out;
581
582	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
583	error_type = mce_log->error_type;
584	err_sub_type = rtas_mc_error_sub_type(mce_log);
585
586	switch (mce_log->error_type) {
587	case MC_ERROR_TYPE_UE:
588		mce_err.error_type = MCE_ERROR_TYPE_UE;
589		mce_common_process_ue(regs, &mce_err);
590		if (mce_err.ignore_event)
591			disposition = RTAS_DISP_FULLY_RECOVERED;
592		switch (err_sub_type) {
593		case MC_ERROR_UE_IFETCH:
594			mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
595			break;
596		case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
597			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
598			break;
599		case MC_ERROR_UE_LOAD_STORE:
600			mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
601			break;
602		case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
603			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
604			break;
605		case MC_ERROR_UE_INDETERMINATE:
606		default:
607			mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
608			break;
609		}
610		if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED)
611			eaddr = be64_to_cpu(mce_log->effective_address);
612
613		if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
614			paddr = be64_to_cpu(mce_log->logical_address);
615		} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
616			unsigned long pfn;
617
618			pfn = addr_to_pfn(regs, eaddr);
619			if (pfn != ULONG_MAX)
620				paddr = pfn << PAGE_SHIFT;
621		}
622
623		break;
624	case MC_ERROR_TYPE_SLB:
625		mce_err.error_type = MCE_ERROR_TYPE_SLB;
626		switch (err_sub_type) {
627		case MC_ERROR_SLB_PARITY:
628			mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
629			break;
630		case MC_ERROR_SLB_MULTIHIT:
631			mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
632			break;
633		case MC_ERROR_SLB_INDETERMINATE:
634		default:
635			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
636			break;
637		}
638		if (mce_log->sub_err_type & 0x80)
639			eaddr = be64_to_cpu(mce_log->effective_address);
640		break;
641	case MC_ERROR_TYPE_ERAT:
642		mce_err.error_type = MCE_ERROR_TYPE_ERAT;
643		switch (err_sub_type) {
644		case MC_ERROR_ERAT_PARITY:
645			mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
646			break;
647		case MC_ERROR_ERAT_MULTIHIT:
648			mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
649			break;
650		case MC_ERROR_ERAT_INDETERMINATE:
651		default:
652			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
653			break;
654		}
655		if (mce_log->sub_err_type & 0x80)
656			eaddr = be64_to_cpu(mce_log->effective_address);
657		break;
658	case MC_ERROR_TYPE_TLB:
659		mce_err.error_type = MCE_ERROR_TYPE_TLB;
660		switch (err_sub_type) {
661		case MC_ERROR_TLB_PARITY:
662			mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
663			break;
664		case MC_ERROR_TLB_MULTIHIT:
665			mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
666			break;
667		case MC_ERROR_TLB_INDETERMINATE:
668		default:
669			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
670			break;
671		}
672		if (mce_log->sub_err_type & 0x80)
673			eaddr = be64_to_cpu(mce_log->effective_address);
674		break;
675	case MC_ERROR_TYPE_D_CACHE:
676		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
677		break;
678	case MC_ERROR_TYPE_I_CACHE:
679		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
680		break;
681	case MC_ERROR_TYPE_UNKNOWN:
682	default:
683		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
684		break;
685	}
686
687#ifdef CONFIG_PPC_BOOK3S_64
688	if (disposition == RTAS_DISP_NOT_RECOVERED) {
689		switch (error_type) {
690		case	MC_ERROR_TYPE_SLB:
691		case	MC_ERROR_TYPE_ERAT:
692			/*
693			 * Store the old slb content in paca before flushing.
694			 * Print this when we go to virtual mode.
695			 * There are chances that we may hit MCE again if there
696			 * is a parity error on the SLB entry we trying to read
697			 * for saving. Hence limit the slb saving to single
698			 * level of recursion.
699			 */
700			if (local_paca->in_mce == 1)
701				slb_save_contents(local_paca->mce_faulty_slbs);
702			flush_and_reload_slb();
703			disposition = RTAS_DISP_FULLY_RECOVERED;
704			break;
705		default:
706			break;
707		}
708	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
709		/* Platform corrected itself but could be degraded */
710		printk(KERN_ERR "MCE: limited recovery, system may "
711		       "be degraded\n");
712		disposition = RTAS_DISP_FULLY_RECOVERED;
713	}
714#endif
715
716out:
717	/*
718	 * Enable translation as we will be accessing per-cpu variables
719	 * in save_mce_event() which may fall outside RMO region, also
720	 * leave it enabled because subsequently we will be queuing work
721	 * to workqueues where again per-cpu variables accessed, besides
722	 * fwnmi_release_errinfo() crashes when called in realmode on
723	 * pseries.
724	 * Note: All the realmode handling like flushing SLB entries for
725	 *       SLB multihit is done by now.
726	 */
727	mtmsr(mfmsr() | MSR_IR | MSR_DR);
728	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
729			&mce_err, regs->nip, eaddr, paddr);
730
731	return disposition;
732}
733
734/*
735 * Process MCE rtas errlog event.
736 */
737static void mce_process_errlog_event(struct irq_work *work)
738{
739	struct rtas_error_log *err;
740
741	err = fwnmi_get_errlog();
742	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
743}
744
745/*
746 * See if we can recover from a machine check exception.
747 * This is only called on power4 (or above) and only via
748 * the Firmware Non-Maskable Interrupts (fwnmi) handler
749 * which provides the error analysis for us.
750 *
751 * Return 1 if corrected (or delivered a signal).
752 * Return 0 if there is nothing we can do.
753 */
754static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
755{
756	int recovered = 0;
757
758	if (!(regs->msr & MSR_RI)) {
759		/* If MSR_RI isn't set, we cannot recover */
760		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
761		recovered = 0;
762	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
763		/* Platform corrected itself */
764		recovered = 1;
765	} else if (evt->severity == MCE_SEV_FATAL) {
766		/* Fatal machine check */
767		pr_err("Machine check interrupt is fatal\n");
768		recovered = 0;
769	}
770
771	if (!recovered && evt->sync_error) {
772		/*
773		 * Try to kill processes if we get a synchronous machine check
774		 * (e.g., one caused by execution of this instruction). This
775		 * will devolve into a panic if we try to kill init or are in
776		 * an interrupt etc.
777		 *
778		 * TODO: Queue up this address for hwpoisioning later.
779		 * TODO: This is not quite right for d-side machine
780		 *       checks ->nip is not necessarily the important
781		 *       address.
782		 */
783		if ((user_mode(regs))) {
784			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
785			recovered = 1;
786		} else if (die_will_crash()) {
787			/*
788			 * die() would kill the kernel, so better to go via
789			 * the platform reboot code that will log the
790			 * machine check.
791			 */
792			recovered = 0;
793		} else {
794			die("Machine check", regs, SIGBUS);
795			recovered = 1;
796		}
797	}
798
799	return recovered;
800}
801
802/*
803 * Handle a machine check.
804 *
805 * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
806 * should be present.  If so the handler which called us tells us if the
807 * error was recovered (never true if RI=0).
808 *
809 * On hardware prior to Power 4 these exceptions were asynchronous which
810 * means we can't tell exactly where it occurred and so we can't recover.
811 */
812int pSeries_machine_check_exception(struct pt_regs *regs)
813{
814	struct machine_check_event evt;
815
816	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
817		return 0;
818
819	/* Print things out */
820	if (evt.version != MCE_V1) {
821		pr_err("Machine Check Exception, Unknown event version %d !\n",
822		       evt.version);
823		return 0;
824	}
825	machine_check_print_event_info(&evt, user_mode(regs), false);
826
827	if (recover_mce(regs, &evt))
828		return 1;
829
830	return 0;
831}
832
833long pseries_machine_check_realmode(struct pt_regs *regs)
834{
835	struct rtas_error_log *errp;
836	int disposition;
837
838	if (fwnmi_active) {
839		errp = fwnmi_get_errinfo(regs);
840		/*
841		 * Call to fwnmi_release_errinfo() in real mode causes kernel
842		 * to panic. Hence we will call it as soon as we go into
843		 * virtual mode.
844		 */
845		disposition = mce_handle_error(regs, errp);
846		fwnmi_release_errinfo();
847
848		/* Queue irq work to log this rtas event later. */
849		irq_work_queue(&mce_errlog_process_work);
850
851		if (disposition == RTAS_DISP_FULLY_RECOVERED)
852			return 1;
853	}
854
855	return 0;
856}
Configure Feed

Configure Feed