arch/powerpc/kexec/crash.c at master

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / arch / powerpc / kexec / crash.c
at master 656 lines 16 kB view raw
wrap content
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * Architecture specific (PPC64) functions for kexec based crash dumps.
  4 *
  5 * Copyright (C) 2005, IBM Corp.
  6 *
  7 * Created by: Haren Myneni
  8 */
  9
 10#include <linux/kernel.h>
 11#include <linux/smp.h>
 12#include <linux/reboot.h>
 13#include <linux/kexec.h>
 14#include <linux/export.h>
 15#include <linux/crash_dump.h>
 16#include <linux/delay.h>
 17#include <linux/irq.h>
 18#include <linux/types.h>
 19#include <linux/libfdt.h>
 20#include <linux/memory.h>
 21
 22#include <asm/processor.h>
 23#include <asm/machdep.h>
 24#include <asm/kexec.h>
 25#include <asm/smp.h>
 26#include <asm/setjmp.h>
 27#include <asm/debug.h>
 28#include <asm/interrupt.h>
 29#include <asm/kexec_ranges.h>
 30#include <asm/crashdump-ppc64.h>
 31
 32/*
 33 * The primary CPU waits a while for all secondary CPUs to enter. This is to
 34 * avoid sending an IPI if the secondary CPUs are entering
 35 * crash_kexec_secondary on their own (eg via a system reset).
 36 *
 37 * The secondary timeout has to be longer than the primary. Both timeouts are
 38 * in milliseconds.
 39 */
 40#define PRIMARY_TIMEOUT		500
 41#define SECONDARY_TIMEOUT	1000
 42
 43#define IPI_TIMEOUT		10000
 44#define REAL_MODE_TIMEOUT	10000
 45
 46static int time_to_dump;
 47
/*
 * In case of system reset, secondary CPUs enter crash_kexec_secondary() on
 * their own, without an IPI having to be sent explicitly. So, record whether
 * the crash came in via system reset to avoid sending another IPI.
 */
 53static int is_via_system_reset;
 54
 55/*
 56 * crash_wake_offline should be set to 1 by platforms that intend to wake
 57 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
 58 * sets it to 1, since we want to avoid things from happening when an
 59 * offline CPU wakes up due to something like an HMI (malfunction error),
 60 * which propagates to all threads.
 61 */
 62int crash_wake_offline;
 63
 64#define CRASH_HANDLER_MAX 3
 65/* List of shutdown handles */
 66static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
 67static DEFINE_SPINLOCK(crash_handlers_lock);
 68
 69static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
 70static int crash_shutdown_cpu = -1;
 71
 72static int handle_fault(struct pt_regs *regs)
 73{
 74	if (crash_shutdown_cpu == smp_processor_id())
 75		longjmp(crash_shutdown_buf, 1);
 76	return 0;
 77}
 78
 79#ifdef CONFIG_SMP
 80
 81static atomic_t cpus_in_crash;
 82void crash_ipi_callback(struct pt_regs *regs)
 83{
 84	static cpumask_t cpus_state_saved = CPU_MASK_NONE;
 85
 86	int cpu = smp_processor_id();
 87
 88	hard_irq_disable();
 89	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
 90		crash_save_cpu(regs, cpu);
 91		cpumask_set_cpu(cpu, &cpus_state_saved);
 92	}
 93
 94	atomic_inc(&cpus_in_crash);
 95	smp_mb__after_atomic();
 96
 97	/*
 98	 * Starting the kdump boot.
 99	 * This barrier is needed to make sure that all CPUs are stopped.
100	 */
101	while (!time_to_dump)
102		cpu_relax();
103
104	if (ppc_md.kexec_cpu_down)
105		ppc_md.kexec_cpu_down(1, 1);
106
107#ifdef CONFIG_PPC64
108	kexec_smp_wait();
109#else
110	for (;;);	/* FIXME */
111#endif
112
113	/* NOTREACHED */
114}
115
116static void crash_kexec_prepare_cpus(void)
117{
118	unsigned int msecs;
119	volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
120	volatile int tries = 0;
121	int (*old_handler)(struct pt_regs *regs);
122
123	printk(KERN_EMERG "Sending IPI to other CPUs\n");
124
125	if (crash_wake_offline)
126		ncpus = num_present_cpus() - 1;
127
128	/*
129	 * If we came in via system reset, secondaries enter via crash_kexec_secondary().
130	 * So, wait a while for the secondary CPUs to enter for that case.
131	 * Else, send IPI to all other CPUs.
132	 */
133	if (is_via_system_reset)
134		mdelay(PRIMARY_TIMEOUT);
135	else
136		crash_send_ipi(crash_ipi_callback);
137	smp_wmb();
138
139again:
140	/*
141	 * FIXME: Until we will have the way to stop other CPUs reliably,
142	 * the crash CPU will send an IPI and wait for other CPUs to
143	 * respond.
144	 */
145	msecs = IPI_TIMEOUT;
146	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
147		mdelay(1);
148
149	/* Would it be better to replace the trap vector here? */
150
151	if (atomic_read(&cpus_in_crash) >= ncpus) {
152		printk(KERN_EMERG "IPI complete\n");
153		return;
154	}
155
156	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
157		ncpus - atomic_read(&cpus_in_crash));
158
159	/*
160	 * If we have a panic timeout set then we can't wait indefinitely
161	 * for someone to activate system reset. We also give up on the
162	 * second time through if system reset fail to work.
163	 */
164	if ((panic_timeout > 0) || (tries > 0))
165		return;
166
167	/*
168	 * A system reset will cause all CPUs to take an 0x100 exception.
169	 * The primary CPU returns here via setjmp, and the secondary
170	 * CPUs reexecute the crash_kexec_secondary path.
171	 */
172	old_handler = __debugger;
173	__debugger = handle_fault;
174	crash_shutdown_cpu = smp_processor_id();
175
176	if (setjmp(crash_shutdown_buf) == 0) {
177		printk(KERN_EMERG "Activate system reset (dumprestart) "
178				  "to stop other cpu(s)\n");
179
180		/*
181		 * A system reset will force all CPUs to execute the
182		 * crash code again. We need to reset cpus_in_crash so we
183		 * wait for everyone to do this.
184		 */
185		atomic_set(&cpus_in_crash, 0);
186		smp_mb();
187
188		while (atomic_read(&cpus_in_crash) < ncpus)
189			cpu_relax();
190	}
191
192	crash_shutdown_cpu = -1;
193	__debugger = old_handler;
194
195	tries++;
196	goto again;
197}
198
199/*
200 * This function will be called by secondary cpus.
201 */
202void crash_kexec_secondary(struct pt_regs *regs)
203{
204	unsigned long flags;
205	int msecs = SECONDARY_TIMEOUT;
206
207	local_irq_save(flags);
208
209	/* Wait for the primary crash CPU to signal its progress */
210	while (crashing_cpu < 0) {
211		if (--msecs < 0) {
212			/* No response, kdump image may not have been loaded */
213			local_irq_restore(flags);
214			return;
215		}
216
217		mdelay(1);
218	}
219
220	crash_ipi_callback(regs);
221}
222
223#else	/* ! CONFIG_SMP */
224
/*
 * !CONFIG_SMP variant: on PPC64 this only calls smp_release_cpus(); on
 * other builds it does nothing yet.
 */
static void crash_kexec_prepare_cpus(void)
{
	/*
	 * Move the secondaries to us so that the new kernel's 0-0x100
	 * region can be copied safely.
	 *
	 * Open question: should this be done from the kexec code in
	 * setup.c instead?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}
239
/* !CONFIG_SMP stub: intentionally does nothing. */
void crash_kexec_secondary(struct pt_regs *regs)
{
}
243#endif	/* CONFIG_SMP */
244
/*
 * Wait for every other CPU to report at least KEXEC_STATE_REAL_MODE in its
 * paca, sharing one REAL_MODE_TIMEOUT ms budget across all of them. CPUs
 * that are not possible or not online are not waited for.
 */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int budget_ms = REAL_MODE_TIMEOUT;
	int other;

	for (other = 0; other < nr_cpu_ids && budget_ms > 0; other++) {
		/* The calling CPU does not wait for itself. */
		if (other == cpu)
			continue;

		while (paca_ptrs[other]->kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!(cpu_possible(other) && cpu_online(other)) || !budget_ms)
				break;
			budget_ms--;
			mdelay(1);
		}
	}
	mb();
}
#else
/* Nothing to wait for unless both CONFIG_SMP and CONFIG_PPC64 are set. */
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */
270
271void crash_kexec_prepare(void)
272{
273	/* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
274	printk_deferred_enter();
275
276	/*
277	 * This function is only called after the system
278	 * has panicked or is otherwise in a critical state.
279	 * The minimum amount of code to allow a kexec'd kernel
280	 * to run successfully needs to happen here.
281	 *
282	 * In practice this means stopping other cpus in
283	 * an SMP system.
284	 * The kernel is broken so disable interrupts.
285	 */
286	hard_irq_disable();
287
288	/*
289	 * Make a note of crashing cpu. Will be used in machine_kexec
290	 * such that another IPI will not be sent.
291	 */
292	crashing_cpu = smp_processor_id();
293
294	crash_kexec_prepare_cpus();
295}
296
297/*
298 * Register a function to be called on shutdown.  Only use this if you
299 * can't reset your device in the second kernel.
300 */
301int crash_shutdown_register(crash_shutdown_t handler)
302{
303	unsigned int i, rc;
304
305	spin_lock(&crash_handlers_lock);
306	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
307		if (!crash_shutdown_handles[i]) {
308			/* Insert handle at first empty entry */
309			crash_shutdown_handles[i] = handler;
310			rc = 0;
311			break;
312		}
313
314	if (i == CRASH_HANDLER_MAX) {
315		printk(KERN_ERR "Crash shutdown handles full, "
316		       "not registered.\n");
317		rc = 1;
318	}
319
320	spin_unlock(&crash_handlers_lock);
321	return rc;
322}
323EXPORT_SYMBOL(crash_shutdown_register);
324
325int crash_shutdown_unregister(crash_shutdown_t handler)
326{
327	unsigned int i, rc;
328
329	spin_lock(&crash_handlers_lock);
330	for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
331		if (crash_shutdown_handles[i] == handler)
332			break;
333
334	if (i == CRASH_HANDLER_MAX) {
335		printk(KERN_ERR "Crash shutdown handle not found\n");
336		rc = 1;
337	} else {
338		/* Shift handles down */
339		for (; i < (CRASH_HANDLER_MAX - 1); i++)
340			crash_shutdown_handles[i] =
341				crash_shutdown_handles[i+1];
342		/*
343		 * Reset last entry to NULL now that it has been shifted down,
344		 * this will allow new handles to be added here.
345		 */
346		crash_shutdown_handles[i] = NULL;
347		rc = 0;
348	}
349
350	spin_unlock(&crash_handlers_lock);
351	return rc;
352}
353EXPORT_SYMBOL(crash_shutdown_unregister);
354
355void default_machine_crash_shutdown(struct pt_regs *regs)
356{
357	volatile unsigned int i;
358	int (*old_handler)(struct pt_regs *regs);
359
360	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
361		is_via_system_reset = 1;
362
363	if (IS_ENABLED(CONFIG_SMP))
364		crash_smp_send_stop();
365	else
366		crash_kexec_prepare();
367
368	crash_save_cpu(regs, crashing_cpu);
369
370	time_to_dump = 1;
371
372	crash_kexec_wait_realmode(crashing_cpu);
373
374	machine_kexec_mask_interrupts();
375
376	/*
377	 * Call registered shutdown routines safely.  Swap out
378	 * __debugger_fault_handler, and replace on exit.
379	 */
380	old_handler = __debugger_fault_handler;
381	__debugger_fault_handler = handle_fault;
382	crash_shutdown_cpu = smp_processor_id();
383	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
384		if (setjmp(crash_shutdown_buf) == 0) {
385			/*
386			 * Insert syncs and delay to ensure
387			 * instructions in the dangerous region don't
388			 * leak away from this protected region.
389			 */
390			asm volatile("sync; isync");
391			/* dangerous region */
392			crash_shutdown_handles[i]();
393			asm volatile("sync; isync");
394		}
395	}
396	crash_shutdown_cpu = -1;
397	__debugger_fault_handler = old_handler;
398
399	if (ppc_md.kexec_cpu_down)
400		ppc_md.kexec_cpu_down(1, 0);
401}
402
403#ifdef CONFIG_CRASH_DUMP
404/**
405 * sync_backup_region_phdr - synchronize backup region offset between
406 *			    kexec image and ELF core header.
407 * @image: Kexec image.
408 * @ehdr: ELF core header.
409 * @phdr_to_kimage: If true, read the offset from the ELF program header
410 *		    and update the kimage backup region. If false, update
411 *		    the ELF program header offset from the kimage backup
412 *		    region.
413 *
414 * Note: During kexec_load, this is called with phdr_to_kimage = true. For
415 * kexec_file_load and ELF core header recreation during memory hotplug
416 * events, it is called with phdr_to_kimage = false.
417 *
418 * Returns nothing.
419 */
420void sync_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr, bool phdr_to_kimage)
421{
422	Elf64_Phdr *phdr;
423	unsigned int i;
424
425	phdr = (Elf64_Phdr *)(ehdr + 1);
426	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
427		if (phdr->p_paddr == BACKUP_SRC_START) {
428			if (phdr_to_kimage)
429				image->arch.backup_start = phdr->p_offset;
430			else
431				phdr->p_offset = image->arch.backup_start;
432
433			kexec_dprintk("Backup region offset updated to 0x%lx\n",
434				      image->arch.backup_start);
435			return;
436		}
437	}
438}
439#endif /* CONFIG_CRASH_DUMP */
440
441#ifdef CONFIG_CRASH_HOTPLUG
442
443int machine_kexec_post_load(struct kimage *image)
444{
445	int i;
446	unsigned long mem;
447	unsigned char *ptr;
448
449	if (image->type != KEXEC_TYPE_CRASH)
450		return 0;
451
452	if (image->file_mode)
453		return 0;
454
455	for (i = 0; i < image->nr_segments; i++) {
456		mem = image->segment[i].mem;
457		ptr = (char *)__va(mem);
458
459		if (ptr && memcmp(ptr, ELFMAG, SELFMAG) == 0)
460			sync_backup_region_phdr(image, (Elf64_Ehdr *) ptr, true);
461	}
462	return 0;
463}
464
465#undef pr_fmt
466#define pr_fmt(fmt) "crash hp: " fmt
467
468/*
469 * Advertise preferred elfcorehdr size to userspace via
470 * /sys/kernel/crash_elfcorehdr_size sysfs interface.
471 */
472unsigned int arch_crash_get_elfcorehdr_size(void)
473{
474	unsigned long phdr_cnt;
475
476	/* A program header for possible CPUs + vmcoreinfo */
477	phdr_cnt = num_possible_cpus() + 1;
478	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
479		phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;
480
481	return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
482}
483
484/**
485 * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
486 *			       elfcorehdr in the kexec segment array.
487 * @image: the active struct kimage
488 * @mn: struct memory_notify data handler
489 */
490static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
491{
492	int ret;
493	struct crash_mem *cmem = NULL;
494	struct kexec_segment *ksegment;
495	void *ptr, *mem, *elfbuf = NULL;
496	unsigned long elfsz, memsz, base_addr, size;
497
498	ksegment = &image->segment[image->elfcorehdr_index];
499	mem = (void *) ksegment->mem;
500	memsz = ksegment->memsz;
501
502	ret = get_crash_memory_ranges(&cmem);
503	if (ret) {
504		pr_err("Failed to get crash mem range\n");
505		return;
506	}
507
508	/*
509	 * The hot unplugged memory is part of crash memory ranges,
510	 * remove it here.
511	 */
512	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
513		base_addr = PFN_PHYS(mn->start_pfn);
514		size = mn->nr_pages * PAGE_SIZE;
515		ret = remove_mem_range(&cmem, base_addr, size);
516		if (ret) {
517			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
518			goto out;
519		}
520	}
521
522	ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
523	if (ret) {
524		pr_err("Failed to prepare elf header\n");
525		goto out;
526	}
527
528	/*
529	 * It is unlikely that kernel hit this because elfcorehdr kexec
530	 * segment (memsz) is built with addition space to accommodate growing
531	 * number of crash memory ranges while loading the kdump kernel. It is
532	 * Just to avoid any unforeseen case.
533	 */
534	if (elfsz > memsz) {
535		pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz);
536		goto out;
537	}
538
539	sync_backup_region_phdr(image, (Elf64_Ehdr *) elfbuf, false);
540
541	ptr = __va(mem);
542	if (ptr) {
543		/* Temporarily invalidate the crash image while it is replaced */
544		xchg(&kexec_crash_image, NULL);
545
546		/* Replace the old elfcorehdr with newly prepared elfcorehdr */
547		memcpy((void *)ptr, elfbuf, elfsz);
548
549		/* The crash image is now valid once again */
550		xchg(&kexec_crash_image, image);
551	}
552out:
553	kvfree(cmem);
554	kvfree(elfbuf);
555}
556
557/**
558 * get_fdt_index - Loop through the kexec segment array and find
559 *		   the index of the FDT segment.
560 * @image: a pointer to kexec_crash_image
561 *
562 * Returns the index of FDT segment in the kexec segment array
563 * if found; otherwise -1.
564 */
565static int get_fdt_index(struct kimage *image)
566{
567	void *ptr;
568	unsigned long mem;
569	int i, fdt_index = -1;
570
571	/* Find the FDT segment index in kexec segment array. */
572	for (i = 0; i < image->nr_segments; i++) {
573		mem = image->segment[i].mem;
574		ptr = __va(mem);
575
576		if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
577			fdt_index = i;
578			break;
579		}
580	}
581
582	return fdt_index;
583}
584
585/**
586 * update_crash_fdt - updates the cpus node of the crash FDT.
587 *
588 * @image: a pointer to kexec_crash_image
589 */
590static void update_crash_fdt(struct kimage *image)
591{
592	void *fdt;
593	int fdt_index;
594
595	fdt_index = get_fdt_index(image);
596	if (fdt_index < 0) {
597		pr_err("Unable to locate FDT segment.\n");
598		return;
599	}
600
601	fdt = __va((void *)image->segment[fdt_index].mem);
602
603	/* Temporarily invalidate the crash image while it is replaced */
604	xchg(&kexec_crash_image, NULL);
605
606	/* update FDT to reflect changes in CPU resources */
607	if (update_cpus_node(fdt))
608		pr_err("Failed to update crash FDT");
609
610	/* The crash image is now valid once again */
611	xchg(&kexec_crash_image, image);
612}
613
614int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
615{
616#ifdef CONFIG_KEXEC_FILE
617	if (image->file_mode)
618		return 1;
619#endif
620	return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
621}
622
623/**
624 * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the
625 *				     necessary kexec segments based on the hotplug event.
626 * @image: a pointer to kexec_crash_image
627 * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case.
628 *
629 * Update the kdump image based on the type of hotplug event, represented by image->hp_action.
630 * CPU add: Update the FDT segment to include the newly added CPU.
631 * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs
632 *	       part of the FDT.
633 * Memory add/remove: No action is taken as this is not yet supported.
634 */
635void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
636{
637	struct memory_notify *mn;
638
639	switch (image->hp_action) {
640	case KEXEC_CRASH_HP_REMOVE_CPU:
641		return;
642
643	case KEXEC_CRASH_HP_ADD_CPU:
644		update_crash_fdt(image);
645		break;
646
647	case KEXEC_CRASH_HP_REMOVE_MEMORY:
648	case KEXEC_CRASH_HP_ADD_MEMORY:
649		mn = (struct memory_notify *)arg;
650		update_crash_elfcorehdr(image, mn);
651		return;
652	default:
653		pr_warn_once("Unknown hotplug action\n");
654	}
655}
656#endif /* CONFIG_CRASH_HOTPLUG */
Configure Feed

Configure Feed