
kexec: split kexec_load syscall from kexec core code

There are two kexec load syscalls: kexec_load and kexec_file_load.
kexec_file_load has already been split out into kernel/kexec_file.c. In this
patch I split the kexec_load syscall code out into kernel/kexec.c.

Also add a new Kconfig option, KEXEC_CORE, so that we can disable kexec_load
and use only kexec_file_load, or vice versa.
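For example (a hypothetical .config fragment, not part of this patch), a
kernel that should accept only the file-based loader would end up with:

# CONFIG_KEXEC is not set
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_CORE=y

KEXEC_CORE itself has no prompt; it is switched on by whichever loader(s)
are selected.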

The original requirement came from Ted Ts'o: he wants the kexec kernel
signature to be checked when CONFIG_KEXEC_VERIFY_SIG is enabled, but
kexec-tools can bypass that check by using the kexec_load syscall.
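For illustration only (not part of this patch), a minimal userspace sketch of
the file-based load path. glibc provides no wrappers for either syscall, so
syscall(2) is used directly; the /boot paths and command line are hypothetical.

/*
 * Minimal sketch: load a kernel via kexec_file_load, the loader that
 * permits in-kernel signature checking (CONFIG_KEXEC_VERIFY_SIG).
 * kexec_load, by contrast, passes pre-built memory segments from user
 * space, so the kernel never sees the original image and cannot verify
 * a signature. Requires CAP_SYS_BOOT; paths are hypothetical.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        const char cmdline[] = "root=/dev/sda1 ro";
        int kernel_fd = open("/boot/vmlinuz", O_RDONLY);
        int initrd_fd = open("/boot/initrd.img", O_RDONLY);

        if (kernel_fd < 0 || initrd_fd < 0) {
                perror("open");
                return 1;
        }
        /*
         * long kexec_file_load(int kernel_fd, int initrd_fd,
         *                      unsigned long cmdline_len,
         *                      const char *cmdline, unsigned long flags);
         * cmdline_len must count the trailing NUL.
         */
        if (syscall(SYS_kexec_file_load, kernel_fd, initrd_fd,
                    sizeof(cmdline), cmdline, 0UL) < 0) {
                perror("kexec_file_load");
                return 1;
        }
        /* The loaded kernel is started later, e.g. via
         * reboot(LINUX_REBOOT_CMD_KEXEC). */
        return 0;
}

On a kernel built with KEXEC_FILE but not KEXEC, the kexec_load syscall is
absent entirely, so the signature check cannot be sidestepped.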

Vivek Goyal proposed creating a common Kconfig option so that users can
compile in only one of the syscalls for loading a kexec kernel. KEXEC and
KEXEC_FILE both select KEXEC_CORE so that old config files still work.

Because generic code also needs CONFIG_KEXEC_CORE, I added the new
KEXEC_CORE option to arch/Kconfig and made KEXEC select KEXEC_CORE in each
architecture's Kconfig. I also updated the generic kernel code that used
CONFIG_KEXEC to use CONFIG_KEXEC_CORE where it guards the shared kexec core
rather than the kexec_load syscall.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Dave Young <dyoung@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Petr Tesarik <ptesarik@suse.cz>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Josh Boyer <jwboyer@fedoraproject.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Dave Young, committed by Linus Torvalds
2965faa5 a43cac0d

+1560 -1527
+3
arch/Kconfig
···
 # General architecture dependent options
 #
 
+config KEXEC_CORE
+        bool
+
 config OPROFILE
         tristate "OProfile system profiling"
         depends on PROFILING
+1
arch/arm/Kconfig
···
         bool "Kexec system call (EXPERIMENTAL)"
         depends on (!SMP || PM_SLEEP_SMP)
         depends on !CPU_V7M
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/ia64/Kconfig
···
 config KEXEC
         bool "kexec system call"
         depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/m68k/Kconfig
···
 config KEXEC
         bool "kexec system call"
         depends on M68KCLASSIC
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/mips/Kconfig
···
 
 config KEXEC
         bool "Kexec system call"
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/powerpc/Kconfig
···
 config KEXEC
         bool "kexec system call"
         depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/s390/Kconfig
···
 
 config KEXEC
         def_bool y
+        select KEXEC_CORE
 
 config AUDIT_ARCH
         def_bool y
+1
arch/sh/Kconfig
···
 config KEXEC
         bool "kexec system call (EXPERIMENTAL)"
         depends on SUPERH32 && MMU
+        select KEXEC_CORE
         help
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+1
arch/tile/Kconfig
···
 
 config KEXEC
         bool "kexec system call"
+        select KEXEC_CORE
         ---help---
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
+2 -1
arch/x86/Kconfig
···
 
 config KEXEC
         bool "kexec system call"
+        select KEXEC_CORE
         ---help---
           kexec is a system call that implements the ability to shutdown your
           current kernel, and to start another kernel. It is like a reboot
···
 
 config KEXEC_FILE
         bool "kexec file based system call"
+        select KEXEC_CORE
         select BUILD_BIN2C
-        depends on KEXEC
         depends on X86_64
         depends on CRYPTO=y
         depends on CRYPTO_SHA256=y
+1 -1
arch/x86/boot/header.S
···
 # define XLF23 0
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC)
+#if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC_CORE)
 # define XLF4 XLF_EFI_KEXEC
 #else
 # define XLF4 0
+1 -1
arch/x86/include/asm/kdebug.h
···
 extern void __show_regs(struct pt_regs *regs, int all);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 extern int in_crash_kexec;
 #else
 /* no crash dump is ever in progress if no crash kernel can be kexec'd */
+2 -2
arch/x86/kernel/Makefile
···
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
 obj-$(CONFIG_X86_TSC) += trace_clock.o
-obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
 obj-y += kprobes/
+2 -2
arch/x86/kernel/kvmclock.c
···
  * kind of shutdown from our side, we unregister the clock by writting anything
  * that does not have the 'enable' bit set in the msr
  */
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
         native_write_msr(msr_kvm_system_time, 0, 0);
···
         x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
         x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
         machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         machine_ops.crash_shutdown = kvm_crash_shutdown;
 #endif
         kvm_get_preset_lpj();
+2 -2
arch/x86/kernel/reboot.c
···
         .emergency_restart = native_machine_emergency_restart,
         .restart = native_machine_restart,
         .halt = native_machine_halt,
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         .crash_shutdown = native_machine_crash_shutdown,
 #endif
 };
···
         machine_ops.halt();
 }
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 void machine_crash_shutdown(struct pt_regs *regs)
 {
         machine_ops.crash_shutdown(regs);
+1 -1
arch/x86/kernel/setup.c
···
  * --------- Crashkernel reservation ------------------------------
  */
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 
 /*
  * Keep the crash kernel below this limit. On 32 bits earlier kernels
+1 -1
arch/x86/kernel/vmlinux.lds.S
···
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 #include <asm/kexec.h>
 
 . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+4 -4
arch/x86/kvm/vmx.c
···
                         vmcs, phys_addr);
 }
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 /*
  * This bitmap is used to indicate whether the vmclear
  * operation is enabled on all cpus. All disabled by
···
 #else
 static inline void crash_enable_local_vmclear(int cpu) { }
 static inline void crash_disable_local_vmclear(int cpu) { }
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
···
         if (r)
                 return r;
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                            crash_vmclear_local_loaded_vmcss);
 #endif
···
 
 static void __exit vmx_exit(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
         synchronize_rcu();
 #endif
+2 -2
arch/x86/platform/efi/efi.c
···
 
 static void __init save_runtime_map(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         efi_memory_desc_t *md;
         void *tmp, *p, *q = NULL;
         int count = 0;
···
 
 static void __init kexec_enter_virtual_mode(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         efi_memory_desc_t *md;
         void *p;
 
+3 -3
arch/x86/platform/uv/uv_nmi.c
···
         touch_nmi_watchdog();
 }
 
-#if defined(CONFIG_KEXEC)
+#if defined(CONFIG_KEXEC_CORE)
 static atomic_t uv_nmi_kexec_failed;
 static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
···
         uv_nmi_sync_exit(0);
 }
 
-#else /* !CONFIG_KEXEC */
+#else /* !CONFIG_KEXEC_CORE */
 static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs)
 {
         if (master)
                 pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n");
 }
-#endif /* !CONFIG_KEXEC */
+#endif /* !CONFIG_KEXEC_CORE */
 
 #ifdef CONFIG_KGDB
 #ifdef CONFIG_KGDB_KDB
+1 -1
drivers/firmware/efi/Kconfig
···
 
 config EFI_RUNTIME_MAP
         bool "Export efi runtime maps to sysfs"
-        depends on X86 && EFI && KEXEC
+        depends on X86 && EFI && KEXEC_CORE
         default y
         help
           Export efi runtime memory maps to /sys/firmware/efi/runtime-map.
+1 -1
drivers/pci/pci-driver.c
···
         pci_msi_shutdown(pci_dev);
         pci_msix_shutdown(pci_dev);
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         /*
          * If this is a kexec reboot, turn off Bus Master bit on the
          * device to tell it to not continue to do DMA. Don't touch
+3 -3
include/linux/kexec.h
···
 
 #include <uapi/linux/kexec.h>
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
 #include <linux/list.h>
 #include <linux/linkage.h>
 #include <linux/compat.h>
···
 int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
                                         unsigned int relsec);
 
-#else /* !CONFIG_KEXEC */
+#else /* !CONFIG_KEXEC_CORE */
 struct pt_regs;
 struct task_struct;
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #define kexec_in_progress false
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
 
 #endif /* !defined(__ASSEBMLY__) */
+2 -2
init/initramfs.c
···
 
 static void __init free_initrd(void)
 {
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
         unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
 #endif
         if (do_retain_initrd)
                 goto skip;
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
         /*
          * If the initrd region is overlapped with crashkernel reserved region,
          * free only memory that is not part of crashkernel region.
+1
kernel/Makefile
···
 obj-$(CONFIG_MODULE_SIG) += module_signing.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
+1 -1
kernel/events/core.c
···
         mutex_unlock(&swhash->hlist_mutex);
 }
 
-#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
+#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
 static void __perf_event_exit_context(void *__info)
 {
         struct remove_event re = { .detach_group = true };
+2 -1493
kernel/kexec.c
··· 1 1 /* 2 - * kexec.c - kexec system call 2 + * kexec.c - kexec_load system call 3 3 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 4 4 * 5 5 * This source code is licensed under the GNU General Public License, 6 6 * Version 2. See the file COPYING for more details. 7 7 */ 8 8 9 - #define pr_fmt(fmt) "kexec: " fmt 10 - 11 9 #include <linux/capability.h> 12 10 #include <linux/mm.h> 13 11 #include <linux/file.h> 14 - #include <linux/slab.h> 15 - #include <linux/fs.h> 16 12 #include <linux/kexec.h> 17 13 #include <linux/mutex.h> 18 14 #include <linux/list.h> 19 - #include <linux/highmem.h> 20 15 #include <linux/syscalls.h> 21 - #include <linux/reboot.h> 22 - #include <linux/ioport.h> 23 - #include <linux/hardirq.h> 24 - #include <linux/elf.h> 25 - #include <linux/elfcore.h> 26 - #include <linux/utsname.h> 27 - #include <linux/numa.h> 28 - #include <linux/suspend.h> 29 - #include <linux/device.h> 30 - #include <linux/freezer.h> 31 16 #include <linux/vmalloc.h> 32 - #include <linux/pm.h> 33 - #include <linux/cpu.h> 34 - #include <linux/console.h> 35 - #include <linux/swap.h> 36 - #include <linux/syscore_ops.h> 37 - #include <linux/compiler.h> 38 - #include <linux/hugetlb.h> 17 + #include <linux/slab.h> 39 18 40 - #include <asm/page.h> 41 - #include <asm/uaccess.h> 42 - #include <asm/io.h> 43 - #include <asm/sections.h> 44 - 45 - #include <crypto/hash.h> 46 - #include <crypto/sha.h> 47 19 #include "kexec_internal.h" 48 - 49 - DEFINE_MUTEX(kexec_mutex); 50 - 51 - /* Per cpu memory for storing cpu states in case of system crash. */ 52 - note_buf_t __percpu *crash_notes; 53 - 54 - /* vmcoreinfo stuff */ 55 - static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; 56 - u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; 57 - size_t vmcoreinfo_size; 58 - size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); 59 - 60 - /* Flag to indicate we are going to kexec a new kernel */ 61 - bool kexec_in_progress = false; 62 - 63 - 64 - /* Location of the reserved area for the crash kernel */ 65 - struct resource crashk_res = { 66 - .name = "Crash kernel", 67 - .start = 0, 68 - .end = 0, 69 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM 70 - }; 71 - struct resource crashk_low_res = { 72 - .name = "Crash kernel", 73 - .start = 0, 74 - .end = 0, 75 - .flags = IORESOURCE_BUSY | IORESOURCE_MEM 76 - }; 77 - 78 - int kexec_should_crash(struct task_struct *p) 79 - { 80 - /* 81 - * If crash_kexec_post_notifiers is enabled, don't run 82 - * crash_kexec() here yet, which must be run after panic 83 - * notifiers in panic(). 84 - */ 85 - if (crash_kexec_post_notifiers) 86 - return 0; 87 - /* 88 - * There are 4 panic() calls in do_exit() path, each of which 89 - * corresponds to each of these 4 conditions. 90 - */ 91 - if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) 92 - return 1; 93 - return 0; 94 - } 95 - 96 - /* 97 - * When kexec transitions to the new kernel there is a one-to-one 98 - * mapping between physical and virtual addresses. On processors 99 - * where you can disable the MMU this is trivial, and easy. For 100 - * others it is still a simple predictable page table to setup. 101 - * 102 - * In that environment kexec copies the new kernel to its final 103 - * resting place. This means I can only support memory whose 104 - * physical address can fit in an unsigned long. In particular 105 - * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. 
106 - * If the assembly stub has more restrictive requirements 107 - * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be 108 - * defined more restrictively in <asm/kexec.h>. 109 - * 110 - * The code for the transition from the current kernel to the 111 - * the new kernel is placed in the control_code_buffer, whose size 112 - * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single 113 - * page of memory is necessary, but some architectures require more. 114 - * Because this memory must be identity mapped in the transition from 115 - * virtual to physical addresses it must live in the range 116 - * 0 - TASK_SIZE, as only the user space mappings are arbitrarily 117 - * modifiable. 118 - * 119 - * The assembly stub in the control code buffer is passed a linked list 120 - * of descriptor pages detailing the source pages of the new kernel, 121 - * and the destination addresses of those source pages. As this data 122 - * structure is not used in the context of the current OS, it must 123 - * be self-contained. 124 - * 125 - * The code has been made to work with highmem pages and will use a 126 - * destination page in its final resting place (if it happens 127 - * to allocate it). The end product of this is that most of the 128 - * physical address space, and most of RAM can be used. 129 - * 130 - * Future directions include: 131 - * - allocating a page table with the control code buffer identity 132 - * mapped, to simplify machine_kexec and make kexec_on_panic more 133 - * reliable. 134 - */ 135 - 136 - /* 137 - * KIMAGE_NO_DEST is an impossible destination address..., for 138 - * allocating pages whose destination address we do not care about. 139 - */ 140 - #define KIMAGE_NO_DEST (-1UL) 141 - 142 - static struct page *kimage_alloc_page(struct kimage *image, 143 - gfp_t gfp_mask, 144 - unsigned long dest); 145 20 146 21 static int copy_user_segment_list(struct kimage *image, 147 22 unsigned long nr_segments, ··· 33 158 ret = -EFAULT; 34 159 35 160 return ret; 36 - } 37 - 38 - int sanity_check_segment_list(struct kimage *image) 39 - { 40 - int result, i; 41 - unsigned long nr_segments = image->nr_segments; 42 - 43 - /* 44 - * Verify we have good destination addresses. The caller is 45 - * responsible for making certain we don't attempt to load 46 - * the new image into invalid or reserved areas of RAM. This 47 - * just verifies it is an address we can use. 48 - * 49 - * Since the kernel does everything in page size chunks ensure 50 - * the destination addresses are page aligned. Too many 51 - * special cases crop of when we don't do this. The most 52 - * insidious is getting overlapping destination addresses 53 - * simply because addresses are changed to page size 54 - * granularity. 55 - */ 56 - result = -EADDRNOTAVAIL; 57 - for (i = 0; i < nr_segments; i++) { 58 - unsigned long mstart, mend; 59 - 60 - mstart = image->segment[i].mem; 61 - mend = mstart + image->segment[i].memsz; 62 - if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 63 - return result; 64 - if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 65 - return result; 66 - } 67 - 68 - /* Verify our destination addresses do not overlap. 69 - * If we alloed overlapping destination addresses 70 - * through very weird things can happen with no 71 - * easy explanation as one segment stops on another. 
72 - */ 73 - result = -EINVAL; 74 - for (i = 0; i < nr_segments; i++) { 75 - unsigned long mstart, mend; 76 - unsigned long j; 77 - 78 - mstart = image->segment[i].mem; 79 - mend = mstart + image->segment[i].memsz; 80 - for (j = 0; j < i; j++) { 81 - unsigned long pstart, pend; 82 - pstart = image->segment[j].mem; 83 - pend = pstart + image->segment[j].memsz; 84 - /* Do the segments overlap ? */ 85 - if ((mend > pstart) && (mstart < pend)) 86 - return result; 87 - } 88 - } 89 - 90 - /* Ensure our buffer sizes are strictly less than 91 - * our memory sizes. This should always be the case, 92 - * and it is easier to check up front than to be surprised 93 - * later on. 94 - */ 95 - result = -EINVAL; 96 - for (i = 0; i < nr_segments; i++) { 97 - if (image->segment[i].bufsz > image->segment[i].memsz) 98 - return result; 99 - } 100 - 101 - /* 102 - * Verify we have good destination addresses. Normally 103 - * the caller is responsible for making certain we don't 104 - * attempt to load the new image into invalid or reserved 105 - * areas of RAM. But crash kernels are preloaded into a 106 - * reserved area of ram. We must ensure the addresses 107 - * are in the reserved area otherwise preloading the 108 - * kernel could corrupt things. 109 - */ 110 - 111 - if (image->type == KEXEC_TYPE_CRASH) { 112 - result = -EADDRNOTAVAIL; 113 - for (i = 0; i < nr_segments; i++) { 114 - unsigned long mstart, mend; 115 - 116 - mstart = image->segment[i].mem; 117 - mend = mstart + image->segment[i].memsz - 1; 118 - /* Ensure we are within the crash kernel limits */ 119 - if ((mstart < crashk_res.start) || 120 - (mend > crashk_res.end)) 121 - return result; 122 - } 123 - } 124 - 125 - return 0; 126 - } 127 - 128 - struct kimage *do_kimage_alloc_init(void) 129 - { 130 - struct kimage *image; 131 - 132 - /* Allocate a controlling structure */ 133 - image = kzalloc(sizeof(*image), GFP_KERNEL); 134 - if (!image) 135 - return NULL; 136 - 137 - image->head = 0; 138 - image->entry = &image->head; 139 - image->last_entry = &image->head; 140 - image->control_page = ~0; /* By default this does not apply */ 141 - image->type = KEXEC_TYPE_DEFAULT; 142 - 143 - /* Initialize the list of control pages */ 144 - INIT_LIST_HEAD(&image->control_pages); 145 - 146 - /* Initialize the list of destination pages */ 147 - INIT_LIST_HEAD(&image->dest_pages); 148 - 149 - /* Initialize the list of unusable pages */ 150 - INIT_LIST_HEAD(&image->unusable_pages); 151 - 152 - return image; 153 161 } 154 162 155 163 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry, ··· 101 343 return ret; 102 344 } 103 345 104 - int kimage_is_destination_range(struct kimage *image, 105 - unsigned long start, 106 - unsigned long end) 107 - { 108 - unsigned long i; 109 - 110 - for (i = 0; i < image->nr_segments; i++) { 111 - unsigned long mstart, mend; 112 - 113 - mstart = image->segment[i].mem; 114 - mend = mstart + image->segment[i].memsz; 115 - if ((end > mstart) && (start < mend)) 116 - return 1; 117 - } 118 - 119 - return 0; 120 - } 121 - 122 - static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) 123 - { 124 - struct page *pages; 125 - 126 - pages = alloc_pages(gfp_mask, order); 127 - if (pages) { 128 - unsigned int count, i; 129 - pages->mapping = NULL; 130 - set_page_private(pages, order); 131 - count = 1 << order; 132 - for (i = 0; i < count; i++) 133 - SetPageReserved(pages + i); 134 - } 135 - 136 - return pages; 137 - } 138 - 139 - static void kimage_free_pages(struct page *page) 140 - { 141 - unsigned int 
order, count, i; 142 - 143 - order = page_private(page); 144 - count = 1 << order; 145 - for (i = 0; i < count; i++) 146 - ClearPageReserved(page + i); 147 - __free_pages(page, order); 148 - } 149 - 150 - void kimage_free_page_list(struct list_head *list) 151 - { 152 - struct list_head *pos, *next; 153 - 154 - list_for_each_safe(pos, next, list) { 155 - struct page *page; 156 - 157 - page = list_entry(pos, struct page, lru); 158 - list_del(&page->lru); 159 - kimage_free_pages(page); 160 - } 161 - } 162 - 163 - static struct page *kimage_alloc_normal_control_pages(struct kimage *image, 164 - unsigned int order) 165 - { 166 - /* Control pages are special, they are the intermediaries 167 - * that are needed while we copy the rest of the pages 168 - * to their final resting place. As such they must 169 - * not conflict with either the destination addresses 170 - * or memory the kernel is already using. 171 - * 172 - * The only case where we really need more than one of 173 - * these are for architectures where we cannot disable 174 - * the MMU and must instead generate an identity mapped 175 - * page table for all of the memory. 176 - * 177 - * At worst this runs in O(N) of the image size. 178 - */ 179 - struct list_head extra_pages; 180 - struct page *pages; 181 - unsigned int count; 182 - 183 - count = 1 << order; 184 - INIT_LIST_HEAD(&extra_pages); 185 - 186 - /* Loop while I can allocate a page and the page allocated 187 - * is a destination page. 188 - */ 189 - do { 190 - unsigned long pfn, epfn, addr, eaddr; 191 - 192 - pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); 193 - if (!pages) 194 - break; 195 - pfn = page_to_pfn(pages); 196 - epfn = pfn + count; 197 - addr = pfn << PAGE_SHIFT; 198 - eaddr = epfn << PAGE_SHIFT; 199 - if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || 200 - kimage_is_destination_range(image, addr, eaddr)) { 201 - list_add(&pages->lru, &extra_pages); 202 - pages = NULL; 203 - } 204 - } while (!pages); 205 - 206 - if (pages) { 207 - /* Remember the allocated page... */ 208 - list_add(&pages->lru, &image->control_pages); 209 - 210 - /* Because the page is already in it's destination 211 - * location we will never allocate another page at 212 - * that address. Therefore kimage_alloc_pages 213 - * will not return it (again) and we don't need 214 - * to give it an entry in image->segment[]. 215 - */ 216 - } 217 - /* Deal with the destination pages I have inadvertently allocated. 218 - * 219 - * Ideally I would convert multi-page allocations into single 220 - * page allocations, and add everything to image->dest_pages. 221 - * 222 - * For now it is simpler to just free the pages. 223 - */ 224 - kimage_free_page_list(&extra_pages); 225 - 226 - return pages; 227 - } 228 - 229 - static struct page *kimage_alloc_crash_control_pages(struct kimage *image, 230 - unsigned int order) 231 - { 232 - /* Control pages are special, they are the intermediaries 233 - * that are needed while we copy the rest of the pages 234 - * to their final resting place. As such they must 235 - * not conflict with either the destination addresses 236 - * or memory the kernel is already using. 237 - * 238 - * Control pages are also the only pags we must allocate 239 - * when loading a crash kernel. All of the other pages 240 - * are specified by the segments and we just memcpy 241 - * into them directly. 
242 - * 243 - * The only case where we really need more than one of 244 - * these are for architectures where we cannot disable 245 - * the MMU and must instead generate an identity mapped 246 - * page table for all of the memory. 247 - * 248 - * Given the low demand this implements a very simple 249 - * allocator that finds the first hole of the appropriate 250 - * size in the reserved memory region, and allocates all 251 - * of the memory up to and including the hole. 252 - */ 253 - unsigned long hole_start, hole_end, size; 254 - struct page *pages; 255 - 256 - pages = NULL; 257 - size = (1 << order) << PAGE_SHIFT; 258 - hole_start = (image->control_page + (size - 1)) & ~(size - 1); 259 - hole_end = hole_start + size - 1; 260 - while (hole_end <= crashk_res.end) { 261 - unsigned long i; 262 - 263 - if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) 264 - break; 265 - /* See if I overlap any of the segments */ 266 - for (i = 0; i < image->nr_segments; i++) { 267 - unsigned long mstart, mend; 268 - 269 - mstart = image->segment[i].mem; 270 - mend = mstart + image->segment[i].memsz - 1; 271 - if ((hole_end >= mstart) && (hole_start <= mend)) { 272 - /* Advance the hole to the end of the segment */ 273 - hole_start = (mend + (size - 1)) & ~(size - 1); 274 - hole_end = hole_start + size - 1; 275 - break; 276 - } 277 - } 278 - /* If I don't overlap any segments I have found my hole! */ 279 - if (i == image->nr_segments) { 280 - pages = pfn_to_page(hole_start >> PAGE_SHIFT); 281 - break; 282 - } 283 - } 284 - if (pages) 285 - image->control_page = hole_end; 286 - 287 - return pages; 288 - } 289 - 290 - 291 - struct page *kimage_alloc_control_pages(struct kimage *image, 292 - unsigned int order) 293 - { 294 - struct page *pages = NULL; 295 - 296 - switch (image->type) { 297 - case KEXEC_TYPE_DEFAULT: 298 - pages = kimage_alloc_normal_control_pages(image, order); 299 - break; 300 - case KEXEC_TYPE_CRASH: 301 - pages = kimage_alloc_crash_control_pages(image, order); 302 - break; 303 - } 304 - 305 - return pages; 306 - } 307 - 308 - static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) 309 - { 310 - if (*image->entry != 0) 311 - image->entry++; 312 - 313 - if (image->entry == image->last_entry) { 314 - kimage_entry_t *ind_page; 315 - struct page *page; 316 - 317 - page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); 318 - if (!page) 319 - return -ENOMEM; 320 - 321 - ind_page = page_address(page); 322 - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; 323 - image->entry = ind_page; 324 - image->last_entry = ind_page + 325 - ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); 326 - } 327 - *image->entry = entry; 328 - image->entry++; 329 - *image->entry = 0; 330 - 331 - return 0; 332 - } 333 - 334 - static int kimage_set_destination(struct kimage *image, 335 - unsigned long destination) 336 - { 337 - int result; 338 - 339 - destination &= PAGE_MASK; 340 - result = kimage_add_entry(image, destination | IND_DESTINATION); 341 - 342 - return result; 343 - } 344 - 345 - 346 - static int kimage_add_page(struct kimage *image, unsigned long page) 347 - { 348 - int result; 349 - 350 - page &= PAGE_MASK; 351 - result = kimage_add_entry(image, page | IND_SOURCE); 352 - 353 - return result; 354 - } 355 - 356 - 357 - static void kimage_free_extra_pages(struct kimage *image) 358 - { 359 - /* Walk through and free any extra destination pages I may have */ 360 - kimage_free_page_list(&image->dest_pages); 361 - 362 - /* Walk through and free any unusable pages I have cached */ 363 - 
kimage_free_page_list(&image->unusable_pages); 364 - 365 - } 366 - void kimage_terminate(struct kimage *image) 367 - { 368 - if (*image->entry != 0) 369 - image->entry++; 370 - 371 - *image->entry = IND_DONE; 372 - } 373 - 374 - #define for_each_kimage_entry(image, ptr, entry) \ 375 - for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ 376 - ptr = (entry & IND_INDIRECTION) ? \ 377 - phys_to_virt((entry & PAGE_MASK)) : ptr + 1) 378 - 379 - static void kimage_free_entry(kimage_entry_t entry) 380 - { 381 - struct page *page; 382 - 383 - page = pfn_to_page(entry >> PAGE_SHIFT); 384 - kimage_free_pages(page); 385 - } 386 - 387 - void kimage_free(struct kimage *image) 388 - { 389 - kimage_entry_t *ptr, entry; 390 - kimage_entry_t ind = 0; 391 - 392 - if (!image) 393 - return; 394 - 395 - kimage_free_extra_pages(image); 396 - for_each_kimage_entry(image, ptr, entry) { 397 - if (entry & IND_INDIRECTION) { 398 - /* Free the previous indirection page */ 399 - if (ind & IND_INDIRECTION) 400 - kimage_free_entry(ind); 401 - /* Save this indirection page until we are 402 - * done with it. 403 - */ 404 - ind = entry; 405 - } else if (entry & IND_SOURCE) 406 - kimage_free_entry(entry); 407 - } 408 - /* Free the final indirection page */ 409 - if (ind & IND_INDIRECTION) 410 - kimage_free_entry(ind); 411 - 412 - /* Handle any machine specific cleanup */ 413 - machine_kexec_cleanup(image); 414 - 415 - /* Free the kexec control pages... */ 416 - kimage_free_page_list(&image->control_pages); 417 - 418 - /* 419 - * Free up any temporary buffers allocated. This might hit if 420 - * error occurred much later after buffer allocation. 421 - */ 422 - if (image->file_mode) 423 - kimage_file_post_load_cleanup(image); 424 - 425 - kfree(image); 426 - } 427 - 428 - static kimage_entry_t *kimage_dst_used(struct kimage *image, 429 - unsigned long page) 430 - { 431 - kimage_entry_t *ptr, entry; 432 - unsigned long destination = 0; 433 - 434 - for_each_kimage_entry(image, ptr, entry) { 435 - if (entry & IND_DESTINATION) 436 - destination = entry & PAGE_MASK; 437 - else if (entry & IND_SOURCE) { 438 - if (page == destination) 439 - return ptr; 440 - destination += PAGE_SIZE; 441 - } 442 - } 443 - 444 - return NULL; 445 - } 446 - 447 - static struct page *kimage_alloc_page(struct kimage *image, 448 - gfp_t gfp_mask, 449 - unsigned long destination) 450 - { 451 - /* 452 - * Here we implement safeguards to ensure that a source page 453 - * is not copied to its destination page before the data on 454 - * the destination page is no longer useful. 455 - * 456 - * To do this we maintain the invariant that a source page is 457 - * either its own destination page, or it is not a 458 - * destination page at all. 459 - * 460 - * That is slightly stronger than required, but the proof 461 - * that no problems will not occur is trivial, and the 462 - * implementation is simply to verify. 463 - * 464 - * When allocating all pages normally this algorithm will run 465 - * in O(N) time, but in the worst case it will run in O(N^2) 466 - * time. If the runtime is a problem the data structures can 467 - * be fixed. 468 - */ 469 - struct page *page; 470 - unsigned long addr; 471 - 472 - /* 473 - * Walk through the list of destination pages, and see if I 474 - * have a match. 
475 - */ 476 - list_for_each_entry(page, &image->dest_pages, lru) { 477 - addr = page_to_pfn(page) << PAGE_SHIFT; 478 - if (addr == destination) { 479 - list_del(&page->lru); 480 - return page; 481 - } 482 - } 483 - page = NULL; 484 - while (1) { 485 - kimage_entry_t *old; 486 - 487 - /* Allocate a page, if we run out of memory give up */ 488 - page = kimage_alloc_pages(gfp_mask, 0); 489 - if (!page) 490 - return NULL; 491 - /* If the page cannot be used file it away */ 492 - if (page_to_pfn(page) > 493 - (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 494 - list_add(&page->lru, &image->unusable_pages); 495 - continue; 496 - } 497 - addr = page_to_pfn(page) << PAGE_SHIFT; 498 - 499 - /* If it is the destination page we want use it */ 500 - if (addr == destination) 501 - break; 502 - 503 - /* If the page is not a destination page use it */ 504 - if (!kimage_is_destination_range(image, addr, 505 - addr + PAGE_SIZE)) 506 - break; 507 - 508 - /* 509 - * I know that the page is someones destination page. 510 - * See if there is already a source page for this 511 - * destination page. And if so swap the source pages. 512 - */ 513 - old = kimage_dst_used(image, addr); 514 - if (old) { 515 - /* If so move it */ 516 - unsigned long old_addr; 517 - struct page *old_page; 518 - 519 - old_addr = *old & PAGE_MASK; 520 - old_page = pfn_to_page(old_addr >> PAGE_SHIFT); 521 - copy_highpage(page, old_page); 522 - *old = addr | (*old & ~PAGE_MASK); 523 - 524 - /* The old page I have found cannot be a 525 - * destination page, so return it if it's 526 - * gfp_flags honor the ones passed in. 527 - */ 528 - if (!(gfp_mask & __GFP_HIGHMEM) && 529 - PageHighMem(old_page)) { 530 - kimage_free_pages(old_page); 531 - continue; 532 - } 533 - addr = old_addr; 534 - page = old_page; 535 - break; 536 - } else { 537 - /* Place the page on the destination list I 538 - * will use it later. 
539 - */ 540 - list_add(&page->lru, &image->dest_pages); 541 - } 542 - } 543 - 544 - return page; 545 - } 546 - 547 - static int kimage_load_normal_segment(struct kimage *image, 548 - struct kexec_segment *segment) 549 - { 550 - unsigned long maddr; 551 - size_t ubytes, mbytes; 552 - int result; 553 - unsigned char __user *buf = NULL; 554 - unsigned char *kbuf = NULL; 555 - 556 - result = 0; 557 - if (image->file_mode) 558 - kbuf = segment->kbuf; 559 - else 560 - buf = segment->buf; 561 - ubytes = segment->bufsz; 562 - mbytes = segment->memsz; 563 - maddr = segment->mem; 564 - 565 - result = kimage_set_destination(image, maddr); 566 - if (result < 0) 567 - goto out; 568 - 569 - while (mbytes) { 570 - struct page *page; 571 - char *ptr; 572 - size_t uchunk, mchunk; 573 - 574 - page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); 575 - if (!page) { 576 - result = -ENOMEM; 577 - goto out; 578 - } 579 - result = kimage_add_page(image, page_to_pfn(page) 580 - << PAGE_SHIFT); 581 - if (result < 0) 582 - goto out; 583 - 584 - ptr = kmap(page); 585 - /* Start with a clear page */ 586 - clear_page(ptr); 587 - ptr += maddr & ~PAGE_MASK; 588 - mchunk = min_t(size_t, mbytes, 589 - PAGE_SIZE - (maddr & ~PAGE_MASK)); 590 - uchunk = min(ubytes, mchunk); 591 - 592 - /* For file based kexec, source pages are in kernel memory */ 593 - if (image->file_mode) 594 - memcpy(ptr, kbuf, uchunk); 595 - else 596 - result = copy_from_user(ptr, buf, uchunk); 597 - kunmap(page); 598 - if (result) { 599 - result = -EFAULT; 600 - goto out; 601 - } 602 - ubytes -= uchunk; 603 - maddr += mchunk; 604 - if (image->file_mode) 605 - kbuf += mchunk; 606 - else 607 - buf += mchunk; 608 - mbytes -= mchunk; 609 - } 610 - out: 611 - return result; 612 - } 613 - 614 - static int kimage_load_crash_segment(struct kimage *image, 615 - struct kexec_segment *segment) 616 - { 617 - /* For crash dumps kernels we simply copy the data from 618 - * user space to it's destination. 619 - * We do things a page at a time for the sake of kmap. 
620 - */ 621 - unsigned long maddr; 622 - size_t ubytes, mbytes; 623 - int result; 624 - unsigned char __user *buf = NULL; 625 - unsigned char *kbuf = NULL; 626 - 627 - result = 0; 628 - if (image->file_mode) 629 - kbuf = segment->kbuf; 630 - else 631 - buf = segment->buf; 632 - ubytes = segment->bufsz; 633 - mbytes = segment->memsz; 634 - maddr = segment->mem; 635 - while (mbytes) { 636 - struct page *page; 637 - char *ptr; 638 - size_t uchunk, mchunk; 639 - 640 - page = pfn_to_page(maddr >> PAGE_SHIFT); 641 - if (!page) { 642 - result = -ENOMEM; 643 - goto out; 644 - } 645 - ptr = kmap(page); 646 - ptr += maddr & ~PAGE_MASK; 647 - mchunk = min_t(size_t, mbytes, 648 - PAGE_SIZE - (maddr & ~PAGE_MASK)); 649 - uchunk = min(ubytes, mchunk); 650 - if (mchunk > uchunk) { 651 - /* Zero the trailing part of the page */ 652 - memset(ptr + uchunk, 0, mchunk - uchunk); 653 - } 654 - 655 - /* For file based kexec, source pages are in kernel memory */ 656 - if (image->file_mode) 657 - memcpy(ptr, kbuf, uchunk); 658 - else 659 - result = copy_from_user(ptr, buf, uchunk); 660 - kexec_flush_icache_page(page); 661 - kunmap(page); 662 - if (result) { 663 - result = -EFAULT; 664 - goto out; 665 - } 666 - ubytes -= uchunk; 667 - maddr += mchunk; 668 - if (image->file_mode) 669 - kbuf += mchunk; 670 - else 671 - buf += mchunk; 672 - mbytes -= mchunk; 673 - } 674 - out: 675 - return result; 676 - } 677 - 678 - int kimage_load_segment(struct kimage *image, 679 - struct kexec_segment *segment) 680 - { 681 - int result = -ENOMEM; 682 - 683 - switch (image->type) { 684 - case KEXEC_TYPE_DEFAULT: 685 - result = kimage_load_normal_segment(image, segment); 686 - break; 687 - case KEXEC_TYPE_CRASH: 688 - result = kimage_load_crash_segment(image, segment); 689 - break; 690 - } 691 - 692 - return result; 693 - } 694 - 695 346 /* 696 347 * Exec Kernel system call: for obvious reasons only root may call it. 697 348 * ··· 121 954 * kexec does not sync, or unmount filesystems so if you need 122 955 * that to happen you need to do that yourself. 123 956 */ 124 - struct kimage *kexec_image; 125 - struct kimage *kexec_crash_image; 126 - int kexec_load_disabled; 127 957 128 958 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, 129 959 struct kexec_segment __user *, segments, unsigned long, flags) ··· 215 1051 return result; 216 1052 } 217 1053 218 - /* 219 - * Add and remove page tables for crashkernel memory 220 - * 221 - * Provide an empty default implementation here -- architecture 222 - * code may override this 223 - */ 224 - void __weak crash_map_reserved_pages(void) 225 - {} 226 - 227 - void __weak crash_unmap_reserved_pages(void) 228 - {} 229 - 230 1054 #ifdef CONFIG_COMPAT 231 1055 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, 232 1056 compat_ulong_t, nr_segments, ··· 253 1101 return sys_kexec_load(entry, nr_segments, ksegments, flags); 254 1102 } 255 1103 #endif 256 - 257 - void crash_kexec(struct pt_regs *regs) 258 - { 259 - /* Take the kexec_mutex here to prevent sys_kexec_load 260 - * running on one cpu from replacing the crash kernel 261 - * we are using after a panic on a different cpu. 262 - * 263 - * If the crash kernel was not located in a fixed area 264 - * of memory the xchg(&kexec_crash_image) would be 265 - * sufficient. But since I reuse the memory... 
266 - */ 267 - if (mutex_trylock(&kexec_mutex)) { 268 - if (kexec_crash_image) { 269 - struct pt_regs fixed_regs; 270 - 271 - crash_setup_regs(&fixed_regs, regs); 272 - crash_save_vmcoreinfo(); 273 - machine_crash_shutdown(&fixed_regs); 274 - machine_kexec(kexec_crash_image); 275 - } 276 - mutex_unlock(&kexec_mutex); 277 - } 278 - } 279 - 280 - size_t crash_get_memory_size(void) 281 - { 282 - size_t size = 0; 283 - mutex_lock(&kexec_mutex); 284 - if (crashk_res.end != crashk_res.start) 285 - size = resource_size(&crashk_res); 286 - mutex_unlock(&kexec_mutex); 287 - return size; 288 - } 289 - 290 - void __weak crash_free_reserved_phys_range(unsigned long begin, 291 - unsigned long end) 292 - { 293 - unsigned long addr; 294 - 295 - for (addr = begin; addr < end; addr += PAGE_SIZE) 296 - free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); 297 - } 298 - 299 - int crash_shrink_memory(unsigned long new_size) 300 - { 301 - int ret = 0; 302 - unsigned long start, end; 303 - unsigned long old_size; 304 - struct resource *ram_res; 305 - 306 - mutex_lock(&kexec_mutex); 307 - 308 - if (kexec_crash_image) { 309 - ret = -ENOENT; 310 - goto unlock; 311 - } 312 - start = crashk_res.start; 313 - end = crashk_res.end; 314 - old_size = (end == 0) ? 0 : end - start + 1; 315 - if (new_size >= old_size) { 316 - ret = (new_size == old_size) ? 0 : -EINVAL; 317 - goto unlock; 318 - } 319 - 320 - ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); 321 - if (!ram_res) { 322 - ret = -ENOMEM; 323 - goto unlock; 324 - } 325 - 326 - start = roundup(start, KEXEC_CRASH_MEM_ALIGN); 327 - end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); 328 - 329 - crash_map_reserved_pages(); 330 - crash_free_reserved_phys_range(end, crashk_res.end); 331 - 332 - if ((start == end) && (crashk_res.parent != NULL)) 333 - release_resource(&crashk_res); 334 - 335 - ram_res->start = end; 336 - ram_res->end = crashk_res.end; 337 - ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; 338 - ram_res->name = "System RAM"; 339 - 340 - crashk_res.end = end - 1; 341 - 342 - insert_resource(&iomem_resource, ram_res); 343 - crash_unmap_reserved_pages(); 344 - 345 - unlock: 346 - mutex_unlock(&kexec_mutex); 347 - return ret; 348 - } 349 - 350 - static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 351 - size_t data_len) 352 - { 353 - struct elf_note note; 354 - 355 - note.n_namesz = strlen(name) + 1; 356 - note.n_descsz = data_len; 357 - note.n_type = type; 358 - memcpy(buf, &note, sizeof(note)); 359 - buf += (sizeof(note) + 3)/4; 360 - memcpy(buf, name, note.n_namesz); 361 - buf += (note.n_namesz + 3)/4; 362 - memcpy(buf, data, note.n_descsz); 363 - buf += (note.n_descsz + 3)/4; 364 - 365 - return buf; 366 - } 367 - 368 - static void final_note(u32 *buf) 369 - { 370 - struct elf_note note; 371 - 372 - note.n_namesz = 0; 373 - note.n_descsz = 0; 374 - note.n_type = 0; 375 - memcpy(buf, &note, sizeof(note)); 376 - } 377 - 378 - void crash_save_cpu(struct pt_regs *regs, int cpu) 379 - { 380 - struct elf_prstatus prstatus; 381 - u32 *buf; 382 - 383 - if ((cpu < 0) || (cpu >= nr_cpu_ids)) 384 - return; 385 - 386 - /* Using ELF notes here is opportunistic. 387 - * I need a well defined structure format 388 - * for the data I pass, and I need tags 389 - * on the data to indicate what information I have 390 - * squirrelled away. ELF notes happen to provide 391 - * all of that, so there is no need to invent something new. 
392 - */ 393 - buf = (u32 *)per_cpu_ptr(crash_notes, cpu); 394 - if (!buf) 395 - return; 396 - memset(&prstatus, 0, sizeof(prstatus)); 397 - prstatus.pr_pid = current->pid; 398 - elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 399 - buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, 400 - &prstatus, sizeof(prstatus)); 401 - final_note(buf); 402 - } 403 - 404 - static int __init crash_notes_memory_init(void) 405 - { 406 - /* Allocate memory for saving cpu registers. */ 407 - crash_notes = alloc_percpu(note_buf_t); 408 - if (!crash_notes) { 409 - pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); 410 - return -ENOMEM; 411 - } 412 - return 0; 413 - } 414 - subsys_initcall(crash_notes_memory_init); 415 - 416 - 417 - /* 418 - * parsing the "crashkernel" commandline 419 - * 420 - * this code is intended to be called from architecture specific code 421 - */ 422 - 423 - 424 - /* 425 - * This function parses command lines in the format 426 - * 427 - * crashkernel=ramsize-range:size[,...][@offset] 428 - * 429 - * The function returns 0 on success and -EINVAL on failure. 430 - */ 431 - static int __init parse_crashkernel_mem(char *cmdline, 432 - unsigned long long system_ram, 433 - unsigned long long *crash_size, 434 - unsigned long long *crash_base) 435 - { 436 - char *cur = cmdline, *tmp; 437 - 438 - /* for each entry of the comma-separated list */ 439 - do { 440 - unsigned long long start, end = ULLONG_MAX, size; 441 - 442 - /* get the start of the range */ 443 - start = memparse(cur, &tmp); 444 - if (cur == tmp) { 445 - pr_warn("crashkernel: Memory value expected\n"); 446 - return -EINVAL; 447 - } 448 - cur = tmp; 449 - if (*cur != '-') { 450 - pr_warn("crashkernel: '-' expected\n"); 451 - return -EINVAL; 452 - } 453 - cur++; 454 - 455 - /* if no ':' is here, than we read the end */ 456 - if (*cur != ':') { 457 - end = memparse(cur, &tmp); 458 - if (cur == tmp) { 459 - pr_warn("crashkernel: Memory value expected\n"); 460 - return -EINVAL; 461 - } 462 - cur = tmp; 463 - if (end <= start) { 464 - pr_warn("crashkernel: end <= start\n"); 465 - return -EINVAL; 466 - } 467 - } 468 - 469 - if (*cur != ':') { 470 - pr_warn("crashkernel: ':' expected\n"); 471 - return -EINVAL; 472 - } 473 - cur++; 474 - 475 - size = memparse(cur, &tmp); 476 - if (cur == tmp) { 477 - pr_warn("Memory value expected\n"); 478 - return -EINVAL; 479 - } 480 - cur = tmp; 481 - if (size >= system_ram) { 482 - pr_warn("crashkernel: invalid size\n"); 483 - return -EINVAL; 484 - } 485 - 486 - /* match ? */ 487 - if (system_ram >= start && system_ram < end) { 488 - *crash_size = size; 489 - break; 490 - } 491 - } while (*cur++ == ','); 492 - 493 - if (*crash_size > 0) { 494 - while (*cur && *cur != ' ' && *cur != '@') 495 - cur++; 496 - if (*cur == '@') { 497 - cur++; 498 - *crash_base = memparse(cur, &tmp); 499 - if (cur == tmp) { 500 - pr_warn("Memory value expected after '@'\n"); 501 - return -EINVAL; 502 - } 503 - } 504 - } 505 - 506 - return 0; 507 - } 508 - 509 - /* 510 - * That function parses "simple" (old) crashkernel command lines like 511 - * 512 - * crashkernel=size[@offset] 513 - * 514 - * It returns 0 on success and -EINVAL on failure. 
515 - */ 516 - static int __init parse_crashkernel_simple(char *cmdline, 517 - unsigned long long *crash_size, 518 - unsigned long long *crash_base) 519 - { 520 - char *cur = cmdline; 521 - 522 - *crash_size = memparse(cmdline, &cur); 523 - if (cmdline == cur) { 524 - pr_warn("crashkernel: memory value expected\n"); 525 - return -EINVAL; 526 - } 527 - 528 - if (*cur == '@') 529 - *crash_base = memparse(cur+1, &cur); 530 - else if (*cur != ' ' && *cur != '\0') { 531 - pr_warn("crashkernel: unrecognized char\n"); 532 - return -EINVAL; 533 - } 534 - 535 - return 0; 536 - } 537 - 538 - #define SUFFIX_HIGH 0 539 - #define SUFFIX_LOW 1 540 - #define SUFFIX_NULL 2 541 - static __initdata char *suffix_tbl[] = { 542 - [SUFFIX_HIGH] = ",high", 543 - [SUFFIX_LOW] = ",low", 544 - [SUFFIX_NULL] = NULL, 545 - }; 546 - 547 - /* 548 - * That function parses "suffix" crashkernel command lines like 549 - * 550 - * crashkernel=size,[high|low] 551 - * 552 - * It returns 0 on success and -EINVAL on failure. 553 - */ 554 - static int __init parse_crashkernel_suffix(char *cmdline, 555 - unsigned long long *crash_size, 556 - const char *suffix) 557 - { 558 - char *cur = cmdline; 559 - 560 - *crash_size = memparse(cmdline, &cur); 561 - if (cmdline == cur) { 562 - pr_warn("crashkernel: memory value expected\n"); 563 - return -EINVAL; 564 - } 565 - 566 - /* check with suffix */ 567 - if (strncmp(cur, suffix, strlen(suffix))) { 568 - pr_warn("crashkernel: unrecognized char\n"); 569 - return -EINVAL; 570 - } 571 - cur += strlen(suffix); 572 - if (*cur != ' ' && *cur != '\0') { 573 - pr_warn("crashkernel: unrecognized char\n"); 574 - return -EINVAL; 575 - } 576 - 577 - return 0; 578 - } 579 - 580 - static __init char *get_last_crashkernel(char *cmdline, 581 - const char *name, 582 - const char *suffix) 583 - { 584 - char *p = cmdline, *ck_cmdline = NULL; 585 - 586 - /* find crashkernel and use the last one if there are more */ 587 - p = strstr(p, name); 588 - while (p) { 589 - char *end_p = strchr(p, ' '); 590 - char *q; 591 - 592 - if (!end_p) 593 - end_p = p + strlen(p); 594 - 595 - if (!suffix) { 596 - int i; 597 - 598 - /* skip the one with any known suffix */ 599 - for (i = 0; suffix_tbl[i]; i++) { 600 - q = end_p - strlen(suffix_tbl[i]); 601 - if (!strncmp(q, suffix_tbl[i], 602 - strlen(suffix_tbl[i]))) 603 - goto next; 604 - } 605 - ck_cmdline = p; 606 - } else { 607 - q = end_p - strlen(suffix); 608 - if (!strncmp(q, suffix, strlen(suffix))) 609 - ck_cmdline = p; 610 - } 611 - next: 612 - p = strstr(p+1, name); 613 - } 614 - 615 - if (!ck_cmdline) 616 - return NULL; 617 - 618 - return ck_cmdline; 619 - } 620 - 621 - static int __init __parse_crashkernel(char *cmdline, 622 - unsigned long long system_ram, 623 - unsigned long long *crash_size, 624 - unsigned long long *crash_base, 625 - const char *name, 626 - const char *suffix) 627 - { 628 - char *first_colon, *first_space; 629 - char *ck_cmdline; 630 - 631 - BUG_ON(!crash_size || !crash_base); 632 - *crash_size = 0; 633 - *crash_base = 0; 634 - 635 - ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 636 - 637 - if (!ck_cmdline) 638 - return -EINVAL; 639 - 640 - ck_cmdline += strlen(name); 641 - 642 - if (suffix) 643 - return parse_crashkernel_suffix(ck_cmdline, crash_size, 644 - suffix); 645 - /* 646 - * if the commandline contains a ':', then that's the extended 647 - * syntax -- if not, it must be the classic syntax 648 - */ 649 - first_colon = strchr(ck_cmdline, ':'); 650 - first_space = strchr(ck_cmdline, ' '); 651 - if (first_colon && 
(!first_space || first_colon < first_space)) 652 - return parse_crashkernel_mem(ck_cmdline, system_ram, 653 - crash_size, crash_base); 654 - 655 - return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); 656 - } 657 - 658 - /* 659 - * That function is the entry point for command line parsing and should be 660 - * called from the arch-specific code. 661 - */ 662 - int __init parse_crashkernel(char *cmdline, 663 - unsigned long long system_ram, 664 - unsigned long long *crash_size, 665 - unsigned long long *crash_base) 666 - { 667 - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 668 - "crashkernel=", NULL); 669 - } 670 - 671 - int __init parse_crashkernel_high(char *cmdline, 672 - unsigned long long system_ram, 673 - unsigned long long *crash_size, 674 - unsigned long long *crash_base) 675 - { 676 - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 677 - "crashkernel=", suffix_tbl[SUFFIX_HIGH]); 678 - } 679 - 680 - int __init parse_crashkernel_low(char *cmdline, 681 - unsigned long long system_ram, 682 - unsigned long long *crash_size, 683 - unsigned long long *crash_base) 684 - { 685 - return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 686 - "crashkernel=", suffix_tbl[SUFFIX_LOW]); 687 - } 688 - 689 - static void update_vmcoreinfo_note(void) 690 - { 691 - u32 *buf = vmcoreinfo_note; 692 - 693 - if (!vmcoreinfo_size) 694 - return; 695 - buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 696 - vmcoreinfo_size); 697 - final_note(buf); 698 - } 699 - 700 - void crash_save_vmcoreinfo(void) 701 - { 702 - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); 703 - update_vmcoreinfo_note(); 704 - } 705 - 706 - void vmcoreinfo_append_str(const char *fmt, ...) 707 - { 708 - va_list args; 709 - char buf[0x50]; 710 - size_t r; 711 - 712 - va_start(args, fmt); 713 - r = vscnprintf(buf, sizeof(buf), fmt, args); 714 - va_end(args); 715 - 716 - r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); 717 - 718 - memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 719 - 720 - vmcoreinfo_size += r; 721 - } 722 - 723 - /* 724 - * provide an empty default implementation here -- architecture 725 - * code may override this 726 - */ 727 - void __weak arch_crash_save_vmcoreinfo(void) 728 - {} 729 - 730 - unsigned long __weak paddr_vmcoreinfo_note(void) 731 - { 732 - return __pa((unsigned long)(char *)&vmcoreinfo_note); 733 - } 734 - 735 - static int __init crash_save_vmcoreinfo_init(void) 736 - { 737 - VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 738 - VMCOREINFO_PAGESIZE(PAGE_SIZE); 739 - 740 - VMCOREINFO_SYMBOL(init_uts_ns); 741 - VMCOREINFO_SYMBOL(node_online_map); 742 - #ifdef CONFIG_MMU 743 - VMCOREINFO_SYMBOL(swapper_pg_dir); 744 - #endif 745 - VMCOREINFO_SYMBOL(_stext); 746 - VMCOREINFO_SYMBOL(vmap_area_list); 747 - 748 - #ifndef CONFIG_NEED_MULTIPLE_NODES 749 - VMCOREINFO_SYMBOL(mem_map); 750 - VMCOREINFO_SYMBOL(contig_page_data); 751 - #endif 752 - #ifdef CONFIG_SPARSEMEM 753 - VMCOREINFO_SYMBOL(mem_section); 754 - VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 755 - VMCOREINFO_STRUCT_SIZE(mem_section); 756 - VMCOREINFO_OFFSET(mem_section, section_mem_map); 757 - #endif 758 - VMCOREINFO_STRUCT_SIZE(page); 759 - VMCOREINFO_STRUCT_SIZE(pglist_data); 760 - VMCOREINFO_STRUCT_SIZE(zone); 761 - VMCOREINFO_STRUCT_SIZE(free_area); 762 - VMCOREINFO_STRUCT_SIZE(list_head); 763 - VMCOREINFO_SIZE(nodemask_t); 764 - VMCOREINFO_OFFSET(page, flags); 765 - VMCOREINFO_OFFSET(page, _count); 766 - VMCOREINFO_OFFSET(page, 
mapping); 767 - VMCOREINFO_OFFSET(page, lru); 768 - VMCOREINFO_OFFSET(page, _mapcount); 769 - VMCOREINFO_OFFSET(page, private); 770 - VMCOREINFO_OFFSET(pglist_data, node_zones); 771 - VMCOREINFO_OFFSET(pglist_data, nr_zones); 772 - #ifdef CONFIG_FLAT_NODE_MEM_MAP 773 - VMCOREINFO_OFFSET(pglist_data, node_mem_map); 774 - #endif 775 - VMCOREINFO_OFFSET(pglist_data, node_start_pfn); 776 - VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); 777 - VMCOREINFO_OFFSET(pglist_data, node_id); 778 - VMCOREINFO_OFFSET(zone, free_area); 779 - VMCOREINFO_OFFSET(zone, vm_stat); 780 - VMCOREINFO_OFFSET(zone, spanned_pages); 781 - VMCOREINFO_OFFSET(free_area, free_list); 782 - VMCOREINFO_OFFSET(list_head, next); 783 - VMCOREINFO_OFFSET(list_head, prev); 784 - VMCOREINFO_OFFSET(vmap_area, va_start); 785 - VMCOREINFO_OFFSET(vmap_area, list); 786 - VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); 787 - log_buf_kexec_setup(); 788 - VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 789 - VMCOREINFO_NUMBER(NR_FREE_PAGES); 790 - VMCOREINFO_NUMBER(PG_lru); 791 - VMCOREINFO_NUMBER(PG_private); 792 - VMCOREINFO_NUMBER(PG_swapcache); 793 - VMCOREINFO_NUMBER(PG_slab); 794 - #ifdef CONFIG_MEMORY_FAILURE 795 - VMCOREINFO_NUMBER(PG_hwpoison); 796 - #endif 797 - VMCOREINFO_NUMBER(PG_head_mask); 798 - VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 799 - #ifdef CONFIG_HUGETLBFS 800 - VMCOREINFO_SYMBOL(free_huge_page); 801 - #endif 802 - 803 - arch_crash_save_vmcoreinfo(); 804 - update_vmcoreinfo_note(); 805 - 806 - return 0; 807 - } 808 - 809 - subsys_initcall(crash_save_vmcoreinfo_init); 810 - 811 - /* 812 - * Move into place and start executing a preloaded standalone 813 - * executable. If nothing was preloaded return an error. 814 - */ 815 - int kernel_kexec(void) 816 - { 817 - int error = 0; 818 - 819 - if (!mutex_trylock(&kexec_mutex)) 820 - return -EBUSY; 821 - if (!kexec_image) { 822 - error = -EINVAL; 823 - goto Unlock; 824 - } 825 - 826 - #ifdef CONFIG_KEXEC_JUMP 827 - if (kexec_image->preserve_context) { 828 - lock_system_sleep(); 829 - pm_prepare_console(); 830 - error = freeze_processes(); 831 - if (error) { 832 - error = -EBUSY; 833 - goto Restore_console; 834 - } 835 - suspend_console(); 836 - error = dpm_suspend_start(PMSG_FREEZE); 837 - if (error) 838 - goto Resume_console; 839 - /* At this point, dpm_suspend_start() has been called, 840 - * but *not* dpm_suspend_end(). We *must* call 841 - * dpm_suspend_end() now. Otherwise, drivers for 842 - * some devices (e.g. interrupt controllers) become 843 - * desynchronized with the actual state of the 844 - * hardware at resume time, and evil weirdness ensues. 845 - */ 846 - error = dpm_suspend_end(PMSG_FREEZE); 847 - if (error) 848 - goto Resume_devices; 849 - error = disable_nonboot_cpus(); 850 - if (error) 851 - goto Enable_cpus; 852 - local_irq_disable(); 853 - error = syscore_suspend(); 854 - if (error) 855 - goto Enable_irqs; 856 - } else 857 - #endif 858 - { 859 - kexec_in_progress = true; 860 - kernel_restart_prepare(NULL); 861 - migrate_to_reboot_cpu(); 862 - 863 - /* 864 - * migrate_to_reboot_cpu() disables CPU hotplug assuming that 865 - * no further code needs to use CPU hotplug (which is true in 866 - * the reboot case). However, the kexec path depends on using 867 - * CPU hotplug again; so re-enable it here. 
868 - */ 869 - cpu_hotplug_enable(); 870 - pr_emerg("Starting new kernel\n"); 871 - machine_shutdown(); 872 - } 873 - 874 - machine_kexec(kexec_image); 875 - 876 - #ifdef CONFIG_KEXEC_JUMP 877 - if (kexec_image->preserve_context) { 878 - syscore_resume(); 879 - Enable_irqs: 880 - local_irq_enable(); 881 - Enable_cpus: 882 - enable_nonboot_cpus(); 883 - dpm_resume_start(PMSG_RESTORE); 884 - Resume_devices: 885 - dpm_resume_end(PMSG_RESTORE); 886 - Resume_console: 887 - resume_console(); 888 - thaw_processes(); 889 - Restore_console: 890 - pm_restore_console(); 891 - unlock_system_sleep(); 892 - } 893 - #endif 894 - 895 - Unlock: 896 - mutex_unlock(&kexec_mutex); 897 - return error; 898 - }
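The parse_crashkernel() helpers removed above (and re-added in kexec_core.c below) are only parsers; the actual reservation is done by arch-specific early boot code, which is why they are exported entry points. A rough sketch of such a caller, assuming a memblock-based architecture; the function name, the 512M search window and the 1M alignment are illustrative, not part of this patch:

/* Illustrative sketch: how arch setup code might consume parse_crashkernel().
 * Assumes memblock; reserve_crashkernel() and the placement policy here are
 * hypothetical.
 */
static void __init reserve_crashkernel(void)
{
	unsigned long long crash_size, crash_base;
	int ret;

	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&crash_size, &crash_base);
	if (ret || !crash_size)
		return;		/* no valid crashkernel= option given */

	if (!crash_base) {	/* 0 means: kernel picks the placement */
		crash_base = memblock_find_in_range(0, SZ_512M,
						    crash_size, SZ_1M);
		if (!crash_base)
			return;
	}
	memblock_reserve(crash_base, crash_size);
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
}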
+1511
kernel/kexec_core.c
··· 1 + /* 2 + * kexec.c - kexec system call core code. 3 + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 4 + * 5 + * This source code is licensed under the GNU General Public License, 6 + * Version 2. See the file COPYING for more details. 7 + */ 8 + 9 + #define pr_fmt(fmt) "kexec: " fmt 10 + 11 + #include <linux/capability.h> 12 + #include <linux/mm.h> 13 + #include <linux/file.h> 14 + #include <linux/slab.h> 15 + #include <linux/fs.h> 16 + #include <linux/kexec.h> 17 + #include <linux/mutex.h> 18 + #include <linux/list.h> 19 + #include <linux/highmem.h> 20 + #include <linux/syscalls.h> 21 + #include <linux/reboot.h> 22 + #include <linux/ioport.h> 23 + #include <linux/hardirq.h> 24 + #include <linux/elf.h> 25 + #include <linux/elfcore.h> 26 + #include <linux/utsname.h> 27 + #include <linux/numa.h> 28 + #include <linux/suspend.h> 29 + #include <linux/device.h> 30 + #include <linux/freezer.h> 31 + #include <linux/pm.h> 32 + #include <linux/cpu.h> 33 + #include <linux/uaccess.h> 34 + #include <linux/io.h> 35 + #include <linux/console.h> 36 + #include <linux/vmalloc.h> 37 + #include <linux/swap.h> 38 + #include <linux/syscore_ops.h> 39 + #include <linux/compiler.h> 40 + #include <linux/hugetlb.h> 41 + 42 + #include <asm/page.h> 43 + #include <asm/sections.h> 44 + 45 + #include <crypto/hash.h> 46 + #include <crypto/sha.h> 47 + #include "kexec_internal.h" 48 + 49 + DEFINE_MUTEX(kexec_mutex); 50 + 51 + /* Per cpu memory for storing cpu states in case of system crash. */ 52 + note_buf_t __percpu *crash_notes; 53 + 54 + /* vmcoreinfo stuff */ 55 + static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; 56 + u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; 57 + size_t vmcoreinfo_size; 58 + size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); 59 + 60 + /* Flag to indicate we are going to kexec a new kernel */ 61 + bool kexec_in_progress = false; 62 + 63 + 64 + /* Location of the reserved area for the crash kernel */ 65 + struct resource crashk_res = { 66 + .name = "Crash kernel", 67 + .start = 0, 68 + .end = 0, 69 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM 70 + }; 71 + struct resource crashk_low_res = { 72 + .name = "Crash kernel", 73 + .start = 0, 74 + .end = 0, 75 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM 76 + }; 77 + 78 + int kexec_should_crash(struct task_struct *p) 79 + { 80 + /* 81 + * If crash_kexec_post_notifiers is enabled, don't run 82 + * crash_kexec() here yet, which must be run after panic 83 + * notifiers in panic(). 84 + */ 85 + if (crash_kexec_post_notifiers) 86 + return 0; 87 + /* 88 + * There are 4 panic() calls in do_exit() path, each of which 89 + * corresponds to each of these 4 conditions. 90 + */ 91 + if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops) 92 + return 1; 93 + return 0; 94 + } 95 + 96 + /* 97 + * When kexec transitions to the new kernel there is a one-to-one 98 + * mapping between physical and virtual addresses. On processors 99 + * where you can disable the MMU this is trivial, and easy. For 100 + * others it is still a simple predictable page table to setup. 101 + * 102 + * In that environment kexec copies the new kernel to its final 103 + * resting place. This means I can only support memory whose 104 + * physical address can fit in an unsigned long. In particular 105 + * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled. 106 + * If the assembly stub has more restrictive requirements 107 + * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be 108 + * defined more restrictively in <asm/kexec.h>. 
109 + * 110 + * The code for the transition from the current kernel to 111 + * the new kernel is placed in the control_code_buffer, whose size 112 + * is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single 113 + * page of memory is necessary, but some architectures require more. 114 + * Because this memory must be identity mapped in the transition from 115 + * virtual to physical addresses it must live in the range 116 + * 0 - TASK_SIZE, as only the user space mappings are arbitrarily 117 + * modifiable. 118 + * 119 + * The assembly stub in the control code buffer is passed a linked list 120 + * of descriptor pages detailing the source pages of the new kernel, 121 + * and the destination addresses of those source pages. As this data 122 + * structure is not used in the context of the current OS, it must 123 + * be self-contained. 124 + * 125 + * The code has been made to work with highmem pages and will use a 126 + * destination page in its final resting place (if it happens 127 + * to allocate it). The end product of this is that most of the 128 + * physical address space, and most of RAM can be used. 129 + * 130 + * Future directions include: 131 + * - allocating a page table with the control code buffer identity 132 + * mapped, to simplify machine_kexec and make kexec_on_panic more 133 + * reliable. 134 + */ 135 + 136 + /* 137 + * KIMAGE_NO_DEST is an impossible destination address..., for 138 + * allocating pages whose destination address we do not care about. 139 + */ 140 + #define KIMAGE_NO_DEST (-1UL) 141 + 142 + static struct page *kimage_alloc_page(struct kimage *image, 143 + gfp_t gfp_mask, 144 + unsigned long dest); 145 + 146 + int sanity_check_segment_list(struct kimage *image) 147 + { 148 + int result, i; 149 + unsigned long nr_segments = image->nr_segments; 150 + 151 + /* 152 + * Verify we have good destination addresses. The caller is 153 + * responsible for making certain we don't attempt to load 154 + * the new image into invalid or reserved areas of RAM. This 155 + * just verifies it is an address we can use. 156 + * 157 + * Since the kernel does everything in page size chunks ensure 158 + * the destination addresses are page aligned. Too many 159 + * special cases crop up when we don't do this. The most 160 + * insidious is getting overlapping destination addresses 161 + * simply because addresses are changed to page size 162 + * granularity. 163 + */ 164 + result = -EADDRNOTAVAIL; 165 + for (i = 0; i < nr_segments; i++) { 166 + unsigned long mstart, mend; 167 + 168 + mstart = image->segment[i].mem; 169 + mend = mstart + image->segment[i].memsz; 170 + if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK)) 171 + return result; 172 + if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT) 173 + return result; 174 + } 175 + 176 + /* Verify our destination addresses do not overlap. 177 + * If we allowed overlapping destination addresses 178 + * through, very weird things can happen with no 179 + * easy explanation as one segment stops on another. 180 + */ 181 + result = -EINVAL; 182 + for (i = 0; i < nr_segments; i++) { 183 + unsigned long mstart, mend; 184 + unsigned long j; 185 + 186 + mstart = image->segment[i].mem; 187 + mend = mstart + image->segment[i].memsz; 188 + for (j = 0; j < i; j++) { 189 + unsigned long pstart, pend; 190 + 191 + pstart = image->segment[j].mem; 192 + pend = pstart + image->segment[j].memsz; 193 + /* Do the segments overlap ?
*/ 194 + if ((mend > pstart) && (mstart < pend)) 195 + return result; 196 + } 197 + } 198 + 199 + /* Ensure our buffer sizes are strictly less than 200 + * our memory sizes. This should always be the case, 201 + * and it is easier to check up front than to be surprised 202 + * later on. 203 + */ 204 + result = -EINVAL; 205 + for (i = 0; i < nr_segments; i++) { 206 + if (image->segment[i].bufsz > image->segment[i].memsz) 207 + return result; 208 + } 209 + 210 + /* 211 + * Verify we have good destination addresses. Normally 212 + * the caller is responsible for making certain we don't 213 + * attempt to load the new image into invalid or reserved 214 + * areas of RAM. But crash kernels are preloaded into a 215 + * reserved area of ram. We must ensure the addresses 216 + * are in the reserved area otherwise preloading the 217 + * kernel could corrupt things. 218 + */ 219 + 220 + if (image->type == KEXEC_TYPE_CRASH) { 221 + result = -EADDRNOTAVAIL; 222 + for (i = 0; i < nr_segments; i++) { 223 + unsigned long mstart, mend; 224 + 225 + mstart = image->segment[i].mem; 226 + mend = mstart + image->segment[i].memsz - 1; 227 + /* Ensure we are within the crash kernel limits */ 228 + if ((mstart < crashk_res.start) || 229 + (mend > crashk_res.end)) 230 + return result; 231 + } 232 + } 233 + 234 + return 0; 235 + } 236 + 237 + struct kimage *do_kimage_alloc_init(void) 238 + { 239 + struct kimage *image; 240 + 241 + /* Allocate a controlling structure */ 242 + image = kzalloc(sizeof(*image), GFP_KERNEL); 243 + if (!image) 244 + return NULL; 245 + 246 + image->head = 0; 247 + image->entry = &image->head; 248 + image->last_entry = &image->head; 249 + image->control_page = ~0; /* By default this does not apply */ 250 + image->type = KEXEC_TYPE_DEFAULT; 251 + 252 + /* Initialize the list of control pages */ 253 + INIT_LIST_HEAD(&image->control_pages); 254 + 255 + /* Initialize the list of destination pages */ 256 + INIT_LIST_HEAD(&image->dest_pages); 257 + 258 + /* Initialize the list of unusable pages */ 259 + INIT_LIST_HEAD(&image->unusable_pages); 260 + 261 + return image; 262 + } 263 + 264 + int kimage_is_destination_range(struct kimage *image, 265 + unsigned long start, 266 + unsigned long end) 267 + { 268 + unsigned long i; 269 + 270 + for (i = 0; i < image->nr_segments; i++) { 271 + unsigned long mstart, mend; 272 + 273 + mstart = image->segment[i].mem; 274 + mend = mstart + image->segment[i].memsz; 275 + if ((end > mstart) && (start < mend)) 276 + return 1; 277 + } 278 + 279 + return 0; 280 + } 281 + 282 + static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order) 283 + { 284 + struct page *pages; 285 + 286 + pages = alloc_pages(gfp_mask, order); 287 + if (pages) { 288 + unsigned int count, i; 289 + 290 + pages->mapping = NULL; 291 + set_page_private(pages, order); 292 + count = 1 << order; 293 + for (i = 0; i < count; i++) 294 + SetPageReserved(pages + i); 295 + } 296 + 297 + return pages; 298 + } 299 + 300 + static void kimage_free_pages(struct page *page) 301 + { 302 + unsigned int order, count, i; 303 + 304 + order = page_private(page); 305 + count = 1 << order; 306 + for (i = 0; i < count; i++) 307 + ClearPageReserved(page + i); 308 + __free_pages(page, order); 309 + } 310 + 311 + void kimage_free_page_list(struct list_head *list) 312 + { 313 + struct list_head *pos, *next; 314 + 315 + list_for_each_safe(pos, next, list) { 316 + struct page *page; 317 + 318 + page = list_entry(pos, struct page, lru); 319 + list_del(&page->lru); 320 + kimage_free_pages(page); 321 + } 322 + } 
323 + 324 + static struct page *kimage_alloc_normal_control_pages(struct kimage *image, 325 + unsigned int order) 326 + { 327 + /* Control pages are special, they are the intermediaries 328 + * that are needed while we copy the rest of the pages 329 + * to their final resting place. As such they must 330 + * not conflict with either the destination addresses 331 + * or memory the kernel is already using. 332 + * 333 + * The only case where we really need more than one of 334 + * these are for architectures where we cannot disable 335 + * the MMU and must instead generate an identity mapped 336 + * page table for all of the memory. 337 + * 338 + * At worst this runs in O(N) of the image size. 339 + */ 340 + struct list_head extra_pages; 341 + struct page *pages; 342 + unsigned int count; 343 + 344 + count = 1 << order; 345 + INIT_LIST_HEAD(&extra_pages); 346 + 347 + /* Loop while I can allocate a page and the page allocated 348 + * is a destination page. 349 + */ 350 + do { 351 + unsigned long pfn, epfn, addr, eaddr; 352 + 353 + pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order); 354 + if (!pages) 355 + break; 356 + pfn = page_to_pfn(pages); 357 + epfn = pfn + count; 358 + addr = pfn << PAGE_SHIFT; 359 + eaddr = epfn << PAGE_SHIFT; 360 + if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) || 361 + kimage_is_destination_range(image, addr, eaddr)) { 362 + list_add(&pages->lru, &extra_pages); 363 + pages = NULL; 364 + } 365 + } while (!pages); 366 + 367 + if (pages) { 368 + /* Remember the allocated page... */ 369 + list_add(&pages->lru, &image->control_pages); 370 + 371 + /* Because the page is already in its destination 372 + * location we will never allocate another page at 373 + * that address. Therefore kimage_alloc_pages 374 + * will not return it (again) and we don't need 375 + * to give it an entry in image->segment[]. 376 + */ 377 + } 378 + /* Deal with the destination pages I have inadvertently allocated. 379 + * 380 + * Ideally I would convert multi-page allocations into single 381 + * page allocations, and add everything to image->dest_pages. 382 + * 383 + * For now it is simpler to just free the pages. 384 + */ 385 + kimage_free_page_list(&extra_pages); 386 + 387 + return pages; 388 + } 389 + 390 + static struct page *kimage_alloc_crash_control_pages(struct kimage *image, 391 + unsigned int order) 392 + { 393 + /* Control pages are special, they are the intermediaries 394 + * that are needed while we copy the rest of the pages 395 + * to their final resting place. As such they must 396 + * not conflict with either the destination addresses 397 + * or memory the kernel is already using. 398 + * 399 + * Control pages are also the only pages we must allocate 400 + * when loading a crash kernel. All of the other pages 401 + * are specified by the segments and we just memcpy 402 + * into them directly. 403 + * 404 + * The only case where we really need more than one of 405 + * these are for architectures where we cannot disable 406 + * the MMU and must instead generate an identity mapped 407 + * page table for all of the memory. 408 + * 409 + * Given the low demand this implements a very simple 410 + * allocator that finds the first hole of the appropriate 411 + * size in the reserved memory region, and allocates all 412 + * of the memory up to and including the hole.
413 + */ 414 + unsigned long hole_start, hole_end, size; 415 + struct page *pages; 416 + 417 + pages = NULL; 418 + size = (1 << order) << PAGE_SHIFT; 419 + hole_start = (image->control_page + (size - 1)) & ~(size - 1); 420 + hole_end = hole_start + size - 1; 421 + while (hole_end <= crashk_res.end) { 422 + unsigned long i; 423 + 424 + if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT) 425 + break; 426 + /* See if I overlap any of the segments */ 427 + for (i = 0; i < image->nr_segments; i++) { 428 + unsigned long mstart, mend; 429 + 430 + mstart = image->segment[i].mem; 431 + mend = mstart + image->segment[i].memsz - 1; 432 + if ((hole_end >= mstart) && (hole_start <= mend)) { 433 + /* Advance the hole to the end of the segment */ 434 + hole_start = (mend + (size - 1)) & ~(size - 1); 435 + hole_end = hole_start + size - 1; 436 + break; 437 + } 438 + } 439 + /* If I don't overlap any segments I have found my hole! */ 440 + if (i == image->nr_segments) { 441 + pages = pfn_to_page(hole_start >> PAGE_SHIFT); 442 + break; 443 + } 444 + } 445 + if (pages) 446 + image->control_page = hole_end; 447 + 448 + return pages; 449 + } 450 + 451 + 452 + struct page *kimage_alloc_control_pages(struct kimage *image, 453 + unsigned int order) 454 + { 455 + struct page *pages = NULL; 456 + 457 + switch (image->type) { 458 + case KEXEC_TYPE_DEFAULT: 459 + pages = kimage_alloc_normal_control_pages(image, order); 460 + break; 461 + case KEXEC_TYPE_CRASH: 462 + pages = kimage_alloc_crash_control_pages(image, order); 463 + break; 464 + } 465 + 466 + return pages; 467 + } 468 + 469 + static int kimage_add_entry(struct kimage *image, kimage_entry_t entry) 470 + { 471 + if (*image->entry != 0) 472 + image->entry++; 473 + 474 + if (image->entry == image->last_entry) { 475 + kimage_entry_t *ind_page; 476 + struct page *page; 477 + 478 + page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST); 479 + if (!page) 480 + return -ENOMEM; 481 + 482 + ind_page = page_address(page); 483 + *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION; 484 + image->entry = ind_page; 485 + image->last_entry = ind_page + 486 + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1); 487 + } 488 + *image->entry = entry; 489 + image->entry++; 490 + *image->entry = 0; 491 + 492 + return 0; 493 + } 494 + 495 + static int kimage_set_destination(struct kimage *image, 496 + unsigned long destination) 497 + { 498 + int result; 499 + 500 + destination &= PAGE_MASK; 501 + result = kimage_add_entry(image, destination | IND_DESTINATION); 502 + 503 + return result; 504 + } 505 + 506 + 507 + static int kimage_add_page(struct kimage *image, unsigned long page) 508 + { 509 + int result; 510 + 511 + page &= PAGE_MASK; 512 + result = kimage_add_entry(image, page | IND_SOURCE); 513 + 514 + return result; 515 + } 516 + 517 + 518 + static void kimage_free_extra_pages(struct kimage *image) 519 + { 520 + /* Walk through and free any extra destination pages I may have */ 521 + kimage_free_page_list(&image->dest_pages); 522 + 523 + /* Walk through and free any unusable pages I have cached */ 524 + kimage_free_page_list(&image->unusable_pages); 525 + 526 + } 527 + void kimage_terminate(struct kimage *image) 528 + { 529 + if (*image->entry != 0) 530 + image->entry++; 531 + 532 + *image->entry = IND_DONE; 533 + } 534 + 535 + #define for_each_kimage_entry(image, ptr, entry) \ 536 + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \ 537 + ptr = (entry & IND_INDIRECTION) ? 
\ 538 + phys_to_virt((entry & PAGE_MASK)) : ptr + 1) 539 + 540 + static void kimage_free_entry(kimage_entry_t entry) 541 + { 542 + struct page *page; 543 + 544 + page = pfn_to_page(entry >> PAGE_SHIFT); 545 + kimage_free_pages(page); 546 + } 547 + 548 + void kimage_free(struct kimage *image) 549 + { 550 + kimage_entry_t *ptr, entry; 551 + kimage_entry_t ind = 0; 552 + 553 + if (!image) 554 + return; 555 + 556 + kimage_free_extra_pages(image); 557 + for_each_kimage_entry(image, ptr, entry) { 558 + if (entry & IND_INDIRECTION) { 559 + /* Free the previous indirection page */ 560 + if (ind & IND_INDIRECTION) 561 + kimage_free_entry(ind); 562 + /* Save this indirection page until we are 563 + * done with it. 564 + */ 565 + ind = entry; 566 + } else if (entry & IND_SOURCE) 567 + kimage_free_entry(entry); 568 + } 569 + /* Free the final indirection page */ 570 + if (ind & IND_INDIRECTION) 571 + kimage_free_entry(ind); 572 + 573 + /* Handle any machine specific cleanup */ 574 + machine_kexec_cleanup(image); 575 + 576 + /* Free the kexec control pages... */ 577 + kimage_free_page_list(&image->control_pages); 578 + 579 + /* 580 + * Free up any temporary buffers allocated. This might hit if 581 + * an error occurred much later after buffer allocation. 582 + */ 583 + if (image->file_mode) 584 + kimage_file_post_load_cleanup(image); 585 + 586 + kfree(image); 587 + } 588 + 589 + static kimage_entry_t *kimage_dst_used(struct kimage *image, 590 + unsigned long page) 591 + { 592 + kimage_entry_t *ptr, entry; 593 + unsigned long destination = 0; 594 + 595 + for_each_kimage_entry(image, ptr, entry) { 596 + if (entry & IND_DESTINATION) 597 + destination = entry & PAGE_MASK; 598 + else if (entry & IND_SOURCE) { 599 + if (page == destination) 600 + return ptr; 601 + destination += PAGE_SIZE; 602 + } 603 + } 604 + 605 + return NULL; 606 + } 607 + 608 + static struct page *kimage_alloc_page(struct kimage *image, 609 + gfp_t gfp_mask, 610 + unsigned long destination) 611 + { 612 + /* 613 + * Here we implement safeguards to ensure that a source page 614 + * is not copied to its destination page before the data on 615 + * the destination page is no longer useful. 616 + * 617 + * To do this we maintain the invariant that a source page is 618 + * either its own destination page, or it is not a 619 + * destination page at all. 620 + * 621 + * That is slightly stronger than required, but the proof 622 + * that no problems will occur is trivial, and the 623 + * implementation is simple to verify. 624 + * 625 + * When allocating all pages normally this algorithm will run 626 + * in O(N) time, but in the worst case it will run in O(N^2) 627 + * time. If the runtime is a problem the data structures can 628 + * be fixed. 629 + */ 630 + struct page *page; 631 + unsigned long addr; 632 + 633 + /* 634 + * Walk through the list of destination pages, and see if I 635 + * have a match.
636 + */ 637 + list_for_each_entry(page, &image->dest_pages, lru) { 638 + addr = page_to_pfn(page) << PAGE_SHIFT; 639 + if (addr == destination) { 640 + list_del(&page->lru); 641 + return page; 642 + } 643 + } 644 + page = NULL; 645 + while (1) { 646 + kimage_entry_t *old; 647 + 648 + /* Allocate a page, if we run out of memory give up */ 649 + page = kimage_alloc_pages(gfp_mask, 0); 650 + if (!page) 651 + return NULL; 652 + /* If the page cannot be used, file it away */ 653 + if (page_to_pfn(page) > 654 + (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) { 655 + list_add(&page->lru, &image->unusable_pages); 656 + continue; 657 + } 658 + addr = page_to_pfn(page) << PAGE_SHIFT; 659 + 660 + /* If it is the destination page we want, use it */ 661 + if (addr == destination) 662 + break; 663 + 664 + /* If the page is not a destination page, use it */ 665 + if (!kimage_is_destination_range(image, addr, 666 + addr + PAGE_SIZE)) 667 + break; 668 + 669 + /* 670 + * I know that the page is someone's destination page. 671 + * See if there is already a source page for this 672 + * destination page. And if so swap the source pages. 673 + */ 674 + old = kimage_dst_used(image, addr); 675 + if (old) { 676 + /* If so move it */ 677 + unsigned long old_addr; 678 + struct page *old_page; 679 + 680 + old_addr = *old & PAGE_MASK; 681 + old_page = pfn_to_page(old_addr >> PAGE_SHIFT); 682 + copy_highpage(page, old_page); 683 + *old = addr | (*old & ~PAGE_MASK); 684 + 685 + /* The old page I have found cannot be a 686 + * destination page, so return it if its 687 + * gfp_flags honor the ones passed in. 688 + */ 689 + if (!(gfp_mask & __GFP_HIGHMEM) && 690 + PageHighMem(old_page)) { 691 + kimage_free_pages(old_page); 692 + continue; 693 + } 694 + addr = old_addr; 695 + page = old_page; 696 + break; 697 + } 698 + /* Place the page on the destination list, to be used later */ 699 + list_add(&page->lru, &image->dest_pages); 700 + } 701 + 702 + return page; 703 + } 704 + 705 + static int kimage_load_normal_segment(struct kimage *image, 706 + struct kexec_segment *segment) 707 + { 708 + unsigned long maddr; 709 + size_t ubytes, mbytes; 710 + int result; 711 + unsigned char __user *buf = NULL; 712 + unsigned char *kbuf = NULL; 713 + 714 + result = 0; 715 + if (image->file_mode) 716 + kbuf = segment->kbuf; 717 + else 718 + buf = segment->buf; 719 + ubytes = segment->bufsz; 720 + mbytes = segment->memsz; 721 + maddr = segment->mem; 722 + 723 + result = kimage_set_destination(image, maddr); 724 + if (result < 0) 725 + goto out; 726 + 727 + while (mbytes) { 728 + struct page *page; 729 + char *ptr; 730 + size_t uchunk, mchunk; 731 + 732 + page = kimage_alloc_page(image, GFP_HIGHUSER, maddr); 733 + if (!page) { 734 + result = -ENOMEM; 735 + goto out; 736 + } 737 + result = kimage_add_page(image, page_to_pfn(page) 738 + << PAGE_SHIFT); 739 + if (result < 0) 740 + goto out; 741 + 742 + ptr = kmap(page); 743 + /* Start with a clear page */ 744 + clear_page(ptr); 745 + ptr += maddr & ~PAGE_MASK; 746 + mchunk = min_t(size_t, mbytes, 747 + PAGE_SIZE - (maddr & ~PAGE_MASK)); 748 + uchunk = min(ubytes, mchunk); 749 + 750 + /* For file based kexec, source pages are in kernel memory */ 751 + if (image->file_mode) 752 + memcpy(ptr, kbuf, uchunk); 753 + else 754 + result = copy_from_user(ptr, buf, uchunk); 755 + kunmap(page); 756 + if (result) { 757 + result = -EFAULT; 758 + goto out; 759 + } 760 + ubytes -= uchunk; 761 + maddr += mchunk; 762 + if (image->file_mode) 763 + kbuf += mchunk; 764 + else 765 + buf += mchunk; 766 + mbytes -= mchunk; 767
+ } 768 + out: 769 + return result; 770 + } 771 + 772 + static int kimage_load_crash_segment(struct kimage *image, 773 + struct kexec_segment *segment) 774 + { 775 + /* For crash dump kernels we simply copy the data from 776 + * user space to its destination. 777 + * We do things a page at a time for the sake of kmap. 778 + */ 779 + unsigned long maddr; 780 + size_t ubytes, mbytes; 781 + int result; 782 + unsigned char __user *buf = NULL; 783 + unsigned char *kbuf = NULL; 784 + 785 + result = 0; 786 + if (image->file_mode) 787 + kbuf = segment->kbuf; 788 + else 789 + buf = segment->buf; 790 + ubytes = segment->bufsz; 791 + mbytes = segment->memsz; 792 + maddr = segment->mem; 793 + while (mbytes) { 794 + struct page *page; 795 + char *ptr; 796 + size_t uchunk, mchunk; 797 + 798 + page = pfn_to_page(maddr >> PAGE_SHIFT); 799 + if (!page) { 800 + result = -ENOMEM; 801 + goto out; 802 + } 803 + ptr = kmap(page); 804 + ptr += maddr & ~PAGE_MASK; 805 + mchunk = min_t(size_t, mbytes, 806 + PAGE_SIZE - (maddr & ~PAGE_MASK)); 807 + uchunk = min(ubytes, mchunk); 808 + if (mchunk > uchunk) { 809 + /* Zero the trailing part of the page */ 810 + memset(ptr + uchunk, 0, mchunk - uchunk); 811 + } 812 + 813 + /* For file based kexec, source pages are in kernel memory */ 814 + if (image->file_mode) 815 + memcpy(ptr, kbuf, uchunk); 816 + else 817 + result = copy_from_user(ptr, buf, uchunk); 818 + kexec_flush_icache_page(page); 819 + kunmap(page); 820 + if (result) { 821 + result = -EFAULT; 822 + goto out; 823 + } 824 + ubytes -= uchunk; 825 + maddr += mchunk; 826 + if (image->file_mode) 827 + kbuf += mchunk; 828 + else 829 + buf += mchunk; 830 + mbytes -= mchunk; 831 + } 832 + out: 833 + return result; 834 + } 835 + 836 + int kimage_load_segment(struct kimage *image, 837 + struct kexec_segment *segment) 838 + { 839 + int result = -ENOMEM; 840 + 841 + switch (image->type) { 842 + case KEXEC_TYPE_DEFAULT: 843 + result = kimage_load_normal_segment(image, segment); 844 + break; 845 + case KEXEC_TYPE_CRASH: 846 + result = kimage_load_crash_segment(image, segment); 847 + break; 848 + } 849 + 850 + return result; 851 + } 852 + 853 + struct kimage *kexec_image; 854 + struct kimage *kexec_crash_image; 855 + int kexec_load_disabled; 856 + 857 + void crash_kexec(struct pt_regs *regs) 858 + { 859 + /* Take the kexec_mutex here to prevent sys_kexec_load 860 + * running on one cpu from replacing the crash kernel 861 + * we are using after a panic on a different cpu. 862 + * 863 + * If the crash kernel was not located in a fixed area 864 + * of memory the xchg(&kexec_crash_image) would be 865 + * sufficient. But since I reuse the memory...
866 + */ 867 + if (mutex_trylock(&kexec_mutex)) { 868 + if (kexec_crash_image) { 869 + struct pt_regs fixed_regs; 870 + 871 + crash_setup_regs(&fixed_regs, regs); 872 + crash_save_vmcoreinfo(); 873 + machine_crash_shutdown(&fixed_regs); 874 + machine_kexec(kexec_crash_image); 875 + } 876 + mutex_unlock(&kexec_mutex); 877 + } 878 + } 879 + 880 + size_t crash_get_memory_size(void) 881 + { 882 + size_t size = 0; 883 + 884 + mutex_lock(&kexec_mutex); 885 + if (crashk_res.end != crashk_res.start) 886 + size = resource_size(&crashk_res); 887 + mutex_unlock(&kexec_mutex); 888 + return size; 889 + } 890 + 891 + void __weak crash_free_reserved_phys_range(unsigned long begin, 892 + unsigned long end) 893 + { 894 + unsigned long addr; 895 + 896 + for (addr = begin; addr < end; addr += PAGE_SIZE) 897 + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); 898 + } 899 + 900 + int crash_shrink_memory(unsigned long new_size) 901 + { 902 + int ret = 0; 903 + unsigned long start, end; 904 + unsigned long old_size; 905 + struct resource *ram_res; 906 + 907 + mutex_lock(&kexec_mutex); 908 + 909 + if (kexec_crash_image) { 910 + ret = -ENOENT; 911 + goto unlock; 912 + } 913 + start = crashk_res.start; 914 + end = crashk_res.end; 915 + old_size = (end == 0) ? 0 : end - start + 1; 916 + if (new_size >= old_size) { 917 + ret = (new_size == old_size) ? 0 : -EINVAL; 918 + goto unlock; 919 + } 920 + 921 + ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL); 922 + if (!ram_res) { 923 + ret = -ENOMEM; 924 + goto unlock; 925 + } 926 + 927 + start = roundup(start, KEXEC_CRASH_MEM_ALIGN); 928 + end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); 929 + 930 + crash_map_reserved_pages(); 931 + crash_free_reserved_phys_range(end, crashk_res.end); 932 + 933 + if ((start == end) && (crashk_res.parent != NULL)) 934 + release_resource(&crashk_res); 935 + 936 + ram_res->start = end; 937 + ram_res->end = crashk_res.end; 938 + ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; 939 + ram_res->name = "System RAM"; 940 + 941 + crashk_res.end = end - 1; 942 + 943 + insert_resource(&iomem_resource, ram_res); 944 + crash_unmap_reserved_pages(); 945 + 946 + unlock: 947 + mutex_unlock(&kexec_mutex); 948 + return ret; 949 + } 950 + 951 + static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, 952 + size_t data_len) 953 + { 954 + struct elf_note note; 955 + 956 + note.n_namesz = strlen(name) + 1; 957 + note.n_descsz = data_len; 958 + note.n_type = type; 959 + memcpy(buf, &note, sizeof(note)); 960 + buf += (sizeof(note) + 3)/4; 961 + memcpy(buf, name, note.n_namesz); 962 + buf += (note.n_namesz + 3)/4; 963 + memcpy(buf, data, note.n_descsz); 964 + buf += (note.n_descsz + 3)/4; 965 + 966 + return buf; 967 + } 968 + 969 + static void final_note(u32 *buf) 970 + { 971 + struct elf_note note; 972 + 973 + note.n_namesz = 0; 974 + note.n_descsz = 0; 975 + note.n_type = 0; 976 + memcpy(buf, &note, sizeof(note)); 977 + } 978 + 979 + void crash_save_cpu(struct pt_regs *regs, int cpu) 980 + { 981 + struct elf_prstatus prstatus; 982 + u32 *buf; 983 + 984 + if ((cpu < 0) || (cpu >= nr_cpu_ids)) 985 + return; 986 + 987 + /* Using ELF notes here is opportunistic. 988 + * I need a well defined structure format 989 + * for the data I pass, and I need tags 990 + * on the data to indicate what information I have 991 + * squirrelled away. ELF notes happen to provide 992 + * all of that, so there is no need to invent something new. 
993 + */ 994 + buf = (u32 *)per_cpu_ptr(crash_notes, cpu); 995 + if (!buf) 996 + return; 997 + memset(&prstatus, 0, sizeof(prstatus)); 998 + prstatus.pr_pid = current->pid; 999 + elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 1000 + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, 1001 + &prstatus, sizeof(prstatus)); 1002 + final_note(buf); 1003 + } 1004 + 1005 + static int __init crash_notes_memory_init(void) 1006 + { 1007 + /* Allocate memory for saving cpu registers. */ 1008 + crash_notes = alloc_percpu(note_buf_t); 1009 + if (!crash_notes) { 1010 + pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); 1011 + return -ENOMEM; 1012 + } 1013 + return 0; 1014 + } 1015 + subsys_initcall(crash_notes_memory_init); 1016 + 1017 + 1018 + /* 1019 + * parsing the "crashkernel" commandline 1020 + * 1021 + * this code is intended to be called from architecture specific code 1022 + */ 1023 + 1024 + 1025 + /* 1026 + * This function parses command lines in the format 1027 + * 1028 + * crashkernel=ramsize-range:size[,...][@offset] 1029 + * 1030 + * The function returns 0 on success and -EINVAL on failure. 1031 + */ 1032 + static int __init parse_crashkernel_mem(char *cmdline, 1033 + unsigned long long system_ram, 1034 + unsigned long long *crash_size, 1035 + unsigned long long *crash_base) 1036 + { 1037 + char *cur = cmdline, *tmp; 1038 + 1039 + /* for each entry of the comma-separated list */ 1040 + do { 1041 + unsigned long long start, end = ULLONG_MAX, size; 1042 + 1043 + /* get the start of the range */ 1044 + start = memparse(cur, &tmp); 1045 + if (cur == tmp) { 1046 + pr_warn("crashkernel: Memory value expected\n"); 1047 + return -EINVAL; 1048 + } 1049 + cur = tmp; 1050 + if (*cur != '-') { 1051 + pr_warn("crashkernel: '-' expected\n"); 1052 + return -EINVAL; 1053 + } 1054 + cur++; 1055 + 1056 + /* if no ':' is here, then we read the end */ 1057 + if (*cur != ':') { 1058 + end = memparse(cur, &tmp); 1059 + if (cur == tmp) { 1060 + pr_warn("crashkernel: Memory value expected\n"); 1061 + return -EINVAL; 1062 + } 1063 + cur = tmp; 1064 + if (end <= start) { 1065 + pr_warn("crashkernel: end <= start\n"); 1066 + return -EINVAL; 1067 + } 1068 + } 1069 + 1070 + if (*cur != ':') { 1071 + pr_warn("crashkernel: ':' expected\n"); 1072 + return -EINVAL; 1073 + } 1074 + cur++; 1075 + 1076 + size = memparse(cur, &tmp); 1077 + if (cur == tmp) { 1078 + pr_warn("Memory value expected\n"); 1079 + return -EINVAL; 1080 + } 1081 + cur = tmp; 1082 + if (size >= system_ram) { 1083 + pr_warn("crashkernel: invalid size\n"); 1084 + return -EINVAL; 1085 + } 1086 + 1087 + /* match ? */ 1088 + if (system_ram >= start && system_ram < end) { 1089 + *crash_size = size; 1090 + break; 1091 + } 1092 + } while (*cur++ == ','); 1093 + 1094 + if (*crash_size > 0) { 1095 + while (*cur && *cur != ' ' && *cur != '@') 1096 + cur++; 1097 + if (*cur == '@') { 1098 + cur++; 1099 + *crash_base = memparse(cur, &tmp); 1100 + if (cur == tmp) { 1101 + pr_warn("Memory value expected after '@'\n"); 1102 + return -EINVAL; 1103 + } 1104 + } 1105 + } 1106 + 1107 + return 0; 1108 + } 1109 + 1110 + /* 1111 + * That function parses "simple" (old) crashkernel command lines like 1112 + * 1113 + * crashkernel=size[@offset] 1114 + * 1115 + * It returns 0 on success and -EINVAL on failure.
1116 + */ 1117 + static int __init parse_crashkernel_simple(char *cmdline, 1118 + unsigned long long *crash_size, 1119 + unsigned long long *crash_base) 1120 + { 1121 + char *cur = cmdline; 1122 + 1123 + *crash_size = memparse(cmdline, &cur); 1124 + if (cmdline == cur) { 1125 + pr_warn("crashkernel: memory value expected\n"); 1126 + return -EINVAL; 1127 + } 1128 + 1129 + if (*cur == '@') 1130 + *crash_base = memparse(cur+1, &cur); 1131 + else if (*cur != ' ' && *cur != '\0') { 1132 + pr_warn("crashkernel: unrecognized char\n"); 1133 + return -EINVAL; 1134 + } 1135 + 1136 + return 0; 1137 + } 1138 + 1139 + #define SUFFIX_HIGH 0 1140 + #define SUFFIX_LOW 1 1141 + #define SUFFIX_NULL 2 1142 + static __initdata char *suffix_tbl[] = { 1143 + [SUFFIX_HIGH] = ",high", 1144 + [SUFFIX_LOW] = ",low", 1145 + [SUFFIX_NULL] = NULL, 1146 + }; 1147 + 1148 + /* 1149 + * That function parses "suffix" crashkernel command lines like 1150 + * 1151 + * crashkernel=size,[high|low] 1152 + * 1153 + * It returns 0 on success and -EINVAL on failure. 1154 + */ 1155 + static int __init parse_crashkernel_suffix(char *cmdline, 1156 + unsigned long long *crash_size, 1157 + const char *suffix) 1158 + { 1159 + char *cur = cmdline; 1160 + 1161 + *crash_size = memparse(cmdline, &cur); 1162 + if (cmdline == cur) { 1163 + pr_warn("crashkernel: memory value expected\n"); 1164 + return -EINVAL; 1165 + } 1166 + 1167 + /* check with suffix */ 1168 + if (strncmp(cur, suffix, strlen(suffix))) { 1169 + pr_warn("crashkernel: unrecognized char\n"); 1170 + return -EINVAL; 1171 + } 1172 + cur += strlen(suffix); 1173 + if (*cur != ' ' && *cur != '\0') { 1174 + pr_warn("crashkernel: unrecognized char\n"); 1175 + return -EINVAL; 1176 + } 1177 + 1178 + return 0; 1179 + } 1180 + 1181 + static __init char *get_last_crashkernel(char *cmdline, 1182 + const char *name, 1183 + const char *suffix) 1184 + { 1185 + char *p = cmdline, *ck_cmdline = NULL; 1186 + 1187 + /* find crashkernel and use the last one if there are more */ 1188 + p = strstr(p, name); 1189 + while (p) { 1190 + char *end_p = strchr(p, ' '); 1191 + char *q; 1192 + 1193 + if (!end_p) 1194 + end_p = p + strlen(p); 1195 + 1196 + if (!suffix) { 1197 + int i; 1198 + 1199 + /* skip the one with any known suffix */ 1200 + for (i = 0; suffix_tbl[i]; i++) { 1201 + q = end_p - strlen(suffix_tbl[i]); 1202 + if (!strncmp(q, suffix_tbl[i], 1203 + strlen(suffix_tbl[i]))) 1204 + goto next; 1205 + } 1206 + ck_cmdline = p; 1207 + } else { 1208 + q = end_p - strlen(suffix); 1209 + if (!strncmp(q, suffix, strlen(suffix))) 1210 + ck_cmdline = p; 1211 + } 1212 + next: 1213 + p = strstr(p+1, name); 1214 + } 1215 + 1216 + if (!ck_cmdline) 1217 + return NULL; 1218 + 1219 + return ck_cmdline; 1220 + } 1221 + 1222 + static int __init __parse_crashkernel(char *cmdline, 1223 + unsigned long long system_ram, 1224 + unsigned long long *crash_size, 1225 + unsigned long long *crash_base, 1226 + const char *name, 1227 + const char *suffix) 1228 + { 1229 + char *first_colon, *first_space; 1230 + char *ck_cmdline; 1231 + 1232 + BUG_ON(!crash_size || !crash_base); 1233 + *crash_size = 0; 1234 + *crash_base = 0; 1235 + 1236 + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 1237 + 1238 + if (!ck_cmdline) 1239 + return -EINVAL; 1240 + 1241 + ck_cmdline += strlen(name); 1242 + 1243 + if (suffix) 1244 + return parse_crashkernel_suffix(ck_cmdline, crash_size, 1245 + suffix); 1246 + /* 1247 + * if the commandline contains a ':', then that's the extended 1248 + * syntax -- if not, it must be the classic syntax 
1249 + */ 1250 + first_colon = strchr(ck_cmdline, ':'); 1251 + first_space = strchr(ck_cmdline, ' '); 1252 + if (first_colon && (!first_space || first_colon < first_space)) 1253 + return parse_crashkernel_mem(ck_cmdline, system_ram, 1254 + crash_size, crash_base); 1255 + 1256 + return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); 1257 + } 1258 + 1259 + /* 1260 + * That function is the entry point for command line parsing and should be 1261 + * called from the arch-specific code. 1262 + */ 1263 + int __init parse_crashkernel(char *cmdline, 1264 + unsigned long long system_ram, 1265 + unsigned long long *crash_size, 1266 + unsigned long long *crash_base) 1267 + { 1268 + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1269 + "crashkernel=", NULL); 1270 + } 1271 + 1272 + int __init parse_crashkernel_high(char *cmdline, 1273 + unsigned long long system_ram, 1274 + unsigned long long *crash_size, 1275 + unsigned long long *crash_base) 1276 + { 1277 + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1278 + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); 1279 + } 1280 + 1281 + int __init parse_crashkernel_low(char *cmdline, 1282 + unsigned long long system_ram, 1283 + unsigned long long *crash_size, 1284 + unsigned long long *crash_base) 1285 + { 1286 + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1287 + "crashkernel=", suffix_tbl[SUFFIX_LOW]); 1288 + } 1289 + 1290 + static void update_vmcoreinfo_note(void) 1291 + { 1292 + u32 *buf = vmcoreinfo_note; 1293 + 1294 + if (!vmcoreinfo_size) 1295 + return; 1296 + buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 1297 + vmcoreinfo_size); 1298 + final_note(buf); 1299 + } 1300 + 1301 + void crash_save_vmcoreinfo(void) 1302 + { 1303 + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds()); 1304 + update_vmcoreinfo_note(); 1305 + } 1306 + 1307 + void vmcoreinfo_append_str(const char *fmt, ...) 
1308 + { 1309 + va_list args; 1310 + char buf[0x50]; 1311 + size_t r; 1312 + 1313 + va_start(args, fmt); 1314 + r = vscnprintf(buf, sizeof(buf), fmt, args); 1315 + va_end(args); 1316 + 1317 + r = min(r, vmcoreinfo_max_size - vmcoreinfo_size); 1318 + 1319 + memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 1320 + 1321 + vmcoreinfo_size += r; 1322 + } 1323 + 1324 + /* 1325 + * provide an empty default implementation here -- architecture 1326 + * code may override this 1327 + */ 1328 + void __weak arch_crash_save_vmcoreinfo(void) 1329 + {} 1330 + 1331 + unsigned long __weak paddr_vmcoreinfo_note(void) 1332 + { 1333 + return __pa((unsigned long)(char *)&vmcoreinfo_note); 1334 + } 1335 + 1336 + static int __init crash_save_vmcoreinfo_init(void) 1337 + { 1338 + VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 1339 + VMCOREINFO_PAGESIZE(PAGE_SIZE); 1340 + 1341 + VMCOREINFO_SYMBOL(init_uts_ns); 1342 + VMCOREINFO_SYMBOL(node_online_map); 1343 + #ifdef CONFIG_MMU 1344 + VMCOREINFO_SYMBOL(swapper_pg_dir); 1345 + #endif 1346 + VMCOREINFO_SYMBOL(_stext); 1347 + VMCOREINFO_SYMBOL(vmap_area_list); 1348 + 1349 + #ifndef CONFIG_NEED_MULTIPLE_NODES 1350 + VMCOREINFO_SYMBOL(mem_map); 1351 + VMCOREINFO_SYMBOL(contig_page_data); 1352 + #endif 1353 + #ifdef CONFIG_SPARSEMEM 1354 + VMCOREINFO_SYMBOL(mem_section); 1355 + VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 1356 + VMCOREINFO_STRUCT_SIZE(mem_section); 1357 + VMCOREINFO_OFFSET(mem_section, section_mem_map); 1358 + #endif 1359 + VMCOREINFO_STRUCT_SIZE(page); 1360 + VMCOREINFO_STRUCT_SIZE(pglist_data); 1361 + VMCOREINFO_STRUCT_SIZE(zone); 1362 + VMCOREINFO_STRUCT_SIZE(free_area); 1363 + VMCOREINFO_STRUCT_SIZE(list_head); 1364 + VMCOREINFO_SIZE(nodemask_t); 1365 + VMCOREINFO_OFFSET(page, flags); 1366 + VMCOREINFO_OFFSET(page, _count); 1367 + VMCOREINFO_OFFSET(page, mapping); 1368 + VMCOREINFO_OFFSET(page, lru); 1369 + VMCOREINFO_OFFSET(page, _mapcount); 1370 + VMCOREINFO_OFFSET(page, private); 1371 + VMCOREINFO_OFFSET(pglist_data, node_zones); 1372 + VMCOREINFO_OFFSET(pglist_data, nr_zones); 1373 + #ifdef CONFIG_FLAT_NODE_MEM_MAP 1374 + VMCOREINFO_OFFSET(pglist_data, node_mem_map); 1375 + #endif 1376 + VMCOREINFO_OFFSET(pglist_data, node_start_pfn); 1377 + VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); 1378 + VMCOREINFO_OFFSET(pglist_data, node_id); 1379 + VMCOREINFO_OFFSET(zone, free_area); 1380 + VMCOREINFO_OFFSET(zone, vm_stat); 1381 + VMCOREINFO_OFFSET(zone, spanned_pages); 1382 + VMCOREINFO_OFFSET(free_area, free_list); 1383 + VMCOREINFO_OFFSET(list_head, next); 1384 + VMCOREINFO_OFFSET(list_head, prev); 1385 + VMCOREINFO_OFFSET(vmap_area, va_start); 1386 + VMCOREINFO_OFFSET(vmap_area, list); 1387 + VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER); 1388 + log_buf_kexec_setup(); 1389 + VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 1390 + VMCOREINFO_NUMBER(NR_FREE_PAGES); 1391 + VMCOREINFO_NUMBER(PG_lru); 1392 + VMCOREINFO_NUMBER(PG_private); 1393 + VMCOREINFO_NUMBER(PG_swapcache); 1394 + VMCOREINFO_NUMBER(PG_slab); 1395 + #ifdef CONFIG_MEMORY_FAILURE 1396 + VMCOREINFO_NUMBER(PG_hwpoison); 1397 + #endif 1398 + VMCOREINFO_NUMBER(PG_head_mask); 1399 + VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 1400 + #ifdef CONFIG_HUGETLBFS 1401 + VMCOREINFO_SYMBOL(free_huge_page); 1402 + #endif 1403 + 1404 + arch_crash_save_vmcoreinfo(); 1405 + update_vmcoreinfo_note(); 1406 + 1407 + return 0; 1408 + } 1409 + 1410 + subsys_initcall(crash_save_vmcoreinfo_init); 1411 + 1412 + /* 1413 + * Move into place and start executing a preloaded standalone 1414 + * 
executable. If nothing was preloaded return an error. 1415 + */ 1416 + int kernel_kexec(void) 1417 + { 1418 + int error = 0; 1419 + 1420 + if (!mutex_trylock(&kexec_mutex)) 1421 + return -EBUSY; 1422 + if (!kexec_image) { 1423 + error = -EINVAL; 1424 + goto Unlock; 1425 + } 1426 + 1427 + #ifdef CONFIG_KEXEC_JUMP 1428 + if (kexec_image->preserve_context) { 1429 + lock_system_sleep(); 1430 + pm_prepare_console(); 1431 + error = freeze_processes(); 1432 + if (error) { 1433 + error = -EBUSY; 1434 + goto Restore_console; 1435 + } 1436 + suspend_console(); 1437 + error = dpm_suspend_start(PMSG_FREEZE); 1438 + if (error) 1439 + goto Resume_console; 1440 + /* At this point, dpm_suspend_start() has been called, 1441 + * but *not* dpm_suspend_end(). We *must* call 1442 + * dpm_suspend_end() now. Otherwise, drivers for 1443 + * some devices (e.g. interrupt controllers) become 1444 + * desynchronized with the actual state of the 1445 + * hardware at resume time, and evil weirdness ensues. 1446 + */ 1447 + error = dpm_suspend_end(PMSG_FREEZE); 1448 + if (error) 1449 + goto Resume_devices; 1450 + error = disable_nonboot_cpus(); 1451 + if (error) 1452 + goto Enable_cpus; 1453 + local_irq_disable(); 1454 + error = syscore_suspend(); 1455 + if (error) 1456 + goto Enable_irqs; 1457 + } else 1458 + #endif 1459 + { 1460 + kexec_in_progress = true; 1461 + kernel_restart_prepare(NULL); 1462 + migrate_to_reboot_cpu(); 1463 + 1464 + /* 1465 + * migrate_to_reboot_cpu() disables CPU hotplug assuming that 1466 + * no further code needs to use CPU hotplug (which is true in 1467 + * the reboot case). However, the kexec path depends on using 1468 + * CPU hotplug again; so re-enable it here. 1469 + */ 1470 + cpu_hotplug_enable(); 1471 + pr_emerg("Starting new kernel\n"); 1472 + machine_shutdown(); 1473 + } 1474 + 1475 + machine_kexec(kexec_image); 1476 + 1477 + #ifdef CONFIG_KEXEC_JUMP 1478 + if (kexec_image->preserve_context) { 1479 + syscore_resume(); 1480 + Enable_irqs: 1481 + local_irq_enable(); 1482 + Enable_cpus: 1483 + enable_nonboot_cpus(); 1484 + dpm_resume_start(PMSG_RESTORE); 1485 + Resume_devices: 1486 + dpm_resume_end(PMSG_RESTORE); 1487 + Resume_console: 1488 + resume_console(); 1489 + thaw_processes(); 1490 + Restore_console: 1491 + pm_restore_console(); 1492 + unlock_system_sleep(); 1493 + } 1494 + #endif 1495 + 1496 + Unlock: 1497 + mutex_unlock(&kexec_mutex); 1498 + return error; 1499 + } 1500 + 1501 + /* 1502 + * Add and remove page tables for crashkernel memory 1503 + * 1504 + * Provide an empty default implementation here -- architecture 1505 + * code may override this 1506 + */ 1507 + void __weak crash_map_reserved_pages(void) 1508 + {} 1509 + 1510 + void __weak crash_unmap_reserved_pages(void) 1511 + {}
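The IND_* entry list that kimage_add_entry() builds and for_each_kimage_entry() walks is the contract with the relocation stub, so it is worth seeing in isolation. A self-contained userspace sketch of the walk (the flag values mirror include/linux/kexec.h; the 4 KiB page size and the sample addresses are assumptions for the demo):

#include <stdio.h>

/* Flag values as in include/linux/kexec.h (duplicated here for the demo). */
#define IND_DESTINATION	0x1
#define IND_INDIRECTION	0x2
#define IND_DONE	0x4
#define IND_SOURCE	0x8
#define DEMO_PAGE_SIZE	0x1000UL
#define DEMO_PAGE_MASK	(~(DEMO_PAGE_SIZE - 1))

/* Walk a flat entry list the way the kernel does, ignoring indirection
 * hops for simplicity: each IND_SOURCE page lands at the current
 * destination, which then advances by one page (cf. kimage_dst_used()). */
static void walk_entries(const unsigned long *entry)
{
	unsigned long destination = 0;

	for (; !(*entry & IND_DONE); entry++) {
		if (*entry & IND_DESTINATION)
			destination = *entry & DEMO_PAGE_MASK;
		else if (*entry & IND_SOURCE) {
			printf("copy page %#lx -> %#lx\n",
			       *entry & DEMO_PAGE_MASK, destination);
			destination += DEMO_PAGE_SIZE;
		}
	}
}

int main(void)
{
	const unsigned long list[] = {
		0x100000 | IND_DESTINATION,	/* sources land at 1 MiB... */
		0x7f2000 | IND_SOURCE,
		0x7f5000 | IND_SOURCE,		/* ...then 1 MiB + 4 KiB */
		IND_DONE,
	};

	walk_entries(list);
	return 0;
}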
+3 -3
kernel/ksysfs.c
··· 90 90 KERNEL_ATTR_RW(profiling); 91 91 #endif 92 92 93 - #ifdef CONFIG_KEXEC 93 + #ifdef CONFIG_KEXEC_CORE 94 94 static ssize_t kexec_loaded_show(struct kobject *kobj, 95 95 struct kobj_attribute *attr, char *buf) 96 96 { ··· 134 134 } 135 135 KERNEL_ATTR_RO(vmcoreinfo); 136 136 137 - #endif /* CONFIG_KEXEC */ 137 + #endif /* CONFIG_KEXEC_CORE */ 138 138 139 139 /* whether file capabilities are enabled */ 140 140 static ssize_t fscaps_show(struct kobject *kobj, ··· 196 196 #ifdef CONFIG_PROFILING 197 197 &profiling_attr.attr, 198 198 #endif 199 - #ifdef CONFIG_KEXEC 199 + #ifdef CONFIG_KEXEC_CORE 200 200 &kexec_loaded_attr.attr, 201 201 &kexec_crash_loaded_attr.attr, 202 202 &kexec_crash_size_attr.attr,
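The attributes guarded by this hunk are ordinary read-only text files under /sys/kernel/, so the effect of building with or without KEXEC_CORE is easy to probe from userspace. A minimal sketch:

#include <stdio.h>

/* Dump the kexec sysfs attributes this hunk now keys off CONFIG_KEXEC_CORE.
 * kexec_loaded and kexec_crash_loaded hold "0"/"1"; kexec_crash_size holds
 * the reserved byte count. */
static void show(const char *path)
{
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		printf("%s: absent (kernel built without KEXEC_CORE?)\n", path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(void)
{
	show("/sys/kernel/kexec_loaded");
	show("/sys/kernel/kexec_crash_loaded");
	show("/sys/kernel/kexec_crash_size");
	return 0;
}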
+1 -1
kernel/printk/printk.c
··· 835 835 .release = devkmsg_release, 836 836 }; 837 837 838 - #ifdef CONFIG_KEXEC 838 + #ifdef CONFIG_KEXEC_CORE 839 839 /* 840 840 * This appends the listed symbols to /proc/vmcore 841 841 *
+1 -1
kernel/reboot.c
··· 346 346 kernel_restart(buffer); 347 347 break; 348 348 349 - #ifdef CONFIG_KEXEC 349 + #ifdef CONFIG_KEXEC_CORE 350 350 case LINUX_REBOOT_CMD_KEXEC: 351 351 ret = kernel_kexec(); 352 352 break;
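For context, LINUX_REBOOT_CMD_KEXEC is what reaches kernel_kexec() when userspace issues reboot(2) after a successful kexec_load(2)/kexec_file_load(2); libc headers commonly expose the command as RB_KEXEC. A minimal sketch (assuming the RB_KEXEC define is available; this really does boot the loaded kernel when run as root):

#include <stdio.h>
#include <unistd.h>
#include <sys/reboot.h>

int main(void)
{
	sync();		/* unwritten data is lost across the jump */

	/* Jump into the previously staged kernel; kernel_kexec()
	 * returns -EINVAL (seen here via errno) if nothing is loaded. */
	if (reboot(RB_KEXEC) < 0) {
		perror("reboot(RB_KEXEC)");
		return 1;
	}
	return 0;	/* not reached on success */
}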
+1 -1
kernel/sysctl.c
··· 621 621 .proc_handler = proc_dointvec, 622 622 }, 623 623 #endif 624 - #ifdef CONFIG_KEXEC 624 + #ifdef CONFIG_KEXEC_CORE 625 625 { 626 626 .procname = "kexec_load_disabled", 627 627 .data = &kexec_load_disabled,
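The kexec_load_disabled sysctl surfaced here is, by design, a one-way switch: it only accepts a transition to 1 and stays set until reboot, after which both kexec load syscalls refuse new images. A sketch of flipping it from userspace (equivalent to writing the procfs file by hand):

#include <stdio.h>

/* Disable further kexec image loading until the next boot. The sysctl
 * is one-way, so there is no way to re-enable it at runtime. */
int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/kexec_load_disabled", "w");

	if (!f) {
		perror("kexec_load_disabled");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);
	return 0;
}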