Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/crash: add x86 crash hotplug support

When CPUs or memory are hot-plugged or hot-unplugged, or onlined or
offlined, the crash elfcorehdr, which describes the CPUs and memory in
the system, must also be updated.

A new elfcorehdr is generated from the available CPUs and memory and
replaces the existing elfcorehdr. The segment containing the elfcorehdr
is identified at run-time in crash_core:crash_handle_hotplug_event().

No modifications to purgatory (see 'kexec: exclude elfcorehdr from the
segment digest') or boot_params (as the elfcorehdr= capture kernel command
line parameter pointer remains unchanged and correct) are needed; only the
elfcorehdr itself is updated.

For kexec_file_load(), the elfcorehdr segment size is based on NR_CPUS and
CRASH_MAX_MEMORY_RANGES in order to accommodate a growing number of CPU
and memory resources.

For kexec_load(), the userspace kexec utility needs to size the elfcorehdr
segment in the same or a similar manner.

To accommodate the kexec_load() syscall in the absence of kexec_file_load()
syscall support, prepare_elf_headers() and its dependents are moved outside
of CONFIG_KEXEC_FILE.

[eric.devolder@oracle.com: correct unused function build error]
Link: https://lkml.kernel.org/r/20230821182644.2143-1-eric.devolder@oracle.com
Link: https://lkml.kernel.org/r/20230814214446.6659-6-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Reviewed-by: Sourabh Jain <sourabhjain@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Eric DeVolder and committed by
Andrew Morton
ea53ad9c 88a6f899

+116 -7
+3
arch/x86/Kconfig
··· 2069 2069 config ARCH_SUPPORTS_CRASH_DUMP 2070 2070 def_bool X86_64 || (X86_32 && HIGHMEM) 2071 2071 2072 + config ARCH_SUPPORTS_CRASH_HOTPLUG 2073 + def_bool y 2074 + 2072 2075 config PHYSICAL_START 2073 2076 hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP) 2074 2077 default "0x1000000"
+15
arch/x86/include/asm/kexec.h
··· 209 209 extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; 210 210 extern void kdump_nmi_shootdown_cpus(void); 211 211 212 + #ifdef CONFIG_CRASH_HOTPLUG 213 + void arch_crash_handle_hotplug_event(struct kimage *image); 214 + #define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event 215 + 216 + #ifdef CONFIG_HOTPLUG_CPU 217 + static inline int crash_hotplug_cpu_support(void) { return 1; } 218 + #define crash_hotplug_cpu_support crash_hotplug_cpu_support 219 + #endif 220 + 221 + #ifdef CONFIG_MEMORY_HOTPLUG 222 + static inline int crash_hotplug_memory_support(void) { return 1; } 223 + #define crash_hotplug_memory_support crash_hotplug_memory_support 224 + #endif 225 + #endif 226 + 212 227 #endif /* __ASSEMBLY__ */ 213 228 214 229 #endif /* _ASM_X86_KEXEC_H */
+98 -7
arch/x86/kernel/crash.c
··· 158 158 crash_save_cpu(regs, safe_smp_processor_id()); 159 159 } 160 160 161 - #ifdef CONFIG_KEXEC_FILE 162 - 161 + #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_HOTPLUG) 163 162 static int get_nr_ram_ranges_callback(struct resource *res, void *arg) 164 163 { 165 164 unsigned int *nr_ranges = arg; ··· 230 231 231 232 /* Prepare elf headers. Return addr and size */ 232 233 static int prepare_elf_headers(struct kimage *image, void **addr, 233 - unsigned long *sz) 234 + unsigned long *sz, unsigned long *nr_mem_ranges) 234 235 { 235 236 struct crash_mem *cmem; 236 237 int ret; ··· 248 249 if (ret) 249 250 goto out; 250 251 252 + /* Return the computed number of memory ranges, for hotplug usage */ 253 + *nr_mem_ranges = cmem->nr_ranges; 254 + 251 255 /* By default prepare 64bit headers */ 252 256 ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz); 253 257 ··· 258 256 vfree(cmem); 259 257 return ret; 260 258 } 259 + #endif 261 260 261 + #ifdef CONFIG_KEXEC_FILE 262 262 static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) 263 263 { 264 264 unsigned int nr_e820_entries; ··· 375 371 int crash_load_segments(struct kimage *image) 376 372 { 377 373 int ret; 374 + unsigned long pnum = 0; 378 375 struct kexec_buf kbuf = { .image = image, .buf_min = 0, 379 376 .buf_max = ULONG_MAX, .top_down = false }; 380 377 381 378 /* Prepare elf headers and add a segment */ 382 - ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz); 379 + ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz, &pnum); 383 380 if (ret) 384 381 return ret; 385 382 386 - image->elf_headers = kbuf.buffer; 387 - image->elf_headers_sz = kbuf.bufsz; 383 + image->elf_headers = kbuf.buffer; 384 + image->elf_headers_sz = kbuf.bufsz; 385 + kbuf.memsz = kbuf.bufsz; 388 386 389 - kbuf.memsz = kbuf.bufsz; 387 + #ifdef CONFIG_CRASH_HOTPLUG 388 + /* 389 + * The elfcorehdr segment size accounts for VMCOREINFO, kernel_map, 390 + * maximum CPUs and 
maximum memory ranges. 391 + */ 392 + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 393 + pnum = 2 + CONFIG_NR_CPUS_DEFAULT + CONFIG_CRASH_MAX_MEMORY_RANGES; 394 + else 395 + pnum += 2 + CONFIG_NR_CPUS_DEFAULT; 396 + 397 + if (pnum < (unsigned long)PN_XNUM) { 398 + kbuf.memsz = pnum * sizeof(Elf64_Phdr); 399 + kbuf.memsz += sizeof(Elf64_Ehdr); 400 + 401 + image->elfcorehdr_index = image->nr_segments; 402 + 403 + /* Mark as usable to crash kernel, else crash kernel fails on boot */ 404 + image->elf_headers_sz = kbuf.memsz; 405 + } else { 406 + pr_err("number of Phdrs %lu exceeds max\n", pnum); 407 + } 408 + #endif 409 + 390 410 kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 391 411 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 392 412 ret = kexec_add_buffer(&kbuf); ··· 423 395 return ret; 424 396 } 425 397 #endif /* CONFIG_KEXEC_FILE */ 398 + 399 + #ifdef CONFIG_CRASH_HOTPLUG 400 + 401 + #undef pr_fmt 402 + #define pr_fmt(fmt) "crash hp: " fmt 403 + 404 + /** 405 + * arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes 406 + * @image: a pointer to kexec_crash_image 407 + * 408 + * Prepare the new elfcorehdr and replace the existing elfcorehdr. 409 + */ 410 + void arch_crash_handle_hotplug_event(struct kimage *image) 411 + { 412 + void *elfbuf = NULL, *old_elfcorehdr; 413 + unsigned long nr_mem_ranges; 414 + unsigned long mem, memsz; 415 + unsigned long elfsz = 0; 416 + 417 + /* 418 + * Create the new elfcorehdr reflecting the changes to CPU and/or 419 + * memory resources. 420 + */ 421 + if (prepare_elf_headers(image, &elfbuf, &elfsz, &nr_mem_ranges)) { 422 + pr_err("unable to create new elfcorehdr"); 423 + goto out; 424 + } 425 + 426 + /* 427 + * Obtain address and size of the elfcorehdr segment, and 428 + * check it against the new elfcorehdr buffer. 
429 + */ 430 + mem = image->segment[image->elfcorehdr_index].mem; 431 + memsz = image->segment[image->elfcorehdr_index].memsz; 432 + if (elfsz > memsz) { 433 + pr_err("update elfcorehdr elfsz %lu > memsz %lu", 434 + elfsz, memsz); 435 + goto out; 436 + } 437 + 438 + /* 439 + * Copy new elfcorehdr over the old elfcorehdr at destination. 440 + */ 441 + old_elfcorehdr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); 442 + if (!old_elfcorehdr) { 443 + pr_err("mapping elfcorehdr segment failed\n"); 444 + goto out; 445 + } 446 + 447 + /* 448 + * Temporarily invalidate the crash image while the 449 + * elfcorehdr is updated. 450 + */ 451 + xchg(&kexec_crash_image, NULL); 452 + memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz); 453 + xchg(&kexec_crash_image, image); 454 + kunmap_local(old_elfcorehdr); 455 + pr_debug("updated elfcorehdr\n"); 456 + 457 + out: 458 + vfree(elfbuf); 459 + } 460 + #endif