Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at master 656 lines 16 kB view raw
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Architecture specific (PPC64) functions for kexec based crash dumps. 4 * 5 * Copyright (C) 2005, IBM Corp. 6 * 7 * Created by: Haren Myneni 8 */ 9 10#include <linux/kernel.h> 11#include <linux/smp.h> 12#include <linux/reboot.h> 13#include <linux/kexec.h> 14#include <linux/export.h> 15#include <linux/crash_dump.h> 16#include <linux/delay.h> 17#include <linux/irq.h> 18#include <linux/types.h> 19#include <linux/libfdt.h> 20#include <linux/memory.h> 21 22#include <asm/processor.h> 23#include <asm/machdep.h> 24#include <asm/kexec.h> 25#include <asm/smp.h> 26#include <asm/setjmp.h> 27#include <asm/debug.h> 28#include <asm/interrupt.h> 29#include <asm/kexec_ranges.h> 30#include <asm/crashdump-ppc64.h> 31 32/* 33 * The primary CPU waits a while for all secondary CPUs to enter. This is to 34 * avoid sending an IPI if the secondary CPUs are entering 35 * crash_kexec_secondary on their own (eg via a system reset). 36 * 37 * The secondary timeout has to be longer than the primary. Both timeouts are 38 * in milliseconds. 39 */ 40#define PRIMARY_TIMEOUT 500 41#define SECONDARY_TIMEOUT 1000 42 43#define IPI_TIMEOUT 10000 44#define REAL_MODE_TIMEOUT 10000 45 46static int time_to_dump; 47 48/* 49 * In case of system reset, secondary CPUs enter crash_kexec_secondary with out 50 * having to send an IPI explicitly. So, indicate if the crash is via 51 * system reset to avoid sending another IPI. 52 */ 53static int is_via_system_reset; 54 55/* 56 * crash_wake_offline should be set to 1 by platforms that intend to wake 57 * up offline cpus prior to jumping to a kdump kernel. Currently powernv 58 * sets it to 1, since we want to avoid things from happening when an 59 * offline CPU wakes up due to something like an HMI (malfunction error), 60 * which propagates to all threads. 61 */ 62int crash_wake_offline; 63 64#define CRASH_HANDLER_MAX 3 65/* List of shutdown handles */ 66static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; 67static DEFINE_SPINLOCK(crash_handlers_lock); 68 69static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; 70static int crash_shutdown_cpu = -1; 71 72static int handle_fault(struct pt_regs *regs) 73{ 74 if (crash_shutdown_cpu == smp_processor_id()) 75 longjmp(crash_shutdown_buf, 1); 76 return 0; 77} 78 79#ifdef CONFIG_SMP 80 81static atomic_t cpus_in_crash; 82void crash_ipi_callback(struct pt_regs *regs) 83{ 84 static cpumask_t cpus_state_saved = CPU_MASK_NONE; 85 86 int cpu = smp_processor_id(); 87 88 hard_irq_disable(); 89 if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { 90 crash_save_cpu(regs, cpu); 91 cpumask_set_cpu(cpu, &cpus_state_saved); 92 } 93 94 atomic_inc(&cpus_in_crash); 95 smp_mb__after_atomic(); 96 97 /* 98 * Starting the kdump boot. 99 * This barrier is needed to make sure that all CPUs are stopped. 100 */ 101 while (!time_to_dump) 102 cpu_relax(); 103 104 if (ppc_md.kexec_cpu_down) 105 ppc_md.kexec_cpu_down(1, 1); 106 107#ifdef CONFIG_PPC64 108 kexec_smp_wait(); 109#else 110 for (;;); /* FIXME */ 111#endif 112 113 /* NOTREACHED */ 114} 115 116static void crash_kexec_prepare_cpus(void) 117{ 118 unsigned int msecs; 119 volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 120 volatile int tries = 0; 121 int (*old_handler)(struct pt_regs *regs); 122 123 printk(KERN_EMERG "Sending IPI to other CPUs\n"); 124 125 if (crash_wake_offline) 126 ncpus = num_present_cpus() - 1; 127 128 /* 129 * If we came in via system reset, secondaries enter via crash_kexec_secondary(). 130 * So, wait a while for the secondary CPUs to enter for that case. 131 * Else, send IPI to all other CPUs. 132 */ 133 if (is_via_system_reset) 134 mdelay(PRIMARY_TIMEOUT); 135 else 136 crash_send_ipi(crash_ipi_callback); 137 smp_wmb(); 138 139again: 140 /* 141 * FIXME: Until we will have the way to stop other CPUs reliably, 142 * the crash CPU will send an IPI and wait for other CPUs to 143 * respond. 144 */ 145 msecs = IPI_TIMEOUT; 146 while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) 147 mdelay(1); 148 149 /* Would it be better to replace the trap vector here? */ 150 151 if (atomic_read(&cpus_in_crash) >= ncpus) { 152 printk(KERN_EMERG "IPI complete\n"); 153 return; 154 } 155 156 printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", 157 ncpus - atomic_read(&cpus_in_crash)); 158 159 /* 160 * If we have a panic timeout set then we can't wait indefinitely 161 * for someone to activate system reset. We also give up on the 162 * second time through if system reset fail to work. 163 */ 164 if ((panic_timeout > 0) || (tries > 0)) 165 return; 166 167 /* 168 * A system reset will cause all CPUs to take an 0x100 exception. 169 * The primary CPU returns here via setjmp, and the secondary 170 * CPUs reexecute the crash_kexec_secondary path. 171 */ 172 old_handler = __debugger; 173 __debugger = handle_fault; 174 crash_shutdown_cpu = smp_processor_id(); 175 176 if (setjmp(crash_shutdown_buf) == 0) { 177 printk(KERN_EMERG "Activate system reset (dumprestart) " 178 "to stop other cpu(s)\n"); 179 180 /* 181 * A system reset will force all CPUs to execute the 182 * crash code again. We need to reset cpus_in_crash so we 183 * wait for everyone to do this. 184 */ 185 atomic_set(&cpus_in_crash, 0); 186 smp_mb(); 187 188 while (atomic_read(&cpus_in_crash) < ncpus) 189 cpu_relax(); 190 } 191 192 crash_shutdown_cpu = -1; 193 __debugger = old_handler; 194 195 tries++; 196 goto again; 197} 198 199/* 200 * This function will be called by secondary cpus. 201 */ 202void crash_kexec_secondary(struct pt_regs *regs) 203{ 204 unsigned long flags; 205 int msecs = SECONDARY_TIMEOUT; 206 207 local_irq_save(flags); 208 209 /* Wait for the primary crash CPU to signal its progress */ 210 while (crashing_cpu < 0) { 211 if (--msecs < 0) { 212 /* No response, kdump image may not have been loaded */ 213 local_irq_restore(flags); 214 return; 215 } 216 217 mdelay(1); 218 } 219 220 crash_ipi_callback(regs); 221} 222 223#else /* ! CONFIG_SMP */ 224 225static void crash_kexec_prepare_cpus(void) 226{ 227 /* 228 * move the secondaries to us so that we can copy 229 * the new kernel 0-0x100 safely 230 * 231 * do this if kexec in setup.c ? 232 */ 233#ifdef CONFIG_PPC64 234 smp_release_cpus(); 235#else 236 /* FIXME */ 237#endif 238} 239 240void crash_kexec_secondary(struct pt_regs *regs) 241{ 242} 243#endif /* CONFIG_SMP */ 244 245/* wait for all the CPUs to hit real mode but timeout if they don't come in */ 246#if defined(CONFIG_SMP) && defined(CONFIG_PPC64) 247noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu) 248{ 249 unsigned int msecs; 250 int i; 251 252 msecs = REAL_MODE_TIMEOUT; 253 for (i=0; i < nr_cpu_ids && msecs > 0; i++) { 254 if (i == cpu) 255 continue; 256 257 while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { 258 barrier(); 259 if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) 260 break; 261 msecs--; 262 mdelay(1); 263 } 264 } 265 mb(); 266} 267#else 268static inline void crash_kexec_wait_realmode(int cpu) {} 269#endif /* CONFIG_SMP && CONFIG_PPC64 */ 270 271void crash_kexec_prepare(void) 272{ 273 /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ 274 printk_deferred_enter(); 275 276 /* 277 * This function is only called after the system 278 * has panicked or is otherwise in a critical state. 279 * The minimum amount of code to allow a kexec'd kernel 280 * to run successfully needs to happen here. 281 * 282 * In practice this means stopping other cpus in 283 * an SMP system. 284 * The kernel is broken so disable interrupts. 285 */ 286 hard_irq_disable(); 287 288 /* 289 * Make a note of crashing cpu. Will be used in machine_kexec 290 * such that another IPI will not be sent. 291 */ 292 crashing_cpu = smp_processor_id(); 293 294 crash_kexec_prepare_cpus(); 295} 296 297/* 298 * Register a function to be called on shutdown. Only use this if you 299 * can't reset your device in the second kernel. 300 */ 301int crash_shutdown_register(crash_shutdown_t handler) 302{ 303 unsigned int i, rc; 304 305 spin_lock(&crash_handlers_lock); 306 for (i = 0 ; i < CRASH_HANDLER_MAX; i++) 307 if (!crash_shutdown_handles[i]) { 308 /* Insert handle at first empty entry */ 309 crash_shutdown_handles[i] = handler; 310 rc = 0; 311 break; 312 } 313 314 if (i == CRASH_HANDLER_MAX) { 315 printk(KERN_ERR "Crash shutdown handles full, " 316 "not registered.\n"); 317 rc = 1; 318 } 319 320 spin_unlock(&crash_handlers_lock); 321 return rc; 322} 323EXPORT_SYMBOL(crash_shutdown_register); 324 325int crash_shutdown_unregister(crash_shutdown_t handler) 326{ 327 unsigned int i, rc; 328 329 spin_lock(&crash_handlers_lock); 330 for (i = 0 ; i < CRASH_HANDLER_MAX; i++) 331 if (crash_shutdown_handles[i] == handler) 332 break; 333 334 if (i == CRASH_HANDLER_MAX) { 335 printk(KERN_ERR "Crash shutdown handle not found\n"); 336 rc = 1; 337 } else { 338 /* Shift handles down */ 339 for (; i < (CRASH_HANDLER_MAX - 1); i++) 340 crash_shutdown_handles[i] = 341 crash_shutdown_handles[i+1]; 342 /* 343 * Reset last entry to NULL now that it has been shifted down, 344 * this will allow new handles to be added here. 345 */ 346 crash_shutdown_handles[i] = NULL; 347 rc = 0; 348 } 349 350 spin_unlock(&crash_handlers_lock); 351 return rc; 352} 353EXPORT_SYMBOL(crash_shutdown_unregister); 354 355void default_machine_crash_shutdown(struct pt_regs *regs) 356{ 357 volatile unsigned int i; 358 int (*old_handler)(struct pt_regs *regs); 359 360 if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) 361 is_via_system_reset = 1; 362 363 if (IS_ENABLED(CONFIG_SMP)) 364 crash_smp_send_stop(); 365 else 366 crash_kexec_prepare(); 367 368 crash_save_cpu(regs, crashing_cpu); 369 370 time_to_dump = 1; 371 372 crash_kexec_wait_realmode(crashing_cpu); 373 374 machine_kexec_mask_interrupts(); 375 376 /* 377 * Call registered shutdown routines safely. Swap out 378 * __debugger_fault_handler, and replace on exit. 379 */ 380 old_handler = __debugger_fault_handler; 381 __debugger_fault_handler = handle_fault; 382 crash_shutdown_cpu = smp_processor_id(); 383 for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { 384 if (setjmp(crash_shutdown_buf) == 0) { 385 /* 386 * Insert syncs and delay to ensure 387 * instructions in the dangerous region don't 388 * leak away from this protected region. 389 */ 390 asm volatile("sync; isync"); 391 /* dangerous region */ 392 crash_shutdown_handles[i](); 393 asm volatile("sync; isync"); 394 } 395 } 396 crash_shutdown_cpu = -1; 397 __debugger_fault_handler = old_handler; 398 399 if (ppc_md.kexec_cpu_down) 400 ppc_md.kexec_cpu_down(1, 0); 401} 402 403#ifdef CONFIG_CRASH_DUMP 404/** 405 * sync_backup_region_phdr - synchronize backup region offset between 406 * kexec image and ELF core header. 407 * @image: Kexec image. 408 * @ehdr: ELF core header. 409 * @phdr_to_kimage: If true, read the offset from the ELF program header 410 * and update the kimage backup region. If false, update 411 * the ELF program header offset from the kimage backup 412 * region. 413 * 414 * Note: During kexec_load, this is called with phdr_to_kimage = true. For 415 * kexec_file_load and ELF core header recreation during memory hotplug 416 * events, it is called with phdr_to_kimage = false. 417 * 418 * Returns nothing. 419 */ 420void sync_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr, bool phdr_to_kimage) 421{ 422 Elf64_Phdr *phdr; 423 unsigned int i; 424 425 phdr = (Elf64_Phdr *)(ehdr + 1); 426 for (i = 0; i < ehdr->e_phnum; i++, phdr++) { 427 if (phdr->p_paddr == BACKUP_SRC_START) { 428 if (phdr_to_kimage) 429 image->arch.backup_start = phdr->p_offset; 430 else 431 phdr->p_offset = image->arch.backup_start; 432 433 kexec_dprintk("Backup region offset updated to 0x%lx\n", 434 image->arch.backup_start); 435 return; 436 } 437 } 438} 439#endif /* CONFIG_CRASH_DUMP */ 440 441#ifdef CONFIG_CRASH_HOTPLUG 442 443int machine_kexec_post_load(struct kimage *image) 444{ 445 int i; 446 unsigned long mem; 447 unsigned char *ptr; 448 449 if (image->type != KEXEC_TYPE_CRASH) 450 return 0; 451 452 if (image->file_mode) 453 return 0; 454 455 for (i = 0; i < image->nr_segments; i++) { 456 mem = image->segment[i].mem; 457 ptr = (char *)__va(mem); 458 459 if (ptr && memcmp(ptr, ELFMAG, SELFMAG) == 0) 460 sync_backup_region_phdr(image, (Elf64_Ehdr *) ptr, true); 461 } 462 return 0; 463} 464 465#undef pr_fmt 466#define pr_fmt(fmt) "crash hp: " fmt 467 468/* 469 * Advertise preferred elfcorehdr size to userspace via 470 * /sys/kernel/crash_elfcorehdr_size sysfs interface. 471 */ 472unsigned int arch_crash_get_elfcorehdr_size(void) 473{ 474 unsigned long phdr_cnt; 475 476 /* A program header for possible CPUs + vmcoreinfo */ 477 phdr_cnt = num_possible_cpus() + 1; 478 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 479 phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES; 480 481 return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr)); 482} 483 484/** 485 * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old 486 * elfcorehdr in the kexec segment array. 487 * @image: the active struct kimage 488 * @mn: struct memory_notify data handler 489 */ 490static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn) 491{ 492 int ret; 493 struct crash_mem *cmem = NULL; 494 struct kexec_segment *ksegment; 495 void *ptr, *mem, *elfbuf = NULL; 496 unsigned long elfsz, memsz, base_addr, size; 497 498 ksegment = &image->segment[image->elfcorehdr_index]; 499 mem = (void *) ksegment->mem; 500 memsz = ksegment->memsz; 501 502 ret = get_crash_memory_ranges(&cmem); 503 if (ret) { 504 pr_err("Failed to get crash mem range\n"); 505 return; 506 } 507 508 /* 509 * The hot unplugged memory is part of crash memory ranges, 510 * remove it here. 511 */ 512 if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { 513 base_addr = PFN_PHYS(mn->start_pfn); 514 size = mn->nr_pages * PAGE_SIZE; 515 ret = remove_mem_range(&cmem, base_addr, size); 516 if (ret) { 517 pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n"); 518 goto out; 519 } 520 } 521 522 ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz); 523 if (ret) { 524 pr_err("Failed to prepare elf header\n"); 525 goto out; 526 } 527 528 /* 529 * It is unlikely that kernel hit this because elfcorehdr kexec 530 * segment (memsz) is built with addition space to accommodate growing 531 * number of crash memory ranges while loading the kdump kernel. It is 532 * Just to avoid any unforeseen case. 533 */ 534 if (elfsz > memsz) { 535 pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz); 536 goto out; 537 } 538 539 sync_backup_region_phdr(image, (Elf64_Ehdr *) elfbuf, false); 540 541 ptr = __va(mem); 542 if (ptr) { 543 /* Temporarily invalidate the crash image while it is replaced */ 544 xchg(&kexec_crash_image, NULL); 545 546 /* Replace the old elfcorehdr with newly prepared elfcorehdr */ 547 memcpy((void *)ptr, elfbuf, elfsz); 548 549 /* The crash image is now valid once again */ 550 xchg(&kexec_crash_image, image); 551 } 552out: 553 kvfree(cmem); 554 kvfree(elfbuf); 555} 556 557/** 558 * get_fdt_index - Loop through the kexec segment array and find 559 * the index of the FDT segment. 560 * @image: a pointer to kexec_crash_image 561 * 562 * Returns the index of FDT segment in the kexec segment array 563 * if found; otherwise -1. 564 */ 565static int get_fdt_index(struct kimage *image) 566{ 567 void *ptr; 568 unsigned long mem; 569 int i, fdt_index = -1; 570 571 /* Find the FDT segment index in kexec segment array. */ 572 for (i = 0; i < image->nr_segments; i++) { 573 mem = image->segment[i].mem; 574 ptr = __va(mem); 575 576 if (ptr && fdt_magic(ptr) == FDT_MAGIC) { 577 fdt_index = i; 578 break; 579 } 580 } 581 582 return fdt_index; 583} 584 585/** 586 * update_crash_fdt - updates the cpus node of the crash FDT. 587 * 588 * @image: a pointer to kexec_crash_image 589 */ 590static void update_crash_fdt(struct kimage *image) 591{ 592 void *fdt; 593 int fdt_index; 594 595 fdt_index = get_fdt_index(image); 596 if (fdt_index < 0) { 597 pr_err("Unable to locate FDT segment.\n"); 598 return; 599 } 600 601 fdt = __va((void *)image->segment[fdt_index].mem); 602 603 /* Temporarily invalidate the crash image while it is replaced */ 604 xchg(&kexec_crash_image, NULL); 605 606 /* update FDT to reflect changes in CPU resources */ 607 if (update_cpus_node(fdt)) 608 pr_err("Failed to update crash FDT"); 609 610 /* The crash image is now valid once again */ 611 xchg(&kexec_crash_image, image); 612} 613 614int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags) 615{ 616#ifdef CONFIG_KEXEC_FILE 617 if (image->file_mode) 618 return 1; 619#endif 620 return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT; 621} 622 623/** 624 * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the 625 * necessary kexec segments based on the hotplug event. 626 * @image: a pointer to kexec_crash_image 627 * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case. 628 * 629 * Update the kdump image based on the type of hotplug event, represented by image->hp_action. 630 * CPU add: Update the FDT segment to include the newly added CPU. 631 * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs 632 * part of the FDT. 633 * Memory add/remove: No action is taken as this is not yet supported. 634 */ 635void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) 636{ 637 struct memory_notify *mn; 638 639 switch (image->hp_action) { 640 case KEXEC_CRASH_HP_REMOVE_CPU: 641 return; 642 643 case KEXEC_CRASH_HP_ADD_CPU: 644 update_crash_fdt(image); 645 break; 646 647 case KEXEC_CRASH_HP_REMOVE_MEMORY: 648 case KEXEC_CRASH_HP_ADD_MEMORY: 649 mn = (struct memory_notify *)arg; 650 update_crash_elfcorehdr(image, mn); 651 return; 652 default: 653 pr_warn_once("Unknown hotplug action\n"); 654 } 655} 656#endif /* CONFIG_CRASH_HOTPLUG */