Merge branch 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull kdump fixes from Peter Anvin:
"The kexec/kdump people have found several problems with the support
for loading over 4 GiB that was introduced in this merge cycle. This
is partly due to a number of design problems inherent in the way the
various pieces of kdump fit together (it is pretty horrifically manual
in many places.)

After a *lot* of iterations this is the patchset that was agreed upon,
but of course it is now very late in the cycle. However, because it
changes both the syntax and semantics of the crashkernel option, it
would be desirable to avoid a stable release with the broken
interfaces."

I'm not happy with the timing, since originally the plan was to release
the final 3.9 tomorrow. But apparently I'm doing an -rc8 instead...

* 'x86-kdump-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
kexec: use Crash kernel for Crash kernel low
x86, kdump: Change crashkernel_high/low= to crashkernel=,high/low
x86, kdump: Restore crashkernel= to allocate under 896M
x86, kdump: Set crashkernel_low automatically

+180 -28
+20 -3
Documentation/kernel-parameters.txt
··· 596 is selected automatically. Check 597 Documentation/kdump/kdump.txt for further details. 598 599 - crashkernel_low=size[KMG] 600 - [KNL, x86] parts under 4G. 601 - 602 crashkernel=range1:size1[,range2:size2,...][@offset] 603 [KNL] Same as above, but depends on the memory 604 in the running system. The syntax of range is 605 start-[end] where start and end are both 606 a memory unit (amount[KMG]). See also 607 Documentation/kdump/kdump.txt for an example. 608 609 cs89x0_dma= [HW,NET] 610 Format: <dma>
··· 596 is selected automatically. Check 597 Documentation/kdump/kdump.txt for further details. 598 599 crashkernel=range1:size1[,range2:size2,...][@offset] 600 [KNL] Same as above, but depends on the memory 601 in the running system. The syntax of range is 602 start-[end] where start and end are both 603 a memory unit (amount[KMG]). See also 604 Documentation/kdump/kdump.txt for an example. 605 + 606 + crashkernel=size[KMG],high 607 + [KNL, x86_64] range could be above 4G. Allows the kernel 608 + to allocate the physical memory region from the top, so it could 609 + be above 4G if the system has more than 4G of RAM installed. 610 + Otherwise the memory region will be allocated below 4G, if 611 + available. 612 + It will be ignored if crashkernel=X is specified. 613 + crashkernel=size[KMG],low 614 + [KNL, x86_64] range under 4G. When crashkernel=X,high 615 + is passed, the kernel could allocate the physical memory region 616 + above 4G, which causes the second kernel to crash on systems 617 + that require some amount of low memory, e.g. swiotlb 618 + requires at least 64M+32K low memory. The kernel would 619 + try to allocate 72M below 4G automatically. 620 + This option lets the user specify their own low range under 4G 621 + for the second kernel instead. 622 + 0: to disable low allocation. 623 + It will be ignored when crashkernel=X,high is not used 624 + or the reserved memory is below 4G. 625 626 cs89x0_dma= [HW,NET] 627 Format: <dma>
+37 -8
arch/x86/kernel/setup.c
··· 507 /* 508 * Keep the crash kernel below this limit. On 32 bits earlier kernels 509 * would limit the kernel to the low 512 MiB due to mapping restrictions. 510 */ 511 #ifdef CONFIG_X86_32 512 - # define CRASH_KERNEL_ADDR_MAX (512 << 20) 513 #else 514 - # define CRASH_KERNEL_ADDR_MAX MAXMEM 515 #endif 516 517 static void __init reserve_crashkernel_low(void) ··· 524 unsigned long long low_base = 0, low_size = 0; 525 unsigned long total_low_mem; 526 unsigned long long base; 527 int ret; 528 529 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); 530 ret = parse_crashkernel_low(boot_command_line, total_low_mem, 531 &low_size, &base); 532 - if (ret != 0 || low_size <= 0) 533 - return; 534 535 low_base = memblock_find_in_range(low_size, (1ULL<<32), 536 low_size, alignment); 537 538 if (!low_base) { 539 - pr_info("crashkernel low reservation failed - No suitable area found.\n"); 540 541 return; 542 } ··· 573 const unsigned long long alignment = 16<<20; /* 16M */ 574 unsigned long long total_mem; 575 unsigned long long crash_size, crash_base; 576 int ret; 577 578 total_mem = memblock_phys_mem_size(); 579 580 ret = parse_crashkernel(boot_command_line, total_mem, 581 &crash_size, &crash_base); 582 - if (ret != 0 || crash_size <= 0) 583 - return; 584 585 /* 0 means: find the address automatically */ 586 if (crash_base <= 0) { ··· 596 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX 597 */ 598 crash_base = memblock_find_in_range(alignment, 599 - CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 600 601 if (!crash_base) { 602 pr_info("crashkernel reservation failed - No suitable area found.\n");
··· 507 /* 508 * Keep the crash kernel below this limit. On 32 bits earlier kernels 509 * would limit the kernel to the low 512 MiB due to mapping restrictions. 510 + * On 64bit, old kexec-tools need to under 896MiB. 511 */ 512 #ifdef CONFIG_X86_32 513 + # define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) 514 + # define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) 515 #else 516 + # define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) 517 + # define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM 518 #endif 519 520 static void __init reserve_crashkernel_low(void) ··· 521 unsigned long long low_base = 0, low_size = 0; 522 unsigned long total_low_mem; 523 unsigned long long base; 524 + bool auto_set = false; 525 int ret; 526 527 total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); 528 + /* crashkernel=Y,low */ 529 ret = parse_crashkernel_low(boot_command_line, total_low_mem, 530 &low_size, &base); 531 + if (ret != 0) { 532 + /* 533 + * two parts from lib/swiotlb.c: 534 + * swiotlb size: user specified with swiotlb= or default. 535 + * swiotlb overflow buffer: now is hardcoded to 32k. 536 + * We round it to 8M for other buffers that 537 + * may need to stay low too. 538 + */ 539 + low_size = swiotlb_size_or_default() + (8UL<<20); 540 + auto_set = true; 541 + } else { 542 + /* passed with crashkernel=0,low ? 
*/ 543 + if (!low_size) 544 + return; 545 + } 546 547 low_base = memblock_find_in_range(low_size, (1ULL<<32), 548 low_size, alignment); 549 550 if (!low_base) { 551 + if (!auto_set) 552 + pr_info("crashkernel low reservation failed - No suitable area found.\n"); 553 554 return; 555 } ··· 554 const unsigned long long alignment = 16<<20; /* 16M */ 555 unsigned long long total_mem; 556 unsigned long long crash_size, crash_base; 557 + bool high = false; 558 int ret; 559 560 total_mem = memblock_phys_mem_size(); 561 562 + /* crashkernel=XM */ 563 ret = parse_crashkernel(boot_command_line, total_mem, 564 &crash_size, &crash_base); 565 + if (ret != 0 || crash_size <= 0) { 566 + /* crashkernel=X,high */ 567 + ret = parse_crashkernel_high(boot_command_line, total_mem, 568 + &crash_size, &crash_base); 569 + if (ret != 0 || crash_size <= 0) 570 + return; 571 + high = true; 572 + } 573 574 /* 0 means: find the address automatically */ 575 if (crash_base <= 0) { ··· 569 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX 570 */ 571 crash_base = memblock_find_in_range(alignment, 572 + high ? CRASH_KERNEL_ADDR_HIGH_MAX : 573 + CRASH_KERNEL_ADDR_LOW_MAX, 574 + crash_size, alignment); 575 576 if (!crash_base) { 577 pr_info("crashkernel reservation failed - No suitable area found.\n");
+2
include/linux/kexec.h
··· 200 201 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, 202 unsigned long long *crash_size, unsigned long long *crash_base); 203 int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, 204 unsigned long long *crash_size, unsigned long long *crash_base); 205 int crash_shrink_memory(unsigned long new_size);
··· 200 201 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, 202 unsigned long long *crash_size, unsigned long long *crash_base); 203 + int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, 204 + unsigned long long *crash_size, unsigned long long *crash_base); 205 int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, 206 unsigned long long *crash_size, unsigned long long *crash_base); 207 int crash_shrink_memory(unsigned long new_size);
+1
include/linux/swiotlb.h
··· 25 extern void swiotlb_init(int verbose); 26 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); 27 extern unsigned long swiotlb_nr_tbl(void); 28 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); 29 30 /*
··· 25 extern void swiotlb_init(int verbose); 26 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); 27 extern unsigned long swiotlb_nr_tbl(void); 28 + unsigned long swiotlb_size_or_default(void); 29 extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); 30 31 /*
+105 -13
kernel/kexec.c
··· 55 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 56 }; 57 struct resource crashk_low_res = { 58 - .name = "Crash kernel low", 59 .start = 0, 60 .end = 0, 61 .flags = IORESOURCE_BUSY | IORESOURCE_MEM ··· 1368 return 0; 1369 } 1370 1371 /* 1372 - * That function is the entry point for command line parsing and should be 1373 - * called from the arch-specific code. 1374 */ 1375 static int __init __parse_crashkernel(char *cmdline, 1376 unsigned long long system_ram, 1377 unsigned long long *crash_size, 1378 unsigned long long *crash_base, 1379 - const char *name) 1380 { 1381 - char *p = cmdline, *ck_cmdline = NULL; 1382 char *first_colon, *first_space; 1383 1384 BUG_ON(!crash_size || !crash_base); 1385 *crash_size = 0; 1386 *crash_base = 0; 1387 1388 - /* find crashkernel and use the last one if there are more */ 1389 - p = strstr(p, name); 1390 - while (p) { 1391 - ck_cmdline = p; 1392 - p = strstr(p+1, name); 1393 - } 1394 1395 if (!ck_cmdline) 1396 return -EINVAL; 1397 1398 ck_cmdline += strlen(name); 1399 1400 /* 1401 * if the commandline contains a ':', then that's the extended 1402 * syntax -- if not, it must be the classic syntax ··· 1492 return 0; 1493 } 1494 1495 int __init parse_crashkernel(char *cmdline, 1496 unsigned long long system_ram, 1497 unsigned long long *crash_size, 1498 unsigned long long *crash_base) 1499 { 1500 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1501 - "crashkernel="); 1502 } 1503 1504 int __init parse_crashkernel_low(char *cmdline, ··· 1520 unsigned long long *crash_base) 1521 { 1522 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1523 - "crashkernel_low="); 1524 } 1525 1526 static void update_vmcoreinfo_note(void)
··· 55 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 56 }; 57 struct resource crashk_low_res = { 58 + .name = "Crash kernel", 59 .start = 0, 60 .end = 0, 61 .flags = IORESOURCE_BUSY | IORESOURCE_MEM ··· 1368 return 0; 1369 } 1370 1371 + #define SUFFIX_HIGH 0 1372 + #define SUFFIX_LOW 1 1373 + #define SUFFIX_NULL 2 1374 + static __initdata char *suffix_tbl[] = { 1375 + [SUFFIX_HIGH] = ",high", 1376 + [SUFFIX_LOW] = ",low", 1377 + [SUFFIX_NULL] = NULL, 1378 + }; 1379 + 1380 /* 1381 + * That function parses "suffix" crashkernel command lines like 1382 + * 1383 + * crashkernel=size,[high|low] 1384 + * 1385 + * It returns 0 on success and -EINVAL on failure. 1386 */ 1387 + static int __init parse_crashkernel_suffix(char *cmdline, 1388 + unsigned long long *crash_size, 1389 + unsigned long long *crash_base, 1390 + const char *suffix) 1391 + { 1392 + char *cur = cmdline; 1393 + 1394 + *crash_size = memparse(cmdline, &cur); 1395 + if (cmdline == cur) { 1396 + pr_warn("crashkernel: memory value expected\n"); 1397 + return -EINVAL; 1398 + } 1399 + 1400 + /* check with suffix */ 1401 + if (strncmp(cur, suffix, strlen(suffix))) { 1402 + pr_warn("crashkernel: unrecognized char\n"); 1403 + return -EINVAL; 1404 + } 1405 + cur += strlen(suffix); 1406 + if (*cur != ' ' && *cur != '\0') { 1407 + pr_warn("crashkernel: unrecognized char\n"); 1408 + return -EINVAL; 1409 + } 1410 + 1411 + return 0; 1412 + } 1413 + 1414 + static __init char *get_last_crashkernel(char *cmdline, 1415 + const char *name, 1416 + const char *suffix) 1417 + { 1418 + char *p = cmdline, *ck_cmdline = NULL; 1419 + 1420 + /* find crashkernel and use the last one if there are more */ 1421 + p = strstr(p, name); 1422 + while (p) { 1423 + char *end_p = strchr(p, ' '); 1424 + char *q; 1425 + 1426 + if (!end_p) 1427 + end_p = p + strlen(p); 1428 + 1429 + if (!suffix) { 1430 + int i; 1431 + 1432 + /* skip the one with any known suffix */ 1433 + for (i = 0; suffix_tbl[i]; i++) { 1434 + q = end_p - strlen(suffix_tbl[i]); 1435 
+ if (!strncmp(q, suffix_tbl[i], 1436 + strlen(suffix_tbl[i]))) 1437 + goto next; 1438 + } 1439 + ck_cmdline = p; 1440 + } else { 1441 + q = end_p - strlen(suffix); 1442 + if (!strncmp(q, suffix, strlen(suffix))) 1443 + ck_cmdline = p; 1444 + } 1445 + next: 1446 + p = strstr(p+1, name); 1447 + } 1448 + 1449 + if (!ck_cmdline) 1450 + return NULL; 1451 + 1452 + return ck_cmdline; 1453 + } 1454 + 1455 static int __init __parse_crashkernel(char *cmdline, 1456 unsigned long long system_ram, 1457 unsigned long long *crash_size, 1458 unsigned long long *crash_base, 1459 + const char *name, 1460 + const char *suffix) 1461 { 1462 char *first_colon, *first_space; 1463 + char *ck_cmdline; 1464 1465 BUG_ON(!crash_size || !crash_base); 1466 *crash_size = 0; 1467 *crash_base = 0; 1468 1469 + ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 1470 1471 if (!ck_cmdline) 1472 return -EINVAL; 1473 1474 ck_cmdline += strlen(name); 1475 1476 + if (suffix) 1477 + return parse_crashkernel_suffix(ck_cmdline, crash_size, 1478 + crash_base, suffix); 1479 /* 1480 * if the commandline contains a ':', then that's the extended 1481 * syntax -- if not, it must be the classic syntax ··· 1413 return 0; 1414 } 1415 1416 + /* 1417 + * That function is the entry point for command line parsing and should be 1418 + * called from the arch-specific code. 
1419 + */ 1420 int __init parse_crashkernel(char *cmdline, 1421 unsigned long long system_ram, 1422 unsigned long long *crash_size, 1423 unsigned long long *crash_base) 1424 { 1425 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1426 + "crashkernel=", NULL); 1427 + } 1428 + 1429 + int __init parse_crashkernel_high(char *cmdline, 1430 + unsigned long long system_ram, 1431 + unsigned long long *crash_size, 1432 + unsigned long long *crash_base) 1433 + { 1434 + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1435 + "crashkernel=", suffix_tbl[SUFFIX_HIGH]); 1436 } 1437 1438 int __init parse_crashkernel_low(char *cmdline, ··· 1428 unsigned long long *crash_base) 1429 { 1430 return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, 1431 + "crashkernel=", suffix_tbl[SUFFIX_LOW]); 1432 } 1433 1434 static void update_vmcoreinfo_note(void)
+15 -4
lib/swiotlb.c
··· 105 if (!strcmp(str, "force")) 106 swiotlb_force = 1; 107 108 - return 1; 109 } 110 - __setup("swiotlb=", setup_io_tlb_npages); 111 /* make io_tlb_overflow tunable too? */ 112 113 unsigned long swiotlb_nr_tbl(void) ··· 115 return io_tlb_nslabs; 116 } 117 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); 118 /* Note that this doesn't work with highmem page */ 119 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, 120 volatile void *address) ··· 200 void __init 201 swiotlb_init(int verbose) 202 { 203 - /* default to 64MB */ 204 - size_t default_size = 64UL<<20; 205 unsigned char *vstart; 206 unsigned long bytes; 207
··· 105 if (!strcmp(str, "force")) 106 swiotlb_force = 1; 107 108 + return 0; 109 } 110 + early_param("swiotlb", setup_io_tlb_npages); 111 /* make io_tlb_overflow tunable too? */ 112 113 unsigned long swiotlb_nr_tbl(void) ··· 115 return io_tlb_nslabs; 116 } 117 EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); 118 + 119 + /* default to 64MB */ 120 + #define IO_TLB_DEFAULT_SIZE (64UL<<20) 121 + unsigned long swiotlb_size_or_default(void) 122 + { 123 + unsigned long size; 124 + 125 + size = io_tlb_nslabs << IO_TLB_SHIFT; 126 + 127 + return size ? size : (IO_TLB_DEFAULT_SIZE); 128 + } 129 + 130 /* Note that this doesn't work with highmem page */ 131 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, 132 volatile void *address) ··· 188 void __init 189 swiotlb_init(int verbose) 190 { 191 + size_t default_size = IO_TLB_DEFAULT_SIZE; 192 unsigned char *vstart; 193 unsigned long bytes; 194