Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/build: Fix vmlinux size check on 64-bit

Commit

b4e0409a36f4 ("x86: check vmlinux limits, 64-bit")

added a check that the size of the 64-bit kernel is less than
KERNEL_IMAGE_SIZE.

The check uses (_end - _text), but this is not enough. The initial
PMD used in startup_64() (level2_kernel_pgt) can only map up to
KERNEL_IMAGE_SIZE from __START_KERNEL_map, not from _text, and the
modules area (MODULES_VADDR) starts at KERNEL_IMAGE_SIZE.

The correct check is what is currently done for 32-bit, since
LOAD_OFFSET is defined appropriately for the two architectures. Just
check (_end - LOAD_OFFSET) against KERNEL_IMAGE_SIZE unconditionally.

Note that on 32-bit, the limit is not strict: KERNEL_IMAGE_SIZE is not
really used by the main kernel. The higher the kernel is located, the
less space is available for the vmalloc area. However, KERNEL_IMAGE_SIZE
is used by KASLR in the compressed stub to limit the maximum address of
the kernel to a safe value.

Clean up various comments to clarify that despite the name,
KERNEL_IMAGE_SIZE is not a limit on the size of the kernel image, but a
limit on the maximum virtual address that the image can occupy.

Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201029161903.2553528-1-nivedita@alum.mit.edu

authored by

Arvind Sankar and committed by
Borislav Petkov
ea3186b9 3650b228

+29 -35
+7 -1
arch/x86/include/asm/page_32_types.h
··· 53 53 #define STACK_TOP_MAX STACK_TOP 54 54 55 55 /* 56 - * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) 56 + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual 57 + * address for the kernel image, rather than the limit on the size itself. On 58 + * 32-bit, this is not a strict limit, but this value is used to limit the 59 + * link-time virtual address range of the kernel, and by KASLR to limit the 60 + * randomized address from which the kernel is executed. A relocatable kernel 61 + * can be loaded somewhat higher than KERNEL_IMAGE_SIZE as long as enough space 62 + * remains for the vmalloc area. 57 63 */ 58 64 #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) 59 65
+4 -2
arch/x86/include/asm/page_64_types.h
··· 98 98 #define STACK_TOP_MAX TASK_SIZE_MAX 99 99 100 100 /* 101 - * Maximum kernel image size is limited to 1 GiB, due to the fixmap living 102 - * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). 101 + * In spite of the name, KERNEL_IMAGE_SIZE is a limit on the maximum virtual 102 + * address for the kernel image, rather than the limit on the size itself. 103 + * This can be at most 1 GiB, due to the fixmap living in the next 1 GiB (see 104 + * level2_kernel_pgt in arch/x86/kernel/head_64.S). 103 105 * 104 106 * On KASLR use 1 GiB by default, leaving 1 GiB for modules once the 105 107 * page tables are fully set up.
+6 -12
arch/x86/include/asm/pgtable_32.h
··· 57 57 #endif 58 58 59 59 /* 60 - * This is how much memory in addition to the memory covered up to 61 - * and including _end we need mapped initially. 62 - * We need: 63 - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) 64 - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) 60 + * This is used to calculate the .brk reservation for initial pagetables. 61 + * Enough space is reserved to allocate pagetables sufficient to cover all 62 + * of LOWMEM_PAGES, which is an upper bound on the size of the direct map of 63 + * lowmem. 65 64 * 66 - * Modulo rounding, each megabyte assigned here requires a kilobyte of 67 - * memory, which is currently unreclaimed. 68 - * 69 - * This should be a multiple of a page. 70 - * 71 - * KERNEL_IMAGE_SIZE should be greater than pa(_end) 72 - * and small than max_low_pfn, otherwise will waste some page table entries 65 + * With PAE paging (PTRS_PER_PMD > 1), we allocate PTRS_PER_PGD == 4 pages for 66 + * the PMD's in addition to the pages required for the last level pagetables. 73 67 */ 74 68 #if PTRS_PER_PMD > 1 75 69 #define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+9 -11
arch/x86/kernel/head_64.S
··· 524 524 525 525 SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt) 526 526 /* 527 - * 512 MB kernel mapping. We spend a full page on this pagetable 528 - * anyway. 527 + * Kernel high mapping. 529 528 * 530 - * The kernel code+data+bss must not be bigger than that. 529 + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in 530 + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, 531 + * 512 MiB otherwise. 531 532 * 532 - * (NOTE: at +512MB starts the module area, see MODULES_VADDR. 533 - * If you want to increase this then increase MODULES_VADDR 534 - * too.) 533 + * (NOTE: after that starts the module area, see MODULES_VADDR.) 535 534 * 536 - * This table is eventually used by the kernel during normal 537 - * runtime. Care must be taken to clear out undesired bits 538 - * later, like _PAGE_RW or _PAGE_GLOBAL in some cases. 535 + * This table is eventually used by the kernel during normal runtime. 536 + * Care must be taken to clear out undesired bits later, like _PAGE_RW 537 + * or _PAGE_GLOBAL in some cases. 539 538 */ 540 - PMDS(0, __PAGE_KERNEL_LARGE_EXEC, 541 - KERNEL_IMAGE_SIZE/PMD_SIZE) 539 + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) 542 540 SYM_DATA_END(level2_kernel_pgt) 543 541 544 542 SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
+3 -9
arch/x86/kernel/vmlinux.lds.S
··· 454 454 ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!") 455 455 } 456 456 457 - #ifdef CONFIG_X86_32 458 457 /* 459 458 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: 460 459 */ 461 460 . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), 462 461 "kernel image bigger than KERNEL_IMAGE_SIZE"); 463 - #else 462 + 463 + #ifdef CONFIG_X86_64 464 464 /* 465 465 * Per-cpu symbols which need to be offset from __per_cpu_load 466 466 * for the boot processor. ··· 470 470 INIT_PER_CPU(fixed_percpu_data); 471 471 INIT_PER_CPU(irq_stack_backing_store); 472 472 473 - /* 474 - * Build-time check on the image size: 475 - */ 476 - . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 477 - "kernel image bigger than KERNEL_IMAGE_SIZE"); 478 - 479 473 #ifdef CONFIG_SMP 480 474 . = ASSERT((fixed_percpu_data == 0), 481 475 "fixed_percpu_data is not at start of per-cpu area"); 482 476 #endif 483 477 484 - #endif /* CONFIG_X86_32 */ 478 + #endif /* CONFIG_X86_64 */ 485 479 486 480 #ifdef CONFIG_KEXEC_CORE 487 481 #include <asm/kexec.h>