Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] x86_64: Unmap NULL during early bootup

We should zap the low mappings, as soon as possible, so that we can catch
kernel bugs more effectively. Previously early boot had NULL mapped
and didn't trap on NULL references.

This patch introduces boot_level4_pgt, which will always have low identity
addresses mapped. During boot, all the processors will use this as their
level4 pgt. On BP, we will switch to init_level4_pgt as soon as we enter C
code and zap the low mappings as soon as we are done with the usage of
identity low mapped addresses. On APs we will zap the low mappings as
soon as we jump to C code.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Siddha, Suresh B and committed by
Linus Torvalds
f6c2e333 69d81fcd

+56 -32
+1 -1
arch/i386/kernel/acpi/boot.c
··· 542 542 * RSDP signature. 543 543 */ 544 544 for (offset = 0; offset < length; offset += 16) { 545 - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) 545 + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) 546 546 continue; 547 547 return (start + offset); 548 548 }
+23 -14
arch/x86_64/kernel/head.S
··· 12 12 13 13 #include <linux/linkage.h> 14 14 #include <linux/threads.h> 15 + #include <linux/init.h> 15 16 #include <asm/desc.h> 16 17 #include <asm/segment.h> 17 18 #include <asm/page.h> ··· 71 70 movl %eax, %cr4 72 71 73 72 /* Setup early boot stage 4 level pagetables */ 74 - movl $(init_level4_pgt - __START_KERNEL_map), %eax 73 + movl $(boot_level4_pgt - __START_KERNEL_map), %eax 75 74 movl %eax, %cr3 76 75 77 76 /* Setup EFER (Extended Feature Enable Register) */ ··· 114 113 movq %rax, %cr4 115 114 116 115 /* Setup early boot stage 4 level pagetables. */ 117 - movq $(init_level4_pgt - __START_KERNEL_map), %rax 116 + movq $(boot_level4_pgt - __START_KERNEL_map), %rax 118 117 movq %rax, %cr3 119 118 120 119 /* Check if nx is implemented */ ··· 241 240 ENTRY(stext) 242 241 ENTRY(_stext) 243 242 244 - /* 245 - * This default setting generates an ident mapping at address 0x100000 246 - * and a mapping for the kernel that precisely maps virtual address 247 - * 0xffffffff80000000 to physical address 0x000000. 
(always using 248 - * 2Mbyte large pages provided by PAE mode) 249 - */ 250 243 .org 0x1000 251 244 ENTRY(init_level4_pgt) 252 - .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 253 - .fill 255,8,0 254 - .quad 0x000000000000a007 + __PHYSICAL_START 255 - .fill 254,8,0 256 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 257 - .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 245 + /* This gets initialized in x86_64_start_kernel */ 246 + .fill 512,8,0 258 247 259 248 .org 0x2000 260 249 ENTRY(level3_ident_pgt) ··· 340 349 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 341 350 .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 342 351 #endif 352 + 353 + #ifndef CONFIG_HOTPLUG_CPU 354 + __INITDATA 355 + #endif 356 + /* 357 + * This default setting generates an ident mapping at address 0x100000 358 + * and a mapping for the kernel that precisely maps virtual address 359 + * 0xffffffff80000000 to physical address 0x000000. (always using 360 + * 2Mbyte large pages provided by PAE mode) 361 + */ 362 + .align PAGE_SIZE 363 + ENTRY(boot_level4_pgt) 364 + .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ 365 + .fill 255,8,0 366 + .quad 0x000000000000a007 + __PHYSICAL_START 367 + .fill 254,8,0 368 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ 369 + .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ 343 370 344 371 .data 345 372
+8
arch/x86_64/kernel/head64.c
··· 19 19 #include <asm/bootsetup.h> 20 20 #include <asm/setup.h> 21 21 #include <asm/desc.h> 22 + #include <asm/pgtable.h> 22 23 23 24 /* Don't add a printk in there. printk relies on the PDA which is not initialized 24 25 yet. */ ··· 87 86 set_intr_gate(i, early_idt_handler); 88 87 asm volatile("lidt %0" :: "m" (idt_descr)); 89 88 clear_bss(); 89 + 90 + /* 91 + * switch to init_level4_pgt from boot_level4_pgt 92 + */ 93 + memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); 94 + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 95 + 90 96 pda_init(0); 91 97 copy_bootdata(real_mode_data); 92 98 #ifdef CONFIG_SMP
+1 -1
arch/x86_64/kernel/mpparse.c
··· 549 549 * Read the physical hardware table. Anything here will 550 550 * override the defaults. 551 551 */ 552 - if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { 552 + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { 553 553 smp_found_config = 0; 554 554 printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); 555 555 printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+2
arch/x86_64/kernel/setup.c
··· 571 571 572 572 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 573 573 574 + zap_low_mappings(0); 575 + 574 576 #ifdef CONFIG_ACPI 575 577 /* 576 578 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
+1 -1
arch/x86_64/kernel/setup64.c
··· 141 141 panic("cannot allocate irqstack for cpu %d", cpu); 142 142 } 143 143 144 - asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 145 144 146 145 pda->irqstackptr += IRQSTACKSIZE-64; 147 146 } ··· 196 197 /* CPU 0 is initialised in head64.c */ 197 198 if (cpu != 0) { 198 199 pda_init(cpu); 200 + zap_low_mappings(cpu); 199 201 } else 200 202 estacks = boot_exception_stacks; 201 203
-3
arch/x86_64/kernel/smpboot.c
··· 1063 1063 */ 1064 1064 void __init smp_cpus_done(unsigned int max_cpus) 1065 1065 { 1066 - #ifndef CONFIG_HOTPLUG_CPU 1067 - zap_low_mappings(); 1068 - #endif 1069 1066 smp_cleanup_boot(); 1070 1067 1071 1068 #ifdef CONFIG_X86_IO_APIC
+17 -11
arch/x86_64/mm/init.c
··· 312 312 313 313 extern struct x8664_pda cpu_pda[NR_CPUS]; 314 314 315 - /* Assumes all CPUs still execute in init_mm */ 316 - void zap_low_mappings(void) 315 + void __cpuinit zap_low_mappings(int cpu) 317 316 { 318 - pgd_t *pgd = pgd_offset_k(0UL); 319 - pgd_clear(pgd); 320 - flush_tlb_all(); 317 + if (cpu == 0) { 318 + pgd_t *pgd = pgd_offset_k(0UL); 319 + pgd_clear(pgd); 320 + } else { 321 + /* 322 + * For AP's, zap the low identity mappings by changing the cr3 323 + * to init_level4_pgt and doing local flush tlb all 324 + */ 325 + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); 326 + } 327 + __flush_tlb_all(); 321 328 } 322 329 323 330 /* Compute zone sizes for the DMA and DMA32 zones in a node. */ ··· 481 474 datasize >> 10, 482 475 initsize >> 10); 483 476 477 + #ifdef CONFIG_SMP 484 478 /* 485 - * Subtle. SMP is doing its boot stuff late (because it has to 486 - * fork idle threads) - but it also needs low mappings for the 487 - * protected-mode entry to work. We zap these entries only after 488 - * the WP-bit has been tested. 479 + * Sync boot_level4_pgt mappings with the init_level4_pgt 480 + * except for the low identity mappings which are already zapped 481 + * in init_level4_pgt. This sync-up is essential for AP's bringup 489 482 */ 490 - #ifndef CONFIG_SMP 491 - zap_low_mappings(); 483 + memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); 492 484 #endif 493 485 } 494 486
+1
include/asm-x86_64/pgtable.h
··· 16 16 extern pud_t level3_ident_pgt[512]; 17 17 extern pmd_t level2_kernel_pgt[512]; 18 18 extern pgd_t init_level4_pgt[]; 19 + extern pgd_t boot_level4_pgt[]; 19 20 extern unsigned long __supported_pte_mask; 20 21 21 22 #define swapper_pg_dir init_level4_pgt
+2
include/asm-x86_64/proto.h
··· 11 11 extern void start_kernel(void); 12 12 extern void pda_init(int); 13 13 14 + extern void zap_low_mappings(int cpu); 15 + 14 16 extern void early_idt_handler(void); 15 17 16 18 extern void mcheck_init(struct cpuinfo_x86 *c);
-1
include/asm-x86_64/smp.h
··· 47 47 extern void unlock_ipi_call_lock(void); 48 48 extern int smp_num_siblings; 49 49 extern void smp_send_reschedule(int cpu); 50 - extern void zap_low_mappings(void); 51 50 void smp_stop_cpu(void); 52 51 extern int smp_call_function_single(int cpuid, void (*func) (void *info), 53 52 void *info, int retry, int wait);