Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86-32, mm: Add an initial page table for core bootstrapping

This patch adds an initial page table with low mappings used exclusively
for booting APs/resuming after ACPI suspend/machine restart. After this,
there's no need to add low mappings to swapper_pg_dir and zap them later
or create our own swsusp PGD page solely for ACPI sleep needs - we have
initial_page_table for that.

Signed-off-by: Borislav Petkov <bp@alien8.de>
LKML-Reference: <20101020070526.GA9588@liondog.tnic>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

authored by

Borislav Petkov and committed by
H. Peter Anvin
b40827fa d25e6b0b

+58 -121
+1 -1
arch/x86/include/asm/pgtable_32.h
··· 26 26 struct vm_area_struct; 27 27 28 28 extern pgd_t swapper_pg_dir[1024]; 29 - extern pgd_t trampoline_pg_dir[1024]; 29 + extern pgd_t initial_page_table[1024]; 30 30 31 31 static inline void pgtable_cache_init(void) { } 32 32 static inline void check_pgt_cache(void) { }
-2
arch/x86/include/asm/tlbflush.h
··· 172 172 flush_tlb_all(); 173 173 } 174 174 175 - extern void zap_low_mappings(bool early); 176 - 177 175 #endif /* _ASM_X86_TLBFLUSH_H */
-3
arch/x86/include/asm/trampoline.h
··· 13 13 14 14 extern unsigned long init_rsp; 15 15 extern unsigned long initial_code; 16 - extern unsigned long initial_page_table; 17 16 extern unsigned long initial_gs; 18 17 19 18 #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) 20 19 21 20 extern unsigned long setup_trampoline(void); 22 - extern void __init setup_trampoline_page_table(void); 23 21 extern void __init reserve_trampoline_memory(void); 24 22 #else 25 - static inline void setup_trampoline_page_table(void) {} 26 23 static inline void reserve_trampoline_memory(void) {} 27 24 #endif /* CONFIG_X86_TRAMPOLINE */ 28 25
+6 -1
arch/x86/kernel/acpi/sleep.c
··· 12 12 #include <asm/segment.h> 13 13 #include <asm/desc.h> 14 14 15 + #ifdef CONFIG_X86_32 16 + #include <asm/pgtable.h> 17 + #include <asm/pgtable_32.h> 18 + #endif 19 + 15 20 #include "realmode/wakeup.h" 16 21 #include "sleep.h" 17 22 ··· 95 90 96 91 #ifndef CONFIG_64BIT 97 92 header->pmode_entry = (u32)&wakeup_pmode_return; 98 - header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET); 93 + header->pmode_cr3 = (u32)__pa(&initial_page_table); 99 94 saved_magic = 0x12345678; 100 95 #else /* CONFIG_64BIT */ 101 96 header->trampoline_segment = setup_trampoline() >> 4;
+1
arch/x86/kernel/head32.c
··· 17 17 #include <asm/apic.h> 18 18 #include <asm/io_apic.h> 19 19 #include <asm/bios_ebda.h> 20 + #include <asm/tlbflush.h> 20 21 21 22 static void __init i386_default_early_setup(void) 22 23 {
+25 -30
arch/x86/kernel/head_32.S
··· 183 183 #ifdef CONFIG_X86_PAE 184 184 185 185 /* 186 - * In PAE mode swapper_pg_dir is statically defined to contain enough 187 - * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 188 - * entries). The identity mapping is handled by pointing two PGD 189 - * entries to the first kernel PMD. 186 + * In PAE mode initial_page_table is statically defined to contain 187 + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 188 + * entries). The identity mapping is handled by pointing two PGD entries 189 + * to the first kernel PMD. 190 190 * 191 - * Note the upper half of each PMD or PTE are always zero at 192 - * this stage. 191 + * Note the upper half of each PMD or PTE are always zero at this stage. 193 192 */ 194 193 195 194 #define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ ··· 196 197 xorl %ebx,%ebx /* %ebx is kept at zero */ 197 198 198 199 movl $pa(__brk_base), %edi 199 - movl $pa(swapper_pg_pmd), %edx 200 + movl $pa(initial_pg_pmd), %edx 200 201 movl $PTE_IDENT_ATTR, %eax 201 202 10: 202 203 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ ··· 225 226 movl %eax, pa(max_pfn_mapped) 226 227 227 228 /* Do early initialization of the fixmap area */ 228 - movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 229 - movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) 229 + movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax 230 + movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) 230 231 #else /* Not PAE */ 231 232 232 233 page_pde_offset = (__PAGE_OFFSET >> 20); 233 234 234 235 movl $pa(__brk_base), %edi 235 - movl $pa(swapper_pg_dir), %edx 236 + movl $pa(initial_page_table), %edx 236 237 movl $PTE_IDENT_ATTR, %eax 237 238 10: 238 239 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ ··· 256 257 movl %eax, pa(max_pfn_mapped) 257 258 258 259 /* Do early initialization of the fixmap area */ 259 - movl $pa(swapper_pg_fixmap)+PDE_IDENT_ATTR,%eax 260 - movl %eax,pa(swapper_pg_dir+0xffc) 260 + movl 
$pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax 261 + movl %eax,pa(initial_page_table+0xffc) 261 262 #endif 262 263 jmp 3f 263 264 /* ··· 333 334 /* 334 335 * Enable paging 335 336 */ 336 - movl pa(initial_page_table), %eax 337 + movl $pa(initial_page_table), %eax 337 338 movl %eax,%cr3 /* set the page table pointer.. */ 338 339 movl %cr0,%eax 339 340 orl $X86_CR0_PG,%eax ··· 613 614 .align 4 614 615 ENTRY(initial_code) 615 616 .long i386_start_kernel 616 - ENTRY(initial_page_table) 617 - .long pa(swapper_pg_dir) 618 617 619 618 /* 620 619 * BSS section ··· 620 623 __PAGE_ALIGNED_BSS 621 624 .align PAGE_SIZE_asm 622 625 #ifdef CONFIG_X86_PAE 623 - swapper_pg_pmd: 626 + initial_pg_pmd: 624 627 .fill 1024*KPMDS,4,0 625 628 #else 626 - ENTRY(swapper_pg_dir) 629 + ENTRY(initial_page_table) 627 630 .fill 1024,4,0 628 631 #endif 629 - swapper_pg_fixmap: 632 + initial_pg_fixmap: 630 633 .fill 1024,4,0 631 - #ifdef CONFIG_X86_TRAMPOLINE 632 - ENTRY(trampoline_pg_dir) 633 - .fill 1024,4,0 634 - #endif 635 634 ENTRY(empty_zero_page) 636 635 .fill 4096,1,0 636 + ENTRY(swapper_pg_dir) 637 + .fill 1024,4,0 637 638 638 639 /* 639 640 * This starts the data section. ··· 640 645 __PAGE_ALIGNED_DATA 641 646 /* Page-aligned for the benefit of paravirt? 
*/ 642 647 .align PAGE_SIZE_asm 643 - ENTRY(swapper_pg_dir) 644 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 648 + ENTRY(initial_page_table) 649 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ 645 650 # if KPMDS == 3 646 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 647 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 648 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x2000),0 651 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 652 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 653 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x2000),0 649 654 # elif KPMDS == 2 650 655 .long 0,0 651 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 652 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR+0x1000),0 656 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 657 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR+0x1000),0 653 658 # elif KPMDS == 1 654 659 .long 0,0 655 660 .long 0,0 656 - .long pa(swapper_pg_pmd+PGD_IDENT_ATTR),0 661 + .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 657 662 # else 658 663 # error "Kernel PMDs should be 1, 2 or 3" 659 664 # endif
+2 -8
arch/x86/kernel/reboot.c
··· 371 371 CMOS_WRITE(0x00, 0x8f); 372 372 spin_unlock(&rtc_lock); 373 373 374 - /* Remap the kernel at virtual address zero, as well as offset zero 375 - from the kernel segment. This assumes the kernel segment starts at 376 - virtual address PAGE_OFFSET. */ 377 - memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, 378 - sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); 379 - 380 374 /* 381 - * Use `swapper_pg_dir' as our page directory. 375 + * Switch back to the initial page table. 382 376 */ 383 - load_cr3(swapper_pg_dir); 377 + load_cr3(initial_page_table); 384 378 385 379 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads 386 380 this on booting to tell it to "Bypass memory test (also warm
+17 -1
arch/x86/kernel/setup.c
··· 728 728 #ifdef CONFIG_X86_32 729 729 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 730 730 visws_early_detect(); 731 + 732 + /* 733 + * copy kernel address range established so far and switch 734 + * to the proper swapper page table 735 + */ 736 + clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY, 737 + initial_page_table + KERNEL_PGD_BOUNDARY, 738 + KERNEL_PGD_PTRS); 739 + 740 + load_cr3(swapper_pg_dir); 741 + __flush_tlb_all(); 731 742 #else 732 743 printk(KERN_INFO "Command line: %s\n", boot_command_line); 733 744 #endif ··· 1020 1009 paging_init(); 1021 1010 x86_init.paging.pagetable_setup_done(swapper_pg_dir); 1022 1011 1023 - setup_trampoline_page_table(); 1012 + #ifdef CONFIG_X86_32 1013 + /* sync back kernel address range */ 1014 + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 1015 + swapper_pg_dir + KERNEL_PGD_BOUNDARY, 1016 + KERNEL_PGD_PTRS); 1017 + #endif 1024 1018 1025 1019 tboot_probe(); 1026 1020
+6 -14
arch/x86/kernel/smpboot.c
··· 298 298 * fragile that we want to limit the things done here to the 299 299 * most necessary things. 300 300 */ 301 - 302 - #ifdef CONFIG_X86_32 303 - /* 304 - * Switch away from the trampoline page-table 305 - * 306 - * Do this before cpu_init() because it needs to access per-cpu 307 - * data which may not be mapped in the trampoline page-table. 308 - */ 309 - load_cr3(swapper_pg_dir); 310 - __flush_tlb_all(); 311 - #endif 312 - 313 301 cpu_init(); 314 302 preempt_disable(); 315 303 smp_callin(); 304 + 305 + #ifdef CONFIG_X86_32 306 + /* switch away from the initial page table */ 307 + load_cr3(swapper_pg_dir); 308 + __flush_tlb_all(); 309 + #endif 316 310 317 311 /* otherwise gcc will move up smp_processor_id before the cpu_init */ 318 312 barrier(); ··· 766 772 #ifdef CONFIG_X86_32 767 773 /* Stack for startup_32 can be just as for start_secondary onwards */ 768 774 irq_ctx_init(cpu); 769 - initial_page_table = __pa(&trampoline_pg_dir); 770 775 #else 771 776 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 772 777 initial_gs = per_cpu_offset(cpu); ··· 914 921 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 915 922 916 923 err = do_boot_cpu(apicid, cpu); 917 - 918 924 if (err) { 919 925 pr_debug("do_boot_cpu failed %d\n", err); 920 926 return -EIO;
-16
arch/x86/kernel/trampoline.c
··· 38 38 memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); 39 39 return virt_to_phys(trampoline_base); 40 40 } 41 - 42 - void __init setup_trampoline_page_table(void) 43 - { 44 - #ifdef CONFIG_X86_32 45 - /* Copy kernel address range */ 46 - clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, 47 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 48 - KERNEL_PGD_PTRS); 49 - 50 - /* Initialize low mappings */ 51 - clone_pgd_range(trampoline_pg_dir, 52 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 53 - min_t(unsigned long, KERNEL_PGD_PTRS, 54 - KERNEL_PGD_BOUNDARY)); 55 - #endif 56 - }
-45
arch/x86/mm/init_32.c
··· 548 548 permanent_kmaps_init(pgd_base); 549 549 } 550 550 551 - #ifdef CONFIG_ACPI_SLEEP 552 - /* 553 - * ACPI suspend needs this for resume, because things like the intel-agp 554 - * driver might have split up a kernel 4MB mapping. 555 - */ 556 - char swsusp_pg_dir[PAGE_SIZE] 557 - __attribute__ ((aligned(PAGE_SIZE))); 558 - 559 - static inline void save_pg_dir(void) 560 - { 561 - copy_page(swsusp_pg_dir, swapper_pg_dir); 562 - } 563 - #else /* !CONFIG_ACPI_SLEEP */ 564 - static inline void save_pg_dir(void) 565 - { 566 - } 567 - #endif /* !CONFIG_ACPI_SLEEP */ 568 - 569 - void zap_low_mappings(bool early) 570 - { 571 - int i; 572 - 573 - /* 574 - * Zap initial low-memory mappings. 575 - * 576 - * Note that "pgd_clear()" doesn't do it for 577 - * us, because pgd_clear() is a no-op on i386. 578 - */ 579 - for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { 580 - #ifdef CONFIG_X86_PAE 581 - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); 582 - #else 583 - set_pgd(swapper_pg_dir+i, __pgd(0)); 584 - #endif 585 - } 586 - 587 - if (early) 588 - __flush_tlb(); 589 - else 590 - flush_tlb_all(); 591 - } 592 - 593 551 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); 594 552 EXPORT_SYMBOL_GPL(__supported_pte_mask); 595 553 ··· 916 958 917 959 if (boot_cpu_data.wp_works_ok < 0) 918 960 test_wp_bit(); 919 - 920 - save_pg_dir(); 921 - zap_low_mappings(true); 922 961 } 923 962 924 963 #ifdef CONFIG_MEMORY_HOTPLUG