Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PARISC] only make executable areas executable

Currently parisc has the whole kernel marked as RWX, meaning any
kernel page at all is eligible to be executed. This can cause a
theoretical problem on systems with combined I/D TLB because the act
of referencing a page causes a TLB insertion with an executable bit.
This TLB entry may be used by the CPU as the basis for speculating the
page into the I-Cache. If this speculated page is subsequently used
for a user process, there is the possibility we will get a stale
I-cache line picked up as the binary executes.

As a point of good practice, only mark actual kernel text pages as
executable. The same has to be done for init_text pages, but they're
converted to data pages (and the I-Cache flushed) when the init memory
is released.

Signed-off-by: James Bottomley <James.Bottomley@suse.de>

authored by

James Bottomley and committed by
James Bottomley
d7dd2ff1 e38f5b74

+166 -122
+7 -2
arch/parisc/include/asm/pgtable.h
··· 177 177 178 178 #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) 179 179 #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) 180 - #define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) 180 + #define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) 181 + #define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) 182 + #define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE) 183 + #define _PAGE_KERNEL (_PAGE_KERNEL_RO | _PAGE_WRITE) 181 184 182 185 /* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds 183 186 * are page-aligned, we don't care about the PAGE_OFFSET bits, except ··· 211 208 #define PAGE_COPY PAGE_EXECREAD 212 209 #define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) 213 210 #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) 214 - #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) 211 + #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL_EXEC) 212 + #define PAGE_KERNEL_RWX __pgprot(_PAGE_KERNEL_RWX) 213 + #define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL_RO) 215 214 #define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) 216 215 #define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ) 217 216
+3
arch/parisc/kernel/entry.S
··· 692 692 END(fault_vector_11) 693 693 694 694 #endif 695 + /* Fault vector is separately protected and *must* be on its own page */ 696 + .align PAGE_SIZE 697 + ENTRY(end_fault_vector) 695 698 696 699 .import handle_interruption,code 697 700 .import do_cpu_irq_mask,code
+3 -2
arch/parisc/kernel/head.S
··· 106 106 #endif 107 107 108 108 109 - /* Now initialize the PTEs themselves */ 110 - ldo 0+_PAGE_KERNEL(%r0),%r3 /* Hardwired 0 phys addr start */ 109 + /* Now initialize the PTEs themselves. We use RWX for 110 + * everything ... it will get remapped correctly later */ 111 + ldo 0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */ 111 112 ldi (1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */ 112 113 load32 PA(pg0),%r1 113 114
+9 -1
arch/parisc/kernel/module.c
··· 61 61 #include <linux/string.h> 62 62 #include <linux/kernel.h> 63 63 #include <linux/bug.h> 64 + #include <linux/mm.h> 64 65 #include <linux/slab.h> 65 66 67 + #include <asm/pgtable.h> 66 68 #include <asm/unwind.h> 67 69 68 70 #if 0 ··· 216 214 { 217 215 if (size == 0) 218 216 return NULL; 219 - return vmalloc(size); 217 + /* using RWX means less protection for modules, but it's 218 + * easier than trying to map the text, data, init_text and 219 + * init_data correctly */ 220 + return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, 221 + GFP_KERNEL | __GFP_HIGHMEM, 222 + PAGE_KERNEL_RWX, -1, 223 + __builtin_return_address(0)); 220 224 } 221 225 222 226 #ifndef CONFIG_64BIT
+1
arch/parisc/kernel/vmlinux.lds.S
··· 134 134 . = ALIGN(16384); 135 135 __init_begin = .; 136 136 INIT_TEXT_SECTION(16384) 137 + . = ALIGN(PAGE_SIZE); 137 138 INIT_DATA_SECTION(16) 138 139 /* we have to discard exit text and such at runtime, not link time */ 139 140 .exit.text :
+143 -117
arch/parisc/mm/init.c
··· 369 369 request_resource(&sysram_resources[0], &pdcdata_resource); 370 370 } 371 371 372 + static void __init map_pages(unsigned long start_vaddr, 373 + unsigned long start_paddr, unsigned long size, 374 + pgprot_t pgprot, int force) 375 + { 376 + pgd_t *pg_dir; 377 + pmd_t *pmd; 378 + pte_t *pg_table; 379 + unsigned long end_paddr; 380 + unsigned long start_pmd; 381 + unsigned long start_pte; 382 + unsigned long tmp1; 383 + unsigned long tmp2; 384 + unsigned long address; 385 + unsigned long vaddr; 386 + unsigned long ro_start; 387 + unsigned long ro_end; 388 + unsigned long fv_addr; 389 + unsigned long gw_addr; 390 + extern const unsigned long fault_vector_20; 391 + extern void * const linux_gateway_page; 392 + 393 + ro_start = __pa((unsigned long)_text); 394 + ro_end = __pa((unsigned long)&data_start); 395 + fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; 396 + gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; 397 + 398 + end_paddr = start_paddr + size; 399 + 400 + pg_dir = pgd_offset_k(start_vaddr); 401 + 402 + #if PTRS_PER_PMD == 1 403 + start_pmd = 0; 404 + #else 405 + start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); 406 + #endif 407 + start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); 408 + 409 + address = start_paddr; 410 + vaddr = start_vaddr; 411 + while (address < end_paddr) { 412 + #if PTRS_PER_PMD == 1 413 + pmd = (pmd_t *)__pa(pg_dir); 414 + #else 415 + pmd = (pmd_t *)pgd_address(*pg_dir); 416 + 417 + /* 418 + * pmd is physical at this point 419 + */ 420 + 421 + if (!pmd) { 422 + pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE << PMD_ORDER); 423 + pmd = (pmd_t *) __pa(pmd); 424 + } 425 + 426 + pgd_populate(NULL, pg_dir, __va(pmd)); 427 + #endif 428 + pg_dir++; 429 + 430 + /* now change pmd to kernel virtual addresses */ 431 + 432 + pmd = (pmd_t *)__va(pmd) + start_pmd; 433 + for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++, pmd++) { 434 + 435 + /* 436 + * pg_table is 
physical at this point 437 + */ 438 + 439 + pg_table = (pte_t *)pmd_address(*pmd); 440 + if (!pg_table) { 441 + pg_table = (pte_t *) 442 + alloc_bootmem_low_pages_node(NODE_DATA(0), PAGE_SIZE); 443 + pg_table = (pte_t *) __pa(pg_table); 444 + } 445 + 446 + pmd_populate_kernel(NULL, pmd, __va(pg_table)); 447 + 448 + /* now change pg_table to kernel virtual addresses */ 449 + 450 + pg_table = (pte_t *) __va(pg_table) + start_pte; 451 + for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { 452 + pte_t pte; 453 + 454 + /* 455 + * Map the fault vector writable so we can 456 + * write the HPMC checksum. 457 + */ 458 + if (force) 459 + pte = __mk_pte(address, pgprot); 460 + else if (core_kernel_text(vaddr) && 461 + address != fv_addr) 462 + pte = __mk_pte(address, PAGE_KERNEL_EXEC); 463 + else 464 + #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) 465 + if (address >= ro_start && address < ro_end 466 + && address != fv_addr 467 + && address != gw_addr) 468 + pte = __mk_pte(address, PAGE_KERNEL_RO); 469 + else 470 + #endif 471 + pte = __mk_pte(address, pgprot); 472 + 473 + if (address >= end_paddr) { 474 + if (force) 475 + break; 476 + else 477 + pte_val(pte) = 0; 478 + } 479 + 480 + set_pte(pg_table, pte); 481 + 482 + address += PAGE_SIZE; 483 + vaddr += PAGE_SIZE; 484 + } 485 + start_pte = 0; 486 + 487 + if (address >= end_paddr) 488 + break; 489 + } 490 + start_pmd = 0; 491 + } 492 + } 493 + 372 494 void free_initmem(void) 373 495 { 374 496 unsigned long addr; 375 497 unsigned long init_begin = (unsigned long)__init_begin; 376 498 unsigned long init_end = (unsigned long)__init_end; 377 499 378 - #ifdef CONFIG_DEBUG_KERNEL 500 + /* The init text pages are marked R-X. We have to 501 + * flush the icache and mark them RW- 502 + * 503 + * This is tricky, because map_pages is in the init section. 
504 + * Do a dummy remap of the data section first (the data 505 + * section is already PAGE_KERNEL) to pull in the TLB entries 506 + * for map_kernel */ 507 + map_pages(init_begin, __pa(init_begin), init_end - init_begin, 508 + PAGE_KERNEL_RWX, 1); 509 + /* now remap at PAGE_KERNEL since the TLB is pre-primed to execute 510 + * map_pages */ 511 + map_pages(init_begin, __pa(init_begin), init_end - init_begin, 512 + PAGE_KERNEL, 1); 513 + 514 + /* force the kernel to see the new TLB entries */ 515 + __flush_tlb_range(0, init_begin, init_end); 379 516 /* Attempt to catch anyone trying to execute code here 380 517 * by filling the page with BRK insns. 381 518 */ 382 519 memset((void *)init_begin, 0x00, init_end - init_begin); 520 + /* finally dump all the instructions which were cached, since the 521 + * pages are no-longer executable */ 383 522 flush_icache_range(init_begin, init_end); 384 - #endif 385 523 386 - /* align __init_begin and __init_end to page size, 387 - ignoring linker script where we might have tried to save RAM */ 388 - init_begin = PAGE_ALIGN(init_begin); 389 - init_end = PAGE_ALIGN(init_end); 390 524 for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) { 391 525 ClearPageReserved(virt_to_page(addr)); 392 526 init_page_count(virt_to_page(addr)); ··· 750 616 #endif 751 617 } 752 618 753 - 754 - static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot) 755 - { 756 - pgd_t *pg_dir; 757 - pmd_t *pmd; 758 - pte_t *pg_table; 759 - unsigned long end_paddr; 760 - unsigned long start_pmd; 761 - unsigned long start_pte; 762 - unsigned long tmp1; 763 - unsigned long tmp2; 764 - unsigned long address; 765 - unsigned long ro_start; 766 - unsigned long ro_end; 767 - unsigned long fv_addr; 768 - unsigned long gw_addr; 769 - extern const unsigned long fault_vector_20; 770 - extern void * const linux_gateway_page; 771 - 772 - ro_start = __pa((unsigned long)_text); 773 - ro_end = __pa((unsigned 
long)&data_start); 774 - fv_addr = __pa((unsigned long)&fault_vector_20) & PAGE_MASK; 775 - gw_addr = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK; 776 - 777 - end_paddr = start_paddr + size; 778 - 779 - pg_dir = pgd_offset_k(start_vaddr); 780 - 781 - #if PTRS_PER_PMD == 1 782 - start_pmd = 0; 783 - #else 784 - start_pmd = ((start_vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); 785 - #endif 786 - start_pte = ((start_vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); 787 - 788 - address = start_paddr; 789 - while (address < end_paddr) { 790 - #if PTRS_PER_PMD == 1 791 - pmd = (pmd_t *)__pa(pg_dir); 792 - #else 793 - pmd = (pmd_t *)pgd_address(*pg_dir); 794 - 795 - /* 796 - * pmd is physical at this point 797 - */ 798 - 799 - if (!pmd) { 800 - pmd = (pmd_t *) alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE << PMD_ORDER); 801 - pmd = (pmd_t *) __pa(pmd); 802 - } 803 - 804 - pgd_populate(NULL, pg_dir, __va(pmd)); 805 - #endif 806 - pg_dir++; 807 - 808 - /* now change pmd to kernel virtual addresses */ 809 - 810 - pmd = (pmd_t *)__va(pmd) + start_pmd; 811 - for (tmp1 = start_pmd; tmp1 < PTRS_PER_PMD; tmp1++,pmd++) { 812 - 813 - /* 814 - * pg_table is physical at this point 815 - */ 816 - 817 - pg_table = (pte_t *)pmd_address(*pmd); 818 - if (!pg_table) { 819 - pg_table = (pte_t *) 820 - alloc_bootmem_low_pages_node(NODE_DATA(0),PAGE_SIZE); 821 - pg_table = (pte_t *) __pa(pg_table); 822 - } 823 - 824 - pmd_populate_kernel(NULL, pmd, __va(pg_table)); 825 - 826 - /* now change pg_table to kernel virtual addresses */ 827 - 828 - pg_table = (pte_t *) __va(pg_table) + start_pte; 829 - for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++,pg_table++) { 830 - pte_t pte; 831 - 832 - /* 833 - * Map the fault vector writable so we can 834 - * write the HPMC checksum. 
835 - */ 836 - #if defined(CONFIG_PARISC_PAGE_SIZE_4KB) 837 - if (address >= ro_start && address < ro_end 838 - && address != fv_addr 839 - && address != gw_addr) 840 - pte = __mk_pte(address, PAGE_KERNEL_RO); 841 - else 842 - #endif 843 - pte = __mk_pte(address, pgprot); 844 - 845 - if (address >= end_paddr) 846 - pte_val(pte) = 0; 847 - 848 - set_pte(pg_table, pte); 849 - 850 - address += PAGE_SIZE; 851 - } 852 - start_pte = 0; 853 - 854 - if (address >= end_paddr) 855 - break; 856 - } 857 - start_pmd = 0; 858 - } 859 - } 860 - 861 619 /* 862 620 * pagetable_init() sets up the page tables 863 621 * ··· 774 748 size = pmem_ranges[range].pages << PAGE_SHIFT; 775 749 776 750 map_pages((unsigned long)__va(start_paddr), start_paddr, 777 - size, PAGE_KERNEL); 751 + size, PAGE_KERNEL, 0); 778 752 } 779 753 780 754 #ifdef CONFIG_BLK_DEV_INITRD 781 755 if (initrd_end && initrd_end > mem_limit) { 782 756 printk(KERN_INFO "initrd: mapping %08lx-%08lx\n", initrd_start, initrd_end); 783 757 map_pages(initrd_start, __pa(initrd_start), 784 - initrd_end - initrd_start, PAGE_KERNEL); 758 + initrd_end - initrd_start, PAGE_KERNEL, 0); 785 759 } 786 760 #endif 787 761 ··· 806 780 */ 807 781 808 782 map_pages(linux_gateway_page_addr, __pa(&linux_gateway_page), 809 - PAGE_SIZE, PAGE_GATEWAY); 783 + PAGE_SIZE, PAGE_GATEWAY, 1); 810 784 } 811 785 812 786 #ifdef CONFIG_HPUX