Merge branch 'x86-security-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-security-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
module: Move RO/NX module protection to after ftrace module update
x86: Resume trampoline must be executable
x86: Add RO/NX protection for loadable kernel modules
x86: Add NX protection for kernel data
x86: Fix improper large page preservation

+266 -18
+11
arch/x86/Kconfig.debug
···
  feature as well as for the change_page_attr() infrastructure.
  If in doubt, say "N"

+ config DEBUG_SET_MODULE_RONX
+         bool "Set loadable kernel module data as NX and text as RO"
+         depends on MODULES
+         ---help---
+           This option helps catch unintended modifications to a loadable
+           kernel module's text and read-only data. It also prevents execution
+           of module data. Such protection may interfere with run-time code
+           patching and dynamic kernel tracing - and it might also protect
+           against certain classes of kernel exploits.
+           If in doubt, say "N".
+
  config DEBUG_NX_TEST
          tristate "Testcase for the NX non-executable stack feature"
          depends on DEBUG_KERNEL && m
+1
arch/x86/include/asm/pci.h
···

  #define PCIBIOS_MIN_CARDBUS_IO 0x4000

+ extern int pcibios_enabled;
  void pcibios_config_init(void);
  struct pci_bus *pcibios_scan_root(int bus);
+3
arch/x86/kernel/ftrace.c
···
  #include <linux/sched.h>
  #include <linux/init.h>
  #include <linux/list.h>
+ #include <linux/module.h>

  #include <trace/syscall.h>
···
  int ftrace_arch_code_modify_prepare(void)
  {
          set_kernel_text_rw();
+         set_all_modules_text_rw();
          modifying_code = 1;
          return 0;
  }
···
  int ftrace_arch_code_modify_post_process(void)
  {
          modifying_code = 0;
+         set_all_modules_text_ro();
          set_kernel_text_ro();
          return 0;
  }
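Note: these two hooks bracket every ftrace text update, so kernel and module text are writable only for the duration of the patch. A minimal sketch of the resulting calling pattern (patch_text_example and its arguments are hypothetical; probe_kernel_write stands in for the arch's real patching primitive):

#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/errno.h>

/* Hypothetical illustration of the prepare/post_process bracket;
 * not the actual ftrace call chain. */
static int patch_text_example(void *patch_site, void *new_insn, size_t len)
{
        int ret;

        ret = ftrace_arch_code_modify_prepare();        /* text + modules -> RW */
        if (ret)
                return ret;
        if (probe_kernel_write(patch_site, new_insn, len))
                ret = -EPERM;
        ftrace_arch_code_modify_post_process();         /* text + modules -> RO */
        return ret;
}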
+6 -2
arch/x86/kernel/vmlinux.lds.S
···

  PHDRS {
          text PT_LOAD FLAGS(5);          /* R_E */
-         data PT_LOAD FLAGS(7);          /* RWE */
+         data PT_LOAD FLAGS(6);          /* RW_ */
  #ifdef CONFIG_X86_64
          user PT_LOAD FLAGS(5);          /* R_E */
  #ifdef CONFIG_SMP
···

  EXCEPTION_TABLE(16) :text = 0x9090

+ #if defined(CONFIG_DEBUG_RODATA)
+         /* .text should occupy whole number of pages */
+         . = ALIGN(PAGE_SIZE);
+ #endif
  X64_ALIGN_DEBUG_RODATA_BEGIN
  RO_DATA(PAGE_SIZE)
  X64_ALIGN_DEBUG_RODATA_END
···
          __bss_start = .;
          *(.bss..page_aligned)
          *(.bss)
-         . = ALIGN(4);
+         . = ALIGN(PAGE_SIZE);
          __bss_stop = .;
  }
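Note: the FLAGS(n) values are ELF program-header p_flags bitmasks (PF_X = 1, PF_W = 2, PF_R = 4), so this hunk drops PF_X from the data segment; readelf -l vmlinux shows the resulting segment permissions. A standalone sketch of the decoding:

#include <stdio.h>

/* Decode an ELF p_flags value such as the FLAGS(n) numbers used in
 * vmlinux.lds.S: PF_X = 1, PF_W = 2, PF_R = 4. */
static void print_p_flags(unsigned int f)
{
        printf("FLAGS(%u) = %c%c%c\n", f,
               (f & 4) ? 'R' : '_',
               (f & 2) ? 'W' : '_',
               (f & 1) ? 'E' : '_');
}

int main(void)
{
        print_p_flags(5);       /* text: R_E */
        print_p_flags(7);       /* data before this change: RWE */
        print_p_flags(6);       /* data after this change: RW_ */
        return 0;
}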
+2 -1
arch/x86/mm/init.c
···
          /*
           * We just marked the kernel text read only above, now that
           * we are going to free part of that, we need to make that
-          * writeable first.
+          * writeable and non-executable first.
           */
+         set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
          set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);

          printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
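Note: the ordering matters — exec permission is dropped before the pages are handed back for reuse. A userspace analogy of the same hygiene (a minimal sketch assuming POSIX mprotect; the kernel operates on the direct mapping with set_memory_nx()/set_memory_rw() instead):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 4096;
        /* Stand-in for init text: a page originally mapped read+execute. */
        void *p = mmap(NULL, len, PROT_READ | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                return 1;

        /* Before recycling the page as data, make it writable and
         * non-executable, mirroring free_init_pages() above. */
        if (mprotect(p, len, PROT_READ | PROT_WRITE))
                return 1;
        memset(p, 0, len);      /* now safe to reuse as plain data */

        return munmap(p, len);
}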
+19 -1
arch/x86/mm/init_32.c
···

  static inline int is_kernel_text(unsigned long addr)
  {
-         if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end)
+         if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
                  return 1;
          return 0;
  }
···
          set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
  }

+ static void mark_nxdata_nx(void)
+ {
+         /*
+          * When this is called, init has already been executed and released,
+          * so everything past _etext should be NX.
+          */
+         unsigned long start = PFN_ALIGN(_etext);
+         /*
+          * This comes from the is_kernel_text() upper limit, rounded up to
+          * the next huge page boundary where huge pages are used:
+          */
+         unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
+
+         if (__supported_pte_mask & _PAGE_NX)
+                 printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
+         set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+ }
+
  void mark_rodata_ro(void)
  {
          unsigned long start = PFN_ALIGN(_text);
···
          printk(KERN_INFO "Testing CPA: write protecting again\n");
          set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
  #endif
+         mark_nxdata_nx();
  }
  #endif
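Note: the size calculation rounds the end of the init section up to the next huge-page boundary, matching the is_kernel_text() upper limit when 4 MiB pages map the area. A standalone check of the arithmetic (the addresses are made up for illustration):

#include <stdio.h>

#define HPAGE_SIZE (4UL << 20)          /* 4 MiB, 32-bit non-PAE */
#define HPAGE_MASK (~(HPAGE_SIZE - 1))

int main(void)
{
        /* Hypothetical layout: page aligned, but not huge-page aligned. */
        unsigned long etext    = 0xc05e1000UL;
        unsigned long init_end = 0xc0789000UL;

        unsigned long start = etext;
        /* (x + HPAGE_SIZE) & HPAGE_MASK always lands on a boundary
         * strictly above x, so a huge page straddling __init_end is
         * fully covered by the NX range. */
        unsigned long size = ((init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

        printf("NX-protecting the kernel data: %luk\n", size >> 10);
        return 0;
}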
+22 -11
arch/x86/mm/pageattr.c
···
  #include <linux/pfn.h>
  #include <linux/percpu.h>
  #include <linux/gfp.h>
+ #include <linux/pci.h>

  #include <asm/e820.h>
  #include <asm/processor.h>
···
                                     unsigned long pfn)
  {
          pgprot_t forbidden = __pgprot(0);
+         pgprot_t required = __pgprot(0);

          /*
           * The BIOS area between 640k and 1Mb needs to be executable for
           * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
           */
-         if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
+ #ifdef CONFIG_PCI_BIOS
+         if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
                  pgprot_val(forbidden) |= _PAGE_NX;
+ #endif

          /*
           * The kernel text needs to be executable for obvious reasons
···
          if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
                     __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
                  pgprot_val(forbidden) |= _PAGE_RW;
+         /*
+          * .data and .bss should always be writable.
+          */
+         if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
+             within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
+                 pgprot_val(required) |= _PAGE_RW;

  #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
          /*
···
  #endif

          prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
+         prot = __pgprot(pgprot_val(prot) | pgprot_val(required));

          return prot;
  }
···
  {
          unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
          pte_t new_pte, old_pte, *tmp;
-         pgprot_t old_prot, new_prot;
+         pgprot_t old_prot, new_prot, req_prot;
          int i, do_split = 1;
          unsigned int level;
···
           * We are safe now. Check whether the new pgprot is the same:
           */
          old_pte = *kpte;
-         old_prot = new_prot = pte_pgprot(old_pte);
+         old_prot = new_prot = req_prot = pte_pgprot(old_pte);

-         pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
-         pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+         pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
+         pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);

          /*
           * old_pte points to the large page base address. So we need
···
          pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
          cpa->pfn = pfn;

-         new_prot = static_protections(new_prot, address, pfn);
+         new_prot = static_protections(req_prot, address, pfn);

          /*
           * We need to check the full range, whether
           * static_protection() requires a different pgprot for one of
           * the pages in the range we try to preserve:
           */
-         addr = address + PAGE_SIZE;
-         pfn++;
-         for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
-                 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
+         addr = address & pmask;
+         pfn = pte_pfn(old_pte);
+         for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
+                 pgprot_t chk_prot = static_protections(req_prot, addr, pfn);

                  if (pgprot_val(chk_prot) != pgprot_val(new_prot))
                          goto out_unlock;
···
           * that we limited the number of possible pages already to
           * the number of pages in the large page.
           */
-         if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+         if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
                  /*
                   * The address is aligned and the number of pages
                   * covers the full page.
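Note: the bug fixed here is that the conservative-prot check walked only the pages of the current CPA request, while the decision being made is whether the whole existing large mapping can keep one uniform pgprot. A toy model of the difference (within() matches the real helper; the special page and addresses are made up):

#include <stdio.h>

#define PAGE_SIZE 0x1000UL
#define PSIZE     0x200000UL    /* 2 MiB large page */
#define PMASK     (~(PSIZE - 1))

static int within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

/* Toy stand-in for static_protections(): one page in the middle of the
 * large page (say, a rodata page) needs a different pgprot. */
static int needs_special_prot(unsigned long addr)
{
        return within(addr, 0x00180000UL, 0x00181000UL);
}

int main(void)
{
        unsigned long address  = 0x00100000UL;  /* CPA request start */
        unsigned long numpages = 16;            /* CPA request length */
        unsigned long base = address & PMASK;   /* large page base */
        unsigned long addr;
        int old_hit = 0, new_hit = 0;

        /* Old (buggy) check: only the requested range. */
        for (addr = address; addr < address + numpages * PAGE_SIZE; addr += PAGE_SIZE)
                old_hit |= needs_special_prot(addr);

        /* New check: every 4k page covered by the existing large mapping. */
        for (addr = base; addr < base + PSIZE; addr += PAGE_SIZE)
                new_hit |= needs_special_prot(addr);

        printf("old check would split: %s, new check splits: %s\n",
               old_hit ? "yes" : "no", new_hit ? "yes" : "no");
        return 0;
}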
+23
arch/x86/pci/pcbios.c
···
  #include <linux/uaccess.h>
  #include <asm/pci_x86.h>
  #include <asm/pci-functions.h>
+ #include <asm/cacheflush.h>

  /* BIOS32 signature: "_32_" */
  #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
···
  #define PCIBIOS_HW_TYPE2 0x02
  #define PCIBIOS_HW_TYPE1_SPEC 0x10
  #define PCIBIOS_HW_TYPE2_SPEC 0x20

+ int pcibios_enabled;
+
+ /* According to the BIOS specification at:
+  * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
+  * restrict the x zone to some pages and make it ro. But this may be
+  * broken on some BIOSes, and complex to handle with static_protections.
+  * We could make the 0xe0000-0x100000 range rox, but this can break
+  * some ISA mappings.
+  *
+  * So we leave an rw and x hole when pcibios is used. This shouldn't
+  * happen for modern systems with mmconfig, and if you don't want it
+  * you can disable pcibios...
+  */
+ static inline void set_bios_x(void)
+ {
+         pcibios_enabled = 1;
+         set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
+         if (__supported_pte_mask & _PAGE_NX)
+                 printk(KERN_INFO "PCI: PCI BIOS area is rw and x. Use pci=nobios if you want it NX.\n");
+ }

  /*
   * This is the standard structure used to identify the entry point
···
          DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
              bios32_entry);
          bios32_indirect.address = bios32_entry + PAGE_OFFSET;
+         set_bios_x();
          if (check_pcibios())
                  return &pci_bios_access;
  }
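Note: BIOS_BEGIN is 0xa0000 (640k) and BIOS_END is 0x100000 (1M) on x86, so set_bios_x() re-enables execute on 96 pages. A trivial check of the count passed to set_memory_x():

#include <stdio.h>

#define PAGE_SHIFT 12
#define BIOS_BEGIN 0x000a0000UL         /* 640k */
#define BIOS_END   0x00100000UL         /* 1M */

int main(void)
{
        printf("%lu pages left executable for PCI BIOS calls\n",
               (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);  /* prints 96 */
        return 0;
}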
+10 -1
include/linux/module.h
···
          /* The size of the executable code in each section. */
          unsigned int init_text_size, core_text_size;

+         /* Size of RO sections of the module (text+rodata) */
+         unsigned int init_ro_size, core_ro_size;
+
          /* Arch-specific module values */
          struct mod_arch_specific arch;
···
  {
          return 0;
  }
-
  #endif /* CONFIG_MODULES */

  #ifdef CONFIG_SYSFS
···

  #define __MODULE_STRING(x) __stringify(x)

+ #ifdef CONFIG_DEBUG_SET_MODULE_RONX
+ extern void set_all_modules_text_rw(void);
+ extern void set_all_modules_text_ro(void);
+ #else
+ static inline void set_all_modules_text_rw(void) { }
+ static inline void set_all_modules_text_ro(void) { }
+ #endif

  #ifdef CONFIG_GENERIC_BUG
  void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *,
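Note: the #else stubs are the usual config-stub idiom — call sites such as ftrace.c stay free of #ifdefs because the no-op inlines compile away. A minimal sketch of the idiom with a hypothetical option:

/* Hypothetical CONFIG_FOO option, illustrating the pattern used above. */
#ifdef CONFIG_FOO
extern void foo_enable(void);
#else
static inline void foo_enable(void) { }         /* optimized out */
#endif

void some_caller(void)
{
        foo_enable();   /* no #ifdef needed at the call site */
}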
+169 -2
kernel/module.c
···
  #include <linux/percpu.h>
  #include <linux/kmemleak.h>
  #include <linux/jump_label.h>
+ #include <linux/pfn.h>

  #define CREATE_TRACE_POINTS
  #include <trace/events/module.h>
···
  #ifndef ARCH_SHF_SMALL
  #define ARCH_SHF_SMALL 0
  #endif
+
+ /*
+  * Modules' sections will be aligned on page boundaries
+  * to ensure complete separation of code and data, but
+  * only when CONFIG_DEBUG_SET_MODULE_RONX=y
+  */
+ #ifdef CONFIG_DEBUG_SET_MODULE_RONX
+ # define debug_align(X) ALIGN(X, PAGE_SIZE)
+ #else
+ # define debug_align(X) (X)
+ #endif
+
+ /*
+  * Given BASE and SIZE this macro calculates the number of pages the
+  * memory region occupies
+  */
+ #define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ?         \
+                 (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \
+                          PFN_DOWN((unsigned long)BASE) + 1)    \
+                 : (0UL))

  /* If this is set, the section belongs in the init part of the module */
  #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
···
          return 0;
  }

+ #ifdef CONFIG_DEBUG_SET_MODULE_RONX
+ /*
+  * LKM RO/NX protection: protect module's text/ro-data
+  * from modification and any data from execution.
+  */
+ void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages))
+ {
+         unsigned long begin_pfn = PFN_DOWN((unsigned long)start);
+         unsigned long end_pfn = PFN_DOWN((unsigned long)end);
+
+         if (end_pfn > begin_pfn)
+                 set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+ }
+
+ static void set_section_ro_nx(void *base,
+                         unsigned long text_size,
+                         unsigned long ro_size,
+                         unsigned long total_size)
+ {
+         /* begin and end PFNs of the current subsection */
+         unsigned long begin_pfn;
+         unsigned long end_pfn;
+
+         /*
+          * Set RO for module text and RO-data:
+          * - Always protect first page.
+          * - Do not protect last partial page.
+          */
+         if (ro_size > 0)
+                 set_page_attributes(base, base + ro_size, set_memory_ro);
+
+         /*
+          * Set NX permissions for module data:
+          * - Do not protect first partial page.
+          * - Always protect last page.
+          */
+         if (total_size > text_size) {
+                 begin_pfn = PFN_UP((unsigned long)base + text_size);
+                 end_pfn = PFN_UP((unsigned long)base + total_size);
+                 if (end_pfn > begin_pfn)
+                         set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn);
+         }
+ }
+
+ /* Setting memory back to RW+NX before releasing it */
+ void unset_section_ro_nx(struct module *mod, void *module_region)
+ {
+         unsigned long total_pages;
+
+         if (mod->module_core == module_region) {
+                 /* Set core as NX+RW */
+                 total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size);
+                 set_memory_nx((unsigned long)mod->module_core, total_pages);
+                 set_memory_rw((unsigned long)mod->module_core, total_pages);
+
+         } else if (mod->module_init == module_region) {
+                 /* Set init as NX+RW */
+                 total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size);
+                 set_memory_nx((unsigned long)mod->module_init, total_pages);
+                 set_memory_rw((unsigned long)mod->module_init, total_pages);
+         }
+ }
+
+ /* Iterate through all modules and set each module's text as RW */
+ void set_all_modules_text_rw(void)
+ {
+         struct module *mod;
+
+         mutex_lock(&module_mutex);
+         list_for_each_entry_rcu(mod, &modules, list) {
+                 if ((mod->module_core) && (mod->core_text_size)) {
+                         set_page_attributes(mod->module_core,
+                                                 mod->module_core + mod->core_text_size,
+                                                 set_memory_rw);
+                 }
+                 if ((mod->module_init) && (mod->init_text_size)) {
+                         set_page_attributes(mod->module_init,
+                                                 mod->module_init + mod->init_text_size,
+                                                 set_memory_rw);
+                 }
+         }
+         mutex_unlock(&module_mutex);
+ }
+
+ /* Iterate through all modules and set each module's text as RO */
+ void set_all_modules_text_ro(void)
+ {
+         struct module *mod;
+
+         mutex_lock(&module_mutex);
+         list_for_each_entry_rcu(mod, &modules, list) {
+                 if ((mod->module_core) && (mod->core_text_size)) {
+                         set_page_attributes(mod->module_core,
+                                                 mod->module_core + mod->core_text_size,
+                                                 set_memory_ro);
+                 }
+                 if ((mod->module_init) && (mod->init_text_size)) {
+                         set_page_attributes(mod->module_init,
+                                                 mod->module_init + mod->init_text_size,
+                                                 set_memory_ro);
+                 }
+         }
+         mutex_unlock(&module_mutex);
+ }
+ #else
+ static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { }
+ static inline void unset_section_ro_nx(struct module *mod, void *module_region) { }
+ #endif
+
  /* Free a module, remove from lists, etc. */
  static void free_module(struct module *mod)
  {
···
          destroy_params(mod->kp, mod->num_kp);

          /* This may be NULL, but that's OK */
+         unset_section_ro_nx(mod, mod->module_init);
          module_free(mod, mod->module_init);
          kfree(mod->args);
          percpu_modfree(mod);
···
          lockdep_free_key_range(mod->module_core, mod->core_size);

          /* Finally, free the core (containing the module structure) */
+         unset_section_ro_nx(mod, mod->module_core);
          module_free(mod, mod->module_core);

  #ifdef CONFIG_MPU
···
                          s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
                          DEBUGP("\t%s\n", name);
                  }
-                 if (m == 0)
+                 switch (m) {
+                 case 0: /* executable */
+                         mod->core_size = debug_align(mod->core_size);
                          mod->core_text_size = mod->core_size;
+                         break;
+                 case 1: /* RO: text and ro-data */
+                         mod->core_size = debug_align(mod->core_size);
+                         mod->core_ro_size = mod->core_size;
+                         break;
+                 case 3: /* whole core */
+                         mod->core_size = debug_align(mod->core_size);
+                         break;
+                 }
          }

          DEBUGP("Init section allocation order:\n");
···
                                   | INIT_OFFSET_MASK);
                          DEBUGP("\t%s\n", sname);
                  }
-                 if (m == 0)
+                 switch (m) {
+                 case 0: /* executable */
+                         mod->init_size = debug_align(mod->init_size);
                          mod->init_text_size = mod->init_size;
+                         break;
+                 case 1: /* RO: text and ro-data */
+                         mod->init_size = debug_align(mod->init_size);
+                         mod->init_ro_size = mod->init_size;
+                         break;
+                 case 3: /* whole init */
+                         mod->init_size = debug_align(mod->init_size);
+                         break;
+                 }
          }
  }
···
          blocking_notifier_call_chain(&module_notify_list,
                          MODULE_STATE_COMING, mod);

+         /* Set RO and NX regions for core */
+         set_section_ro_nx(mod->module_core,
+                                 mod->core_text_size,
+                                 mod->core_ro_size,
+                                 mod->core_size);
+
+         /* Set RO and NX regions for init */
+         set_section_ro_nx(mod->module_init,
+                                 mod->init_text_size,
+                                 mod->init_ro_size,
+                                 mod->init_size);
+
          do_mod_ctors(mod);
          /* Start the module */
          if (mod->init != NULL)
···
          mod->symtab = mod->core_symtab;
          mod->strtab = mod->core_strtab;
  #endif
+         unset_section_ro_nx(mod, mod->module_init);
          module_free(mod, mod->module_init);
          mod->module_init = NULL;
          mod->init_size = 0;
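Note: the rounding in set_section_ro_nx() is deliberately asymmetric — RO rounds its end down (never read-protect trailing data) while NX rounds its start up (never no-exec a page still holding text). Pages straddling a boundary keep the weaker permissions rather than breaking the module, and debug_align() removes such straddles entirely when CONFIG_DEBUG_SET_MODULE_RONX=y. A standalone check with made-up, deliberately unaligned sizes:

#include <stdio.h>

#define PAGE_SHIFT  12
#define PAGE_SIZE   (1UL << PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
        /* Hypothetical module layout in bytes (text <= ro <= total). */
        unsigned long base = 0x10000, text = 0x2800, ro = 0x3400, total = 0x6000;

        /* RO span: first page always, last partial page excluded. */
        unsigned long ro_begin = PFN_DOWN(base);
        unsigned long ro_end   = PFN_DOWN(base + ro);

        /* NX span: first partial page excluded, last page always. */
        unsigned long nx_begin = PFN_UP(base + text);
        unsigned long nx_end   = PFN_UP(base + total);

        printf("RO pfns [%lu, %lu)  NX pfns [%lu, %lu)\n",
               ro_begin, ro_end, nx_begin, nx_end);
        return 0;
}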