Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull straggler x86 fixes from Peter Anvin:
"Three groups of patches:

- EFI boot stub documentation and the ability to print error messages;
- Removal of PTRACE_ARCH_PRCTL for x32 (an obsolete interface which
should never have been ported; the port is broken and
potentially dangerous).
- ftrace stack corruption fixes. I'm not super-happy about the
technical implementation, but it is probably the least invasive in
the short term. In the future I would like a single method for
nesting the debug stack, however."

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, x32, ptrace: Remove PTRACE_ARCH_PRCTL for x32
x86, efi: Add EFI boot stub documentation
x86, efi; Add EFI boot stub console support
x86, efi: Only close open files in error path
ftrace/x86: Do not change stacks in DEBUG when calling lockdep
x86: Allow nesting of the debug stack IDT setting
x86: Reset the debug_stack update counter
ftrace: Use breakpoint method to update ftrace caller
ftrace: Synchronize variable setting with breakpoints

+297 -39
+65
Documentation/x86/efi-stub.txt
··· 1 + The EFI Boot Stub 2 + --------------------------- 3 + 4 + On the x86 platform, a bzImage can masquerade as a PE/COFF image, 5 + thereby convincing EFI firmware loaders to load it as an EFI 6 + executable. The code that modifies the bzImage header, along with the 7 + EFI-specific entry point that the firmware loader jumps to are 8 + collectively known as the "EFI boot stub", and live in 9 + arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c, 10 + respectively. 11 + 12 + By using the EFI boot stub it's possible to boot a Linux kernel 13 + without the use of a conventional EFI boot loader, such as grub or 14 + elilo. Since the EFI boot stub performs the jobs of a boot loader, in 15 + a certain sense it *IS* the boot loader. 16 + 17 + The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option. 18 + 19 + 20 + **** How to install bzImage.efi 21 + 22 + The bzImage located in arch/x86/boot/bzImage must be copied to the EFI 23 + System Partition (ESP) and renamed with the extension ".efi". Without 24 + the extension the EFI firmware loader will refuse to execute it. It's 25 + not possible to execute bzImage.efi from the usual Linux file systems 26 + because EFI firmware doesn't have support for them. 27 + 28 + 29 + **** Passing kernel parameters from the EFI shell 30 + 31 + Arguments to the kernel can be passed after bzImage.efi, e.g. 32 + 33 + fs0:> bzImage.efi console=ttyS0 root=/dev/sda4 34 + 35 + 36 + **** The "initrd=" option 37 + 38 + Like most boot loaders, the EFI stub allows the user to specify 39 + multiple initrd files using the "initrd=" option. This is the only EFI 40 + stub-specific command line parameter, everything else is passed to the 41 + kernel when it boots. 42 + 43 + The path to the initrd file must be an absolute path from the 44 + beginning of the ESP, relative path names do not work. Also, the path 45 + is an EFI-style path and directory elements must be separated with 46 + backslashes (\). 
For example, given the following directory layout, 47 + 48 + fs0:> 49 + Kernels\ 50 + bzImage.efi 51 + initrd-large.img 52 + 53 + Ramdisks\ 54 + initrd-small.img 55 + initrd-medium.img 56 + 57 + to boot with the initrd-large.img file if the current working 58 + directory is fs0:\Kernels, the following command must be used, 59 + 60 + fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img 61 + 62 + Notice how bzImage.efi can be specified with a relative path. That's 63 + because the image we're executing is interpreted by the EFI shell, 64 + which understands relative paths, whereas the rest of the command line 65 + is passed to bzImage.efi.
+2
arch/x86/Kconfig
··· 1506 1506 This kernel feature allows a bzImage to be loaded directly 1507 1507 by EFI firmware without the use of a bootloader. 1508 1508 1509 + See Documentation/x86/efi-stub.txt for more information. 1510 + 1509 1511 config SECCOMP 1510 1512 def_bool y 1511 1513 prompt "Enable seccomp to safely compute untrusted bytecode"
+70 -17
arch/x86/boot/compressed/eboot.c
··· 16 16 17 17 static efi_system_table_t *sys_table; 18 18 19 + static void efi_printk(char *str) 20 + { 21 + char *s8; 22 + 23 + for (s8 = str; *s8; s8++) { 24 + struct efi_simple_text_output_protocol *out; 25 + efi_char16_t ch[2] = { 0 }; 26 + 27 + ch[0] = *s8; 28 + out = (struct efi_simple_text_output_protocol *)sys_table->con_out; 29 + 30 + if (*s8 == '\n') { 31 + efi_char16_t nl[2] = { '\r', 0 }; 32 + efi_call_phys2(out->output_string, out, nl); 33 + } 34 + 35 + efi_call_phys2(out->output_string, out, ch); 36 + } 37 + } 38 + 19 39 static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size, 20 40 unsigned long *desc_size) 21 41 { ··· 551 531 EFI_LOADER_DATA, 552 532 nr_initrds * sizeof(*initrds), 553 533 &initrds); 554 - if (status != EFI_SUCCESS) 534 + if (status != EFI_SUCCESS) { 535 + efi_printk("Failed to alloc mem for initrds\n"); 555 536 goto fail; 537 + } 556 538 557 539 str = (char *)(unsigned long)hdr->cmd_line_ptr; 558 540 for (i = 0; i < nr_initrds; i++) { ··· 597 575 598 576 status = efi_call_phys3(boottime->handle_protocol, 599 577 image->device_handle, &fs_proto, &io); 600 - if (status != EFI_SUCCESS) 578 + if (status != EFI_SUCCESS) { 579 + efi_printk("Failed to handle fs_proto\n"); 601 580 goto free_initrds; 581 + } 602 582 603 583 status = efi_call_phys2(io->open_volume, io, &fh); 604 - if (status != EFI_SUCCESS) 584 + if (status != EFI_SUCCESS) { 585 + efi_printk("Failed to open volume\n"); 605 586 goto free_initrds; 587 + } 606 588 } 607 589 608 590 status = efi_call_phys5(fh->open, fh, &h, filename_16, 609 591 EFI_FILE_MODE_READ, (u64)0); 610 - if (status != EFI_SUCCESS) 592 + if (status != EFI_SUCCESS) { 593 + efi_printk("Failed to open initrd file\n"); 611 594 goto close_handles; 595 + } 612 596 613 597 initrd->handle = h; 614 598 615 599 info_sz = 0; 616 600 status = efi_call_phys4(h->get_info, h, &info_guid, 617 601 &info_sz, NULL); 618 - if (status != EFI_BUFFER_TOO_SMALL) 602 + if (status != EFI_BUFFER_TOO_SMALL) { 
603 + efi_printk("Failed to get initrd info size\n"); 619 604 goto close_handles; 605 + } 620 606 621 607 grow: 622 608 status = efi_call_phys3(sys_table->boottime->allocate_pool, 623 609 EFI_LOADER_DATA, info_sz, &info); 624 - if (status != EFI_SUCCESS) 610 + if (status != EFI_SUCCESS) { 611 + efi_printk("Failed to alloc mem for initrd info\n"); 625 612 goto close_handles; 613 + } 626 614 627 615 status = efi_call_phys4(h->get_info, h, &info_guid, 628 616 &info_sz, info); ··· 644 612 file_sz = info->file_size; 645 613 efi_call_phys1(sys_table->boottime->free_pool, info); 646 614 647 - if (status != EFI_SUCCESS) 615 + if (status != EFI_SUCCESS) { 616 + efi_printk("Failed to get initrd info\n"); 648 617 goto close_handles; 618 + } 649 619 650 620 initrd->size = file_sz; 651 621 initrd_total += file_sz; ··· 663 629 */ 664 630 status = high_alloc(initrd_total, 0x1000, 665 631 &initrd_addr, hdr->initrd_addr_max); 666 - if (status != EFI_SUCCESS) 632 + if (status != EFI_SUCCESS) { 633 + efi_printk("Failed to alloc highmem for initrds\n"); 667 634 goto close_handles; 635 + } 668 636 669 637 /* We've run out of free low memory. 
*/ 670 638 if (initrd_addr > hdr->initrd_addr_max) { 639 + efi_printk("We've run out of free low memory\n"); 671 640 status = EFI_INVALID_PARAMETER; 672 641 goto free_initrd_total; 673 642 } ··· 689 652 status = efi_call_phys3(fh->read, 690 653 initrds[j].handle, 691 654 &chunksize, addr); 692 - if (status != EFI_SUCCESS) 655 + if (status != EFI_SUCCESS) { 656 + efi_printk("Failed to read initrd\n"); 693 657 goto free_initrd_total; 658 + } 694 659 addr += chunksize; 695 660 size -= chunksize; 696 661 } ··· 713 674 low_free(initrd_total, initrd_addr); 714 675 715 676 close_handles: 716 - for (k = j; k < nr_initrds; k++) 677 + for (k = j; k < i; k++) 717 678 efi_call_phys1(fh->close, initrds[k].handle); 718 679 free_initrds: 719 680 efi_call_phys1(sys_table->boottime->free_pool, initrds); ··· 771 732 options_size++; /* NUL termination */ 772 733 773 734 status = low_alloc(options_size, 1, &cmdline); 774 - if (status != EFI_SUCCESS) 735 + if (status != EFI_SUCCESS) { 736 + efi_printk("Failed to alloc mem for cmdline\n"); 775 737 goto fail; 738 + } 776 739 777 740 s1 = (u8 *)(unsigned long)cmdline; 778 741 s2 = (u16 *)options; ··· 936 895 937 896 status = efi_call_phys3(sys_table->boottime->handle_protocol, 938 897 handle, &proto, (void *)&image); 939 - if (status != EFI_SUCCESS) 898 + if (status != EFI_SUCCESS) { 899 + efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); 940 900 goto fail; 901 + } 941 902 942 903 status = low_alloc(0x4000, 1, (unsigned long *)&boot_params); 943 - if (status != EFI_SUCCESS) 904 + if (status != EFI_SUCCESS) { 905 + efi_printk("Failed to alloc lowmem for boot params\n"); 944 906 goto fail; 907 + } 945 908 946 909 memset(boot_params, 0x0, 0x4000); 947 910 ··· 978 933 if (status != EFI_SUCCESS) { 979 934 status = low_alloc(hdr->init_size, hdr->kernel_alignment, 980 935 &start); 981 - if (status != EFI_SUCCESS) 936 + if (status != EFI_SUCCESS) { 937 + efi_printk("Failed to alloc mem for kernel\n"); 982 938 goto fail; 939 + } 983 
940 } 984 941 985 942 hdr->code32_start = (__u32)start; ··· 992 945 status = efi_call_phys3(sys_table->boottime->allocate_pool, 993 946 EFI_LOADER_DATA, sizeof(*gdt), 994 947 (void **)&gdt); 995 - if (status != EFI_SUCCESS) 948 + if (status != EFI_SUCCESS) { 949 + efi_printk("Failed to alloc mem for gdt structure\n"); 996 950 goto fail; 951 + } 997 952 998 953 gdt->size = 0x800; 999 954 status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address); 1000 - if (status != EFI_SUCCESS) 955 + if (status != EFI_SUCCESS) { 956 + efi_printk("Failed to alloc mem for gdt\n"); 1001 957 goto fail; 958 + } 1002 959 1003 960 status = efi_call_phys3(sys_table->boottime->allocate_pool, 1004 961 EFI_LOADER_DATA, sizeof(*idt), 1005 962 (void **)&idt); 1006 - if (status != EFI_SUCCESS) 963 + if (status != EFI_SUCCESS) { 964 + efi_printk("Failed to alloc mem for idt structure\n"); 1007 965 goto fail; 966 + } 1008 967 1009 968 idt->size = 0; 1010 969 idt->address = 0;
+6
arch/x86/boot/compressed/eboot.h
··· 58 58 void *blt; 59 59 }; 60 60 61 + struct efi_simple_text_output_protocol { 62 + void *reset; 63 + void *output_string; 64 + void *test_string; 65 + }; 66 + 61 67 #endif /* BOOT_COMPRESSED_EBOOT_H */
+1 -1
arch/x86/include/asm/ftrace.h
··· 34 34 35 35 #ifndef __ASSEMBLY__ 36 36 extern void mcount(void); 37 - extern int modifying_ftrace_code; 37 + extern atomic_t modifying_ftrace_code; 38 38 39 39 static inline unsigned long ftrace_call_adjust(unsigned long addr) 40 40 {
+7 -1
arch/x86/kernel/cpu/common.c
··· 1101 1101 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1102 1102 } 1103 1103 1104 + static DEFINE_PER_CPU(u32, debug_stack_use_ctr); 1105 + 1104 1106 void debug_stack_set_zero(void) 1105 1107 { 1108 + this_cpu_inc(debug_stack_use_ctr); 1106 1109 load_idt((const struct desc_ptr *)&nmi_idt_descr); 1107 1110 } 1108 1111 1109 1112 void debug_stack_reset(void) 1110 1113 { 1111 - load_idt((const struct desc_ptr *)&idt_descr); 1114 + if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) 1115 + return; 1116 + if (this_cpu_dec_return(debug_stack_use_ctr) == 0) 1117 + load_idt((const struct desc_ptr *)&idt_descr); 1112 1118 } 1113 1119 1114 1120 #else /* CONFIG_X86_64 */
+41 -3
arch/x86/kernel/entry_64.S
··· 191 191 .endm 192 192 193 193 /* 194 + * When dynamic function tracer is enabled it will add a breakpoint 195 + * to all locations that it is about to modify, sync CPUs, update 196 + * all the code, sync CPUs, then remove the breakpoints. In this time 197 + * if lockdep is enabled, it might jump back into the debug handler 198 + * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). 199 + * 200 + * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to 201 + * make sure the stack pointer does not get reset back to the top 202 + * of the debug stack, and instead just reuses the current stack. 203 + */ 204 + #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) 205 + 206 + .macro TRACE_IRQS_OFF_DEBUG 207 + call debug_stack_set_zero 208 + TRACE_IRQS_OFF 209 + call debug_stack_reset 210 + .endm 211 + 212 + .macro TRACE_IRQS_ON_DEBUG 213 + call debug_stack_set_zero 214 + TRACE_IRQS_ON 215 + call debug_stack_reset 216 + .endm 217 + 218 + .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET 219 + bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ 220 + jnc 1f 221 + TRACE_IRQS_ON_DEBUG 222 + 1: 223 + .endm 224 + 225 + #else 226 + # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF 227 + # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON 228 + # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ 229 + #endif 230 + 231 + /* 194 232 * C code is not supposed to know about undefined top of stack. Every time 195 233 * a C function with an pt_regs argument is called from the SYSCALL based 196 234 * fast path FIXUP_TOP_OF_STACK is needed. 
··· 1136 1098 subq $ORIG_RAX-R15, %rsp 1137 1099 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1138 1100 call save_paranoid 1139 - TRACE_IRQS_OFF 1101 + TRACE_IRQS_OFF_DEBUG 1140 1102 movq %rsp,%rdi /* pt_regs pointer */ 1141 1103 xorl %esi,%esi /* no error code */ 1142 1104 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) ··· 1431 1393 ENTRY(paranoid_exit) 1432 1394 DEFAULT_FRAME 1433 1395 DISABLE_INTERRUPTS(CLBR_NONE) 1434 - TRACE_IRQS_OFF 1396 + TRACE_IRQS_OFF_DEBUG 1435 1397 testl %ebx,%ebx /* swapgs needed? */ 1436 1398 jnz paranoid_restore 1437 1399 testl $3,CS(%rsp) ··· 1442 1404 RESTORE_ALL 8 1443 1405 jmp irq_return 1444 1406 paranoid_restore: 1445 - TRACE_IRQS_IRETQ 0 1407 + TRACE_IRQS_IRETQ_DEBUG 0 1446 1408 RESTORE_ALL 8 1447 1409 jmp irq_return 1448 1410 paranoid_userspace:
+95 -7
arch/x86/kernel/ftrace.c
··· 100 100 } 101 101 102 102 static int 103 - ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 103 + ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, 104 104 unsigned const char *new_code) 105 105 { 106 106 unsigned char replaced[MCOUNT_INSN_SIZE]; ··· 141 141 old = ftrace_call_replace(ip, addr); 142 142 new = ftrace_nop_replace(); 143 143 144 - return ftrace_modify_code(rec->ip, old, new); 144 + /* 145 + * On boot up, and when modules are loaded, the MCOUNT_ADDR 146 + * is converted to a nop, and will never become MCOUNT_ADDR 147 + * again. This code is either running before SMP (on boot up) 148 + * or before the code will ever be executed (module load). 149 + * We do not want to use the breakpoint version in this case, 150 + * just modify the code directly. 151 + */ 152 + if (addr == MCOUNT_ADDR) 153 + return ftrace_modify_code_direct(rec->ip, old, new); 154 + 155 + /* Normal cases use add_brk_on_nop */ 156 + WARN_ONCE(1, "invalid use of ftrace_make_nop"); 157 + return -EINVAL; 145 158 } 146 159 147 160 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ··· 165 152 old = ftrace_nop_replace(); 166 153 new = ftrace_call_replace(ip, addr); 167 154 168 - return ftrace_modify_code(rec->ip, old, new); 155 + /* Should only be called when module is loaded */ 156 + return ftrace_modify_code_direct(rec->ip, old, new); 169 157 } 158 + 159 + /* 160 + * The modifying_ftrace_code is used to tell the breakpoint 161 + * handler to call ftrace_int3_handler(). If it fails to 162 + * call this handler for a breakpoint added by ftrace, then 163 + * the kernel may crash. 164 + * 165 + * As atomic_writes on x86 do not need a barrier, we do not 166 + * need to add smp_mb()s for this to work. It is also considered 167 + * that we can not read the modifying_ftrace_code before 168 + * executing the breakpoint. That would be quite remarkable if 169 + * it could do that. 
Here's the flow that is required: 170 + * 171 + * CPU-0 CPU-1 172 + * 173 + * atomic_inc(mfc); 174 + * write int3s 175 + * <trap-int3> // implicit (r)mb 176 + * if (atomic_read(mfc)) 177 + * call ftrace_int3_handler() 178 + * 179 + * Then when we are finished: 180 + * 181 + * atomic_dec(mfc); 182 + * 183 + * If we hit a breakpoint that was not set by ftrace, it does not 184 + * matter if ftrace_int3_handler() is called or not. It will 185 + * simply be ignored. But it is crucial that a ftrace nop/caller 186 + * breakpoint is handled. No other user should ever place a 187 + * breakpoint on an ftrace nop/caller location. It must only 188 + * be done by this code. 189 + */ 190 + atomic_t modifying_ftrace_code __read_mostly; 191 + 192 + static int 193 + ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 194 + unsigned const char *new_code); 170 195 171 196 int ftrace_update_ftrace_func(ftrace_func_t func) 172 197 { ··· 214 163 215 164 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); 216 165 new = ftrace_call_replace(ip, (unsigned long)func); 166 + 167 + /* See comment above by declaration of modifying_ftrace_code */ 168 + atomic_inc(&modifying_ftrace_code); 169 + 217 170 ret = ftrace_modify_code(ip, old, new); 171 + 172 + atomic_dec(&modifying_ftrace_code); 218 173 219 174 return ret; 220 175 } 221 - 222 - int modifying_ftrace_code __read_mostly; 223 176 224 177 /* 225 178 * A breakpoint was added to the code address we are about to ··· 544 489 } 545 490 } 546 491 492 + static int 493 + ftrace_modify_code(unsigned long ip, unsigned const char *old_code, 494 + unsigned const char *new_code) 495 + { 496 + int ret; 497 + 498 + ret = add_break(ip, old_code); 499 + if (ret) 500 + goto out; 501 + 502 + run_sync(); 503 + 504 + ret = add_update_code(ip, new_code); 505 + if (ret) 506 + goto fail_update; 507 + 508 + run_sync(); 509 + 510 + ret = ftrace_write(ip, new_code, 1); 511 + if (ret) { 512 + ret = -EPERM; 513 + goto out; 514 + } 515 + run_sync(); 516 + out: 
517 + return ret; 518 + 519 + fail_update: 520 + probe_kernel_write((void *)ip, &old_code[0], 1); 521 + goto out; 522 + } 523 + 547 524 void arch_ftrace_update_code(int command) 548 525 { 549 - modifying_ftrace_code++; 526 + /* See comment above by declaration of modifying_ftrace_code */ 527 + atomic_inc(&modifying_ftrace_code); 550 528 551 529 ftrace_modify_all_code(command); 552 530 553 - modifying_ftrace_code--; 531 + atomic_dec(&modifying_ftrace_code); 554 532 } 555 533 556 534 int __init ftrace_dyn_arch_init(void *data)
+4 -2
arch/x86/kernel/nmi.c
··· 444 444 */ 445 445 if (unlikely(is_debug_stack(regs->sp))) { 446 446 debug_stack_set_zero(); 447 - __get_cpu_var(update_debug_stack) = 1; 447 + this_cpu_write(update_debug_stack, 1); 448 448 } 449 449 } 450 450 451 451 static inline void nmi_nesting_postprocess(void) 452 452 { 453 - if (unlikely(__get_cpu_var(update_debug_stack))) 453 + if (unlikely(this_cpu_read(update_debug_stack))) { 454 454 debug_stack_reset(); 455 + this_cpu_write(update_debug_stack, 0); 456 + } 455 457 } 456 458 #endif 457 459
-6
arch/x86/kernel/ptrace.c
··· 1211 1211 0, sizeof(struct user_i387_struct), 1212 1212 datap); 1213 1213 1214 - /* normal 64bit interface to access TLS data. 1215 - Works just like arch_prctl, except that the arguments 1216 - are reversed. */ 1217 - case PTRACE_ARCH_PRCTL: 1218 - return do_arch_prctl(child, data, addr); 1219 - 1220 1214 default: 1221 1215 return compat_ptrace_request(child, request, addr, data); 1222 1216 }
+6 -2
arch/x86/kernel/traps.c
··· 303 303 dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) 304 304 { 305 305 #ifdef CONFIG_DYNAMIC_FTRACE 306 - /* ftrace must be first, everything else may cause a recursive crash */ 307 - if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) 306 + /* 307 + * ftrace must be first, everything else may cause a recursive crash. 308 + * See note by declaration of modifying_ftrace_code in ftrace.c 309 + */ 310 + if (unlikely(atomic_read(&modifying_ftrace_code)) && 311 + ftrace_int3_handler(regs)) 308 312 return; 309 313 #endif 310 314 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP