Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/efi: Avoid triple faults during EFI mixed mode calls

Andy pointed out that if an NMI or MCE is received while we're in the
middle of an EFI mixed mode call, a triple fault will occur. This can
happen, for example, when issuing an EFI mixed mode call while running
perf.

The reason for the triple fault is that we execute the mixed mode call
in 32-bit mode with paging disabled but with 64-bit kernel IDT handlers
installed throughout the call.

At Andy's suggestion, stop playing the games we currently do at runtime,
such as disabling paging and installing a 32-bit GDT for __KERNEL_CS. We
can simply switch to the __KERNEL32_CS descriptor before invoking
firmware services, and run in compatibility mode. This way, if an
NMI/MCE does occur the kernel IDT handler will execute correctly, since
it'll jump to __KERNEL_CS automatically.

However, this change is only possible post-ExitBootServices(). Before
then the firmware "owns" the machine and expects its 32-bit IDT
handlers to be left intact to service interrupts, etc.

So, we now need to distinguish between early boot and runtime
invocations of EFI services. During early boot, we need to restore the
GDT that the firmware expects to be present. We can only jump to the
__KERNEL32_CS code segment for mixed mode calls after ExitBootServices()
has been invoked.

A liberal sprinkling of comments in the thunking code should make the
differences in early and late environments more apparent.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Tested-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>

+301 -203
+1
arch/x86/boot/compressed/Makefile
··· 49 49 50 50 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ 51 51 $(objtree)/drivers/firmware/efi/libstub/lib.a 52 + vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o 52 53 53 54 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE 54 55 $(call if_changed,ld)
-25
arch/x86/boot/compressed/efi_stub_64.S
··· 3 3 #include <asm/processor-flags.h> 4 4 5 5 #include "../../platform/efi/efi_stub_64.S" 6 - 7 - #ifdef CONFIG_EFI_MIXED 8 - .code64 9 - .text 10 - ENTRY(efi64_thunk) 11 - push %rbp 12 - push %rbx 13 - 14 - subq $16, %rsp 15 - leaq efi_exit32(%rip), %rax 16 - movl %eax, 8(%rsp) 17 - leaq efi_gdt64(%rip), %rax 18 - movl %eax, 4(%rsp) 19 - movl %eax, 2(%rax) /* Fixup the gdt base address */ 20 - leaq efi32_boot_gdt(%rip), %rax 21 - movl %eax, (%rsp) 22 - 23 - call __efi64_thunk 24 - 25 - addq $16, %rsp 26 - pop %rbx 27 - pop %rbp 28 - ret 29 - ENDPROC(efi64_thunk) 30 - #endif /* CONFIG_EFI_MIXED */
+196
arch/x86/boot/compressed/efi_thunk_64.S
··· 1 + /* 2 + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming 3 + * 4 + * Early support for invoking 32-bit EFI services from a 64-bit kernel. 5 + * 6 + * Because this thunking occurs before ExitBootServices() we have to 7 + * restore the firmware's 32-bit GDT before we make EFI service calls, 8 + * since the firmware's 32-bit IDT is still currently installed and it 9 + * needs to be able to service interrupts. 10 + * 11 + * On the plus side, we don't have to worry about mangling 64-bit 12 + * addresses into 32-bits because we're executing with an identity 13 + * mapped pagetable and haven't transitioned to 64-bit virtual addresses 14 + * yet. 15 + */ 16 + 17 + #include <linux/linkage.h> 18 + #include <asm/msr.h> 19 + #include <asm/page_types.h> 20 + #include <asm/processor-flags.h> 21 + #include <asm/segment.h> 22 + 23 + .code64 24 + .text 25 + ENTRY(efi64_thunk) 26 + push %rbp 27 + push %rbx 28 + 29 + subq $8, %rsp 30 + leaq efi_exit32(%rip), %rax 31 + movl %eax, 4(%rsp) 32 + leaq efi_gdt64(%rip), %rax 33 + movl %eax, (%rsp) 34 + movl %eax, 2(%rax) /* Fixup the gdt base address */ 35 + 36 + movl %ds, %eax 37 + push %rax 38 + movl %es, %eax 39 + push %rax 40 + movl %ss, %eax 41 + push %rax 42 + 43 + /* 44 + * Convert x86-64 ABI params to i386 ABI 45 + */ 46 + subq $32, %rsp 47 + movl %esi, 0x0(%rsp) 48 + movl %edx, 0x4(%rsp) 49 + movl %ecx, 0x8(%rsp) 50 + movq %r8, %rsi 51 + movl %esi, 0xc(%rsp) 52 + movq %r9, %rsi 53 + movl %esi, 0x10(%rsp) 54 + 55 + sgdt save_gdt(%rip) 56 + 57 + leaq 1f(%rip), %rbx 58 + movq %rbx, func_rt_ptr(%rip) /* efi_exit32 resumes at 1: via this pointer */ 59 + 60 + /* 61 + * Switch to gdt with 32-bit segments. This is the firmware GDT 62 + * that was installed when the kernel started executing. This 63 + * pointer was saved at the EFI stub entry point in head_64.S. 
64 + */ 65 + leaq efi32_boot_gdt(%rip), %rax 66 + lgdt (%rax) 67 + 68 + pushq $__KERNEL_CS 69 + leaq efi_enter32(%rip), %rax 70 + pushq %rax 71 + lretq 72 + 73 + 1: addq $32, %rsp 74 + 75 + lgdt save_gdt(%rip) 76 + 77 + pop %rbx 78 + movl %ebx, %ss 79 + pop %rbx 80 + movl %ebx, %es 81 + pop %rbx 82 + movl %ebx, %ds 83 + 84 + /* 85 + * Convert 32-bit status code into 64-bit. 86 + */ 87 + test %rax, %rax 88 + jz 1f 89 + movl %eax, %ecx 90 + andl $0x0fffffff, %ecx 91 + andl $0xf0000000, %eax 92 + shl $32, %rax 93 + or %rcx, %rax 94 + 1: 95 + addq $8, %rsp 96 + pop %rbx 97 + pop %rbp 98 + ret 99 + ENDPROC(efi64_thunk) 100 + 101 + ENTRY(efi_exit32) 102 + movq func_rt_ptr(%rip), %rax 103 + push %rax 104 + mov %rdi, %rax 105 + ret 106 + ENDPROC(efi_exit32) 107 + 108 + .code32 109 + /* 110 + * EFI service pointer must be in %edi. 111 + * 112 + * The stack should represent the 32-bit calling convention. 113 + */ 114 + ENTRY(efi_enter32) 115 + movl $__KERNEL_DS, %eax 116 + movl %eax, %ds 117 + movl %eax, %es 118 + movl %eax, %ss 119 + 120 + /* Reload pgtables */ 121 + movl %cr3, %eax 122 + movl %eax, %cr3 123 + 124 + /* Disable paging */ 125 + movl %cr0, %eax 126 + btrl $X86_CR0_PG_BIT, %eax 127 + movl %eax, %cr0 128 + 129 + /* Disable long mode via EFER */ 130 + movl $MSR_EFER, %ecx 131 + rdmsr 132 + btrl $_EFER_LME, %eax 133 + wrmsr 134 + 135 + call *%edi 136 + 137 + /* We must preserve return value */ 138 + movl %eax, %edi 139 + 140 + /* 141 + * Some firmware will return with interrupts enabled. Be sure to 142 + * disable them before we switch GDTs. 
143 + */ 144 + cli 145 + 146 + movl 56(%esp), %eax /* efi_gdt64 address: 32 bytes of args + 3 saved segment regs above %esp */ 147 + movl %eax, 2(%eax) 148 + lgdtl (%eax) 149 + 150 + movl %cr4, %eax 151 + btsl $(X86_CR4_PAE_BIT), %eax 152 + movl %eax, %cr4 153 + 154 + movl %cr3, %eax 155 + movl %eax, %cr3 156 + 157 + movl $MSR_EFER, %ecx 158 + rdmsr 159 + btsl $_EFER_LME, %eax 160 + wrmsr 161 + 162 + xorl %eax, %eax 163 + lldt %ax 164 + 165 + movl 60(%esp), %eax /* efi_exit32 address stored by efi64_thunk */ 166 + pushl $__KERNEL_CS 167 + pushl %eax 168 + 169 + /* Enable paging */ 170 + movl %cr0, %eax 171 + btsl $X86_CR0_PG_BIT, %eax 172 + movl %eax, %cr0 173 + lret 174 + ENDPROC(efi_enter32) 175 + 176 + .data 177 + .balign 8 178 + .global efi32_boot_gdt 179 + efi32_boot_gdt: .word 0 180 + .quad 0 181 + 182 + save_gdt: .word 0 183 + .quad 0 184 + func_rt_ptr: .quad 0 185 + 186 + .global efi_gdt64 187 + efi_gdt64: 188 + .word efi_gdt64_end - efi_gdt64 189 + .long 0 /* Filled out by user */ 190 + .word 0 191 + .quad 0x0000000000000000 /* NULL descriptor */ 192 + .quad 0x00af9a000000ffff /* __KERNEL_CS */ 193 + .quad 0x00cf92000000ffff /* __KERNEL_DS */ 194 + .quad 0x0080890000000000 /* TS descriptor */ 195 + .quad 0x0000000000000000 /* TS continued */ 196 + efi_gdt64_end:
-161
arch/x86/platform/efi/efi_stub_64.S
··· 91 91 ret 92 92 ENDPROC(efi_call) 93 93 94 - #ifdef CONFIG_EFI_MIXED 95 - 96 - /* 97 - * We run this function from the 1:1 mapping. 98 - * 99 - * This function must be invoked with a 1:1 mapped stack. 100 - */ 101 - ENTRY(__efi64_thunk) 102 - movl %ds, %eax 103 - push %rax 104 - movl %es, %eax 105 - push %rax 106 - movl %ss, %eax 107 - push %rax 108 - 109 - subq $32, %rsp 110 - movl %esi, 0x0(%rsp) 111 - movl %edx, 0x4(%rsp) 112 - movl %ecx, 0x8(%rsp) 113 - movq %r8, %rsi 114 - movl %esi, 0xc(%rsp) 115 - movq %r9, %rsi 116 - movl %esi, 0x10(%rsp) 117 - 118 - sgdt save_gdt(%rip) 119 - 120 - leaq 1f(%rip), %rbx 121 - movq %rbx, func_rt_ptr(%rip) 122 - 123 - /* Switch to gdt with 32-bit segments */ 124 - movl 64(%rsp), %eax 125 - lgdt (%rax) 126 - 127 - leaq efi_enter32(%rip), %rax 128 - pushq $__KERNEL_CS 129 - pushq %rax 130 - lretq 131 - 132 - 1: addq $32, %rsp 133 - 134 - lgdt save_gdt(%rip) 135 - 136 - pop %rbx 137 - movl %ebx, %ss 138 - pop %rbx 139 - movl %ebx, %es 140 - pop %rbx 141 - movl %ebx, %ds 142 - 143 - /* 144 - * Convert 32-bit status code into 64-bit. 145 - */ 146 - test %rax, %rax 147 - jz 1f 148 - movl %eax, %ecx 149 - andl $0x0fffffff, %ecx 150 - andl $0xf0000000, %eax 151 - shl $32, %rax 152 - or %rcx, %rax 153 - 1: 154 - ret 155 - ENDPROC(__efi64_thunk) 156 - 157 - ENTRY(efi_exit32) 158 - movq func_rt_ptr(%rip), %rax 159 - push %rax 160 - mov %rdi, %rax 161 - ret 162 - ENDPROC(efi_exit32) 163 - 164 - .code32 165 - /* 166 - * EFI service pointer must be in %edi. 167 - * 168 - * The stack should represent the 32-bit calling convention. 
169 - */ 170 - ENTRY(efi_enter32) 171 - movl $__KERNEL_DS, %eax 172 - movl %eax, %ds 173 - movl %eax, %es 174 - movl %eax, %ss 175 - 176 - /* Reload pgtables */ 177 - movl %cr3, %eax 178 - movl %eax, %cr3 179 - 180 - /* Disable paging */ 181 - movl %cr0, %eax 182 - btrl $X86_CR0_PG_BIT, %eax 183 - movl %eax, %cr0 184 - 185 - /* Disable long mode via EFER */ 186 - movl $MSR_EFER, %ecx 187 - rdmsr 188 - btrl $_EFER_LME, %eax 189 - wrmsr 190 - 191 - call *%edi 192 - 193 - /* We must preserve return value */ 194 - movl %eax, %edi 195 - 196 - /* 197 - * Some firmware will return with interrupts enabled. Be sure to 198 - * disable them before we switch GDTs. 199 - */ 200 - cli 201 - 202 - movl 68(%esp), %eax 203 - movl %eax, 2(%eax) 204 - lgdtl (%eax) 205 - 206 - movl %cr4, %eax 207 - btsl $(X86_CR4_PAE_BIT), %eax 208 - movl %eax, %cr4 209 - 210 - movl %cr3, %eax 211 - movl %eax, %cr3 212 - 213 - movl $MSR_EFER, %ecx 214 - rdmsr 215 - btsl $_EFER_LME, %eax 216 - wrmsr 217 - 218 - xorl %eax, %eax 219 - lldt %ax 220 - 221 - movl 72(%esp), %eax 222 - pushl $__KERNEL_CS 223 - pushl %eax 224 - 225 - /* Enable paging */ 226 - movl %cr0, %eax 227 - btsl $X86_CR0_PG_BIT, %eax 228 - movl %eax, %cr0 229 - lret 230 - ENDPROC(efi_enter32) 231 - 232 - .data 233 - .balign 8 234 - .global efi32_boot_gdt 235 - efi32_boot_gdt: .word 0 236 - .quad 0 237 - 238 - save_gdt: .word 0 239 - .quad 0 240 - func_rt_ptr: .quad 0 241 - 242 - .global efi_gdt64 243 - efi_gdt64: 244 - .word efi_gdt64_end - efi_gdt64 245 - .long 0 /* Filled out by user */ 246 - .word 0 247 - .quad 0x0000000000000000 /* NULL descriptor */ 248 - .quad 0x00af9a000000ffff /* __KERNEL_CS */ 249 - .quad 0x00cf92000000ffff /* __KERNEL_DS */ 250 - .quad 0x0080890000000000 /* TS descriptor */ 251 - .quad 0x0000000000000000 /* TS continued */ 252 - efi_gdt64_end: 253 - #endif /* CONFIG_EFI_MIXED */ 254 - 255 94 .data 256 95 ENTRY(efi_scratch) 257 96 .fill 3,8,0
+104 -17
arch/x86/platform/efi/efi_thunk_64.S
··· 1 1 /* 2 2 * Copyright (C) 2014 Intel Corporation; author Matt Fleming 3 + * 4 + * Support for invoking 32-bit EFI runtime services from a 64-bit 5 + * kernel. 6 + * 7 + * The below thunking functions are only used after ExitBootServices() 8 + * has been called. This simplifies things considerably as compared with 9 + * the early EFI thunking because we can leave all the kernel state 10 + * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime 11 + * services from __KERNEL32_CS. This means we can continue to service 12 + * interrupts across an EFI mixed mode call. 13 + * 14 + * We do however, need to handle the fact that we're running in a full 15 + * 64-bit virtual address space. Things like the stack and instruction 16 + * addresses need to be accessible by the 32-bit firmware, so we rely on 17 + * using the identity mappings in the EFI page table to access the stack 18 + * and kernel text (see efi_setup_page_tables()). 3 19 */ 4 20 5 21 #include <linux/linkage.h> 6 22 #include <asm/page_types.h> 23 + #include <asm/segment.h> 7 24 8 25 .text 9 26 .code64 ··· 50 33 leaq efi_exit32(%rip), %rbx 51 34 subq %rax, %rbx 52 35 movl %ebx, 8(%rsp) 53 - leaq efi_gdt64(%rip), %rbx 54 - subq %rax, %rbx 55 - movl %ebx, 2(%ebx) 56 - movl %ebx, 4(%rsp) 57 - leaq efi_gdt32(%rip), %rbx 58 - subq %rax, %rbx 59 - movl %ebx, 2(%ebx) 60 - movl %ebx, (%rsp) 61 36 62 37 leaq __efi64_thunk(%rip), %rbx 63 38 subq %rax, %rbx ··· 61 52 retq 62 53 ENDPROC(efi64_thunk) 63 54 64 - .data 65 - efi_gdt32: 66 - .word efi_gdt32_end - efi_gdt32 67 - .long 0 /* Filled out above */ 68 - .word 0 69 - .quad 0x0000000000000000 /* NULL descriptor */ 70 - .quad 0x00cf9a000000ffff /* __KERNEL_CS */ 71 - .quad 0x00cf93000000ffff /* __KERNEL_DS */ 72 - efi_gdt32_end: 55 + /* 56 + * We run this function from the 1:1 mapping. 57 + * 58 + * This function must be invoked with a 1:1 mapped stack. 
59 + */ 60 + ENTRY(__efi64_thunk) 61 + movl %ds, %eax 62 + push %rax 63 + movl %es, %eax 64 + push %rax 65 + movl %ss, %eax 66 + push %rax 73 67 68 + subq $32, %rsp 69 + movl %esi, 0x0(%rsp) 70 + movl %edx, 0x4(%rsp) 71 + movl %ecx, 0x8(%rsp) 72 + movq %r8, %rsi 73 + movl %esi, 0xc(%rsp) 74 + movq %r9, %rsi 75 + movl %esi, 0x10(%rsp) 76 + 77 + leaq 1f(%rip), %rbx 78 + movq %rbx, func_rt_ptr(%rip) /* efi_exit32 resumes at 1: via this pointer */ 79 + 80 + /* Switch to 32-bit descriptor */ 81 + pushq $__KERNEL32_CS 82 + leaq efi_enter32(%rip), %rax 83 + pushq %rax 84 + lretq 85 + 86 + 1: addq $32, %rsp 87 + 88 + pop %rbx 89 + movl %ebx, %ss 90 + pop %rbx 91 + movl %ebx, %es 92 + pop %rbx 93 + movl %ebx, %ds 94 + 95 + /* 96 + * Convert 32-bit status code into 64-bit. 97 + */ 98 + test %rax, %rax 99 + jz 1f 100 + movl %eax, %ecx 101 + andl $0x0fffffff, %ecx 102 + andl $0xf0000000, %eax 103 + shl $32, %rax 104 + or %rcx, %rax 105 + 1: 106 + ret 107 + ENDPROC(__efi64_thunk) 108 + 109 + ENTRY(efi_exit32) 110 + movq func_rt_ptr(%rip), %rax 111 + push %rax 112 + mov %rdi, %rax 113 + ret 114 + ENDPROC(efi_exit32) 115 + 116 + .code32 117 + /* 118 + * EFI service pointer must be in %edi. 119 + * 120 + * The stack should represent the 32-bit calling convention. 121 + */ 122 + ENTRY(efi_enter32) 123 + movl $__KERNEL_DS, %eax 124 + movl %eax, %ds 125 + movl %eax, %es 126 + movl %eax, %ss 127 + 128 + call *%edi 129 + 130 + /* We must preserve return value */ 131 + movl %eax, %edi 132 + 133 + movl 72(%esp), %eax /* presumably the efi_exit32 address efi64_thunk stored at 8(%rsp); its call site is not visible here - TODO confirm */ 134 + pushl $__KERNEL_CS 135 + pushl %eax 136 + 137 + lret 138 + ENDPROC(efi_enter32) 139 + 140 + .data 141 + .balign 8 142 + func_rt_ptr: .quad 0 74 143 efi_saved_sp: .quad 0