Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Peter Anvin:
"This is a collection of minor fixes for x86, plus the IRET information
leak fix (forbid the use of 16-bit segments in 64-bit mode)"

NOTE! We may have to relax the "forbid the use of 16-bit segments in
64-bit mode" part, since there may be people who still run and depend on
16-bit Windows binaries under Wine.

But I'm taking this in the current unconditional form for now to see who
(if anybody) screams bloody murder. Maybe nobody cares. And maybe
we'll have to update it with some kind of runtime enablement (like our
vm.mmap_min_addr tunable that people who run dosemu/qemu/wine already
need to tweak).

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86-64, modify_ldt: Ban 16-bit segments on 64-bit kernels
efi: Pass correct file handle to efi_file_{read,close}
x86/efi: Correct EFI boot stub use of code32_start
x86/efi: Fix boot failure with EFI stub
x86/platform/hyperv: Handle VMBUS driver being a module
x86/apic: Reinstate error IRQ Pentium erratum 3AP workaround
x86, CMCI: Add proper detection of end of CMCI storms

+67 -28
+10 -9
arch/x86/boot/compressed/eboot.c
··· 112 112 efi_file_info_t *info; 113 113 efi_status_t status; 114 114 efi_guid_t info_guid = EFI_FILE_INFO_ID; 115 - u32 info_sz; 115 + u64 info_sz; 116 116 117 117 status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, 118 118 EFI_FILE_MODE_READ, (u64)0); ··· 167 167 } 168 168 169 169 static inline efi_status_t 170 - efi_file_read(void *__fh, void *handle, unsigned long *size, void *addr) 170 + efi_file_read(void *handle, unsigned long *size, void *addr) 171 171 { 172 172 unsigned long func; 173 173 174 174 if (efi_early->is64) { 175 - efi_file_handle_64_t *fh = __fh; 175 + efi_file_handle_64_t *fh = handle; 176 176 177 177 func = (unsigned long)fh->read; 178 178 return efi_early->call(func, handle, size, addr); 179 179 } else { 180 - efi_file_handle_32_t *fh = __fh; 180 + efi_file_handle_32_t *fh = handle; 181 181 182 182 func = (unsigned long)fh->read; 183 183 return efi_early->call(func, handle, size, addr); 184 184 } 185 185 } 186 186 187 - static inline efi_status_t efi_file_close(void *__fh, void *handle) 187 + static inline efi_status_t efi_file_close(void *handle) 188 188 { 189 189 if (efi_early->is64) { 190 - efi_file_handle_64_t *fh = __fh; 190 + efi_file_handle_64_t *fh = handle; 191 191 192 192 return efi_early->call((unsigned long)fh->close, handle); 193 193 } else { 194 - efi_file_handle_32_t *fh = __fh; 194 + efi_file_handle_32_t *fh = handle; 195 195 196 196 return efi_early->call((unsigned long)fh->close, handle); 197 197 } ··· 1016 1016 * Because the x86 boot code expects to be passed a boot_params we 1017 1017 * need to create one ourselves (usually the bootloader would create 1018 1018 * one for us). 1019 + * 1020 + * The caller is responsible for filling out ->code32_start in the 1021 + * returned boot_params. 
1019 1022 */ 1020 1023 struct boot_params *make_boot_params(struct efi_config *c) 1021 1024 { ··· 1083 1080 hdr->root_flags = 1; 1084 1081 hdr->vid_mode = 0xffff; 1085 1082 hdr->boot_flag = 0xAA55; 1086 - 1087 - hdr->code32_start = (__u64)(unsigned long)image->image_base; 1088 1083 1089 1084 hdr->type_of_loader = 0x21; 1090 1085
+2 -6
arch/x86/boot/compressed/head_32.S
··· 59 59 call make_boot_params 60 60 cmpl $0, %eax 61 61 je fail 62 + movl %esi, BP_code32_start(%eax) 62 63 popl %ecx 63 64 pushl %eax 64 65 pushl %ecx ··· 91 90 hlt 92 91 jmp fail 93 92 2: 94 - call 3f 95 - 3: 96 - popl %eax 97 - subl $3b, %eax 98 - subl BP_pref_address(%esi), %eax 99 - add BP_code32_start(%esi), %eax 93 + movl BP_code32_start(%esi), %eax 100 94 leal preferred_addr(%eax), %eax 101 95 jmp *%eax 102 96
+3 -6
arch/x86/boot/compressed/head_64.S
··· 261 261 cmpq $0,%rax 262 262 je fail 263 263 mov %rax, %rsi 264 + leaq startup_32(%rip), %rax 265 + movl %eax, BP_code32_start(%rsi) 264 266 jmp 2f /* Skip the relocation */ 265 267 266 268 handover_entry: ··· 286 284 hlt 287 285 jmp fail 288 286 2: 289 - call 3f 290 - 3: 291 - popq %rax 292 - subq $3b, %rax 293 - subq BP_pref_address(%rsi), %rax 294 - add BP_code32_start(%esi), %eax 287 + movl BP_code32_start(%esi), %eax 295 288 leaq preferred_addr(%rax), %rax 296 289 jmp *%rax 297 290
+2 -1
arch/x86/kernel/apic/apic.c
··· 1996 1996 }; 1997 1997 1998 1998 /* First tickle the hardware, only then report what went on. -- REW */ 1999 - apic_write(APIC_ESR, 0); 1999 + if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ 2000 + apic_write(APIC_ESR, 0); 2000 2001 v = apic_read(APIC_ESR); 2001 2002 ack_APIC_irq(); 2002 2003 atomic_inc(&irq_err_count);
+17 -1
arch/x86/kernel/cpu/mcheck/mce.c
··· 89 89 static DEFINE_PER_CPU(struct mce, mces_seen); 90 90 static int cpu_missing; 91 91 92 + /* CMCI storm detection filter */ 93 + static DEFINE_PER_CPU(unsigned long, mce_polled_error); 94 + 92 95 /* 93 96 * MCA banks polled by the period polling timer for corrected events. 94 97 * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). ··· 598 595 { 599 596 struct mce m; 600 597 int i; 598 + unsigned long *v; 601 599 602 600 this_cpu_inc(mce_poll_count); 603 601 ··· 618 614 if (!(m.status & MCI_STATUS_VAL)) 619 615 continue; 620 616 617 + v = &get_cpu_var(mce_polled_error); 618 + set_bit(0, v); 621 619 /* 622 620 * Uncorrected or signalled events are handled by the exception 623 621 * handler when it is enabled, so don't process those here. ··· 1284 1278 static unsigned long (*mce_adjust_timer)(unsigned long interval) = 1285 1279 mce_adjust_timer_default; 1286 1280 1281 + static int cmc_error_seen(void) 1282 + { 1283 + unsigned long *v = &__get_cpu_var(mce_polled_error); 1284 + 1285 + return test_and_clear_bit(0, v); 1286 + } 1287 + 1287 1288 static void mce_timer_fn(unsigned long data) 1288 1289 { 1289 1290 struct timer_list *t = &__get_cpu_var(mce_timer); 1290 1291 unsigned long iv; 1292 + int notify; 1291 1293 1292 1294 WARN_ON(smp_processor_id() != data); 1293 1295 ··· 1310 1296 * polling interval, otherwise increase the polling interval. 1311 1297 */ 1312 1298 iv = __this_cpu_read(mce_next_interval); 1313 - if (mce_notify_irq()) { 1299 + notify = mce_notify_irq(); 1300 + notify |= cmc_error_seen(); 1301 + if (notify) { 1314 1302 iv = max(iv / 2, (unsigned long) HZ/100); 1315 1303 } else { 1316 1304 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+18 -1
arch/x86/kernel/cpu/mcheck/mce_intel.c
··· 9 9 #include <linux/interrupt.h> 10 10 #include <linux/percpu.h> 11 11 #include <linux/sched.h> 12 + #include <linux/cpumask.h> 12 13 #include <asm/apic.h> 13 14 #include <asm/processor.h> 14 15 #include <asm/msr.h> ··· 138 137 } 139 138 } 140 139 140 + static void cmci_storm_disable_banks(void) 141 + { 142 + unsigned long flags, *owned; 143 + int bank; 144 + u64 val; 145 + 146 + raw_spin_lock_irqsave(&cmci_discover_lock, flags); 147 + owned = __get_cpu_var(mce_banks_owned); 148 + for_each_set_bit(bank, owned, MAX_NR_BANKS) { 149 + rdmsrl(MSR_IA32_MCx_CTL2(bank), val); 150 + val &= ~MCI_CTL2_CMCI_EN; 151 + wrmsrl(MSR_IA32_MCx_CTL2(bank), val); 152 + } 153 + raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 154 + } 155 + 141 156 static bool cmci_storm_detect(void) 142 157 { 143 158 unsigned int cnt = __this_cpu_read(cmci_storm_cnt); ··· 175 158 if (cnt <= CMCI_STORM_THRESHOLD) 176 159 return false; 177 160 178 - cmci_clear(); 161 + cmci_storm_disable_banks(); 179 162 __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); 180 163 r = atomic_add_return(1, &cmci_storm_on_cpus); 181 164 mce_timer_kick(CMCI_POLL_INTERVAL);
+1 -1
arch/x86/kernel/irq.c
··· 125 125 seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); 126 126 seq_printf(p, " Machine check polls\n"); 127 127 #endif 128 - #if defined(CONFIG_HYPERV) || defined(CONFIG_XEN) 128 + #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) 129 129 seq_printf(p, "%*s: ", prec, "THR"); 130 130 for_each_online_cpu(j) 131 131 seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count);
+11
arch/x86/kernel/ldt.c
··· 229 229 } 230 230 } 231 231 232 + /* 233 + * On x86-64 we do not support 16-bit segments due to 234 + * IRET leaking the high bits of the kernel stack address. 235 + */ 236 + #ifdef CONFIG_X86_64 237 + if (!ldt_info.seg_32bit) { 238 + error = -EINVAL; 239 + goto out_unlock; 240 + } 241 + #endif 242 + 232 243 fill_ldt(&ldt, &ldt_info); 233 244 if (oldmode) 234 245 ldt.avl = 0;
+3 -3
drivers/firmware/efi/efi-stub-helper.c
··· 397 397 else 398 398 chunksize = size; 399 399 400 - status = efi_file_read(fh, files[j].handle, 400 + status = efi_file_read(files[j].handle, 401 401 &chunksize, 402 402 (void *)addr); 403 403 if (status != EFI_SUCCESS) { ··· 408 408 size -= chunksize; 409 409 } 410 410 411 - efi_file_close(fh, files[j].handle); 411 + efi_file_close(files[j].handle); 412 412 } 413 413 414 414 } ··· 425 425 426 426 close_handles: 427 427 for (k = j; k < i; k++) 428 - efi_file_close(fh, files[k].handle); 428 + efi_file_close(files[k].handle); 429 429 free_files: 430 430 efi_call_early(free_pool, files); 431 431 fail: