Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/its: Add support for ITS-safe indirect thunk

Due to ITS, indirect branches in the lower half of a cacheline may be
vulnerable to branch target injection attack.

Introduce ITS-safe thunks to patch indirect branches in the lower half of
cacheline with the thunk. Also thunk any eBPF generated indirect branches
in emit_indirect_jump().

The following categories of indirect branches are not mitigated:

- Indirect branches in the .init section are not mitigated because they are
discarded after boot.
- Indirect branches that are explicitly marked retpoline-safe.

Note that retpoline also mitigates the indirect branches against ITS. This
is because the retpoline sequence fills an RSB entry before RET, and it
does not suffer from RSB-underflow part of the ITS.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>

Authored by Pawan Gupta; committed by Dave Hansen.
8754e67a 159013a7

+96 -4
+11
arch/x86/Kconfig
··· 2710 2710 of speculative execution in a similar way to the Meltdown and Spectre 2711 2711 security vulnerabilities. 2712 2712 2713 + config MITIGATION_ITS 2714 + bool "Enable Indirect Target Selection mitigation" 2715 + depends on CPU_SUP_INTEL && X86_64 2716 + depends on MITIGATION_RETPOLINE && MITIGATION_RETHUNK 2717 + default y 2718 + help 2719 + Enable Indirect Target Selection (ITS) mitigation. ITS is a bug in 2720 + BPU on some Intel CPUs that may allow Spectre V2 style attacks. If 2721 + disabled, mitigation cannot be enabled via cmdline. 2722 + See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst> 2723 + 2713 2724 endif 2714 2725 2715 2726 config ARCH_HAS_ADD_PAGES
+1
arch/x86/include/asm/cpufeatures.h
··· 481 481 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ 482 482 #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ 483 483 #define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ 484 + #define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 9) /* Use thunk for indirect branches in lower half of cacheline */ 484 485 485 486 /* 486 487 * BUG word(s)
+4
arch/x86/include/asm/nospec-branch.h
··· 336 336 337 337 #else /* __ASSEMBLER__ */ 338 338 339 + #define ITS_THUNK_SIZE 64 340 + 339 341 typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; 342 + typedef u8 its_thunk_t[ITS_THUNK_SIZE]; 340 343 extern retpoline_thunk_t __x86_indirect_thunk_array[]; 341 344 extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; 342 345 extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; 346 + extern its_thunk_t __x86_indirect_its_thunk_array[]; 343 347 344 348 #ifdef CONFIG_MITIGATION_RETHUNK 345 349 extern void __x86_return_thunk(void);
+42 -3
arch/x86/kernel/alternative.c
··· 581 581 return i; 582 582 } 583 583 584 - static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) 584 + static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes, 585 + void *call_dest, void *jmp_dest) 585 586 { 586 587 u8 op = insn->opcode.bytes[0]; 587 588 int i = 0; ··· 603 602 switch (op) { 604 603 case CALL_INSN_OPCODE: 605 604 __text_gen_insn(bytes+i, op, addr+i, 606 - __x86_indirect_call_thunk_array[reg], 605 + call_dest, 607 606 CALL_INSN_SIZE); 608 607 i += CALL_INSN_SIZE; 609 608 break; ··· 611 610 case JMP32_INSN_OPCODE: 612 611 clang_jcc: 613 612 __text_gen_insn(bytes+i, op, addr+i, 614 - __x86_indirect_jump_thunk_array[reg], 613 + jmp_dest, 615 614 JMP32_INSN_SIZE); 616 615 i += JMP32_INSN_SIZE; 617 616 break; ··· 625 624 626 625 return i; 627 626 } 627 + 628 + static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) 629 + { 630 + return __emit_trampoline(addr, insn, bytes, 631 + __x86_indirect_call_thunk_array[reg], 632 + __x86_indirect_jump_thunk_array[reg]); 633 + } 634 + 635 + #ifdef CONFIG_MITIGATION_ITS 636 + static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes) 637 + { 638 + return __emit_trampoline(addr, insn, bytes, 639 + __x86_indirect_its_thunk_array[reg], 640 + __x86_indirect_its_thunk_array[reg]); 641 + } 642 + 643 + /* Check if an indirect branch is at ITS-unsafe address */ 644 + static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg) 645 + { 646 + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) 647 + return false; 648 + 649 + /* Indirect branch opcode is 2 or 3 bytes depending on reg */ 650 + addr += 1 + reg / 8; 651 + 652 + /* Lower-half of the cacheline? */ 653 + return !(addr & 0x20); 654 + } 655 + #endif 628 656 629 657 /* 630 658 * Rewrite the compiler generated retpoline thunk calls. 
··· 728 698 bytes[i++] = 0xae; 729 699 bytes[i++] = 0xe8; /* LFENCE */ 730 700 } 701 + 702 + #ifdef CONFIG_MITIGATION_ITS 703 + /* 704 + * Check if the address of last byte of emitted-indirect is in 705 + * lower-half of the cacheline. Such branches need ITS mitigation. 706 + */ 707 + if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg)) 708 + return emit_its_trampoline(addr, insn, reg, bytes); 709 + #endif 731 710 732 711 ret = emit_indirect(op, reg, bytes + i); 733 712 if (ret < 0)
+6
arch/x86/kernel/vmlinux.lds.S
··· 497 497 "SRSO function pair won't alias"); 498 498 #endif 499 499 500 + #if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B) 501 + . = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_thunk_rax not in second half of cacheline"); 502 + . = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart"); 503 + . = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array"); 504 + #endif 505 + 500 506 #endif /* CONFIG_X86_64 */ 501 507 502 508 /*
+28
arch/x86/lib/retpoline.S
··· 367 367 368 368 #endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ 369 369 370 + #ifdef CONFIG_MITIGATION_ITS 371 + 372 + .macro ITS_THUNK reg 373 + 374 + SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL) 375 + UNWIND_HINT_UNDEFINED 376 + ANNOTATE_NOENDBR 377 + ANNOTATE_RETPOLINE_SAFE 378 + jmp *%\reg 379 + int3 380 + .align 32, 0xcc /* fill to the end of the line */ 381 + .skip 32, 0xcc /* skip to the next upper half */ 382 + .endm 383 + 384 + /* ITS mitigation requires thunks be aligned to upper half of cacheline */ 385 + .align 64, 0xcc 386 + .skip 32, 0xcc 387 + SYM_CODE_START(__x86_indirect_its_thunk_array) 388 + 389 + #define GEN(reg) ITS_THUNK reg 390 + #include <asm/GEN-for-each-reg.h> 391 + #undef GEN 392 + 393 + .align 64, 0xcc 394 + SYM_CODE_END(__x86_indirect_its_thunk_array) 395 + 396 + #endif 397 + 370 398 /* 371 399 * This function name is magical and is used by -mfunction-return=thunk-extern 372 400 * for the compiler to generate JMPs to it.
+4 -1
arch/x86/net/bpf_jit_comp.c
··· 661 661 { 662 662 u8 *prog = *pprog; 663 663 664 - if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { 664 + if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { 665 + OPTIMIZER_HIDE_VAR(reg); 666 + emit_jump(&prog, &__x86_indirect_its_thunk_array[reg], ip); 667 + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { 665 668 EMIT_LFENCE(); 666 669 EMIT2(0xFF, 0xE0 + reg); 667 670 } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {