Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 core updates from Borislav Petkov:

- Add the call depth tracking mitigation for Retbleed which has been
long in the making. It is a lighter-weight, software-only fix for
Skylake-based cores where enabling IBRS is a big hammer and causes a
significant performance impact.

What it basically does is, it aligns all kernel functions to a 16-byte
boundary and adds 16 bytes of padding before each function; objtool
collects all functions' locations and when the mitigation gets
applied, it patches a call accounting thunk which is used to track
the call depth of the stack at any time.

When that call depth reaches a magical, microarchitecture-specific
value for the Return Stack Buffer, the code stuffs that RSB and
avoids its underflow which could otherwise lead to the Intel variant
of Retbleed.

This software-only solution brings a lot of the lost performance
back, as benchmarks suggest:

https://lore.kernel.org/all/20220915111039.092790446@infradead.org/

The page above also contains a much more detailed explanation of the
whole mechanism.

- Implement a new control flow integrity scheme called FineIBT which is
based on the software kCFI implementation and uses hardware IBT
support where present to annotate and track indirect branches using a
hash to validate them

- Other misc fixes and cleanups

* tag 'x86_core_for_v6.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (80 commits)
x86/paravirt: Use common macro for creating simple asm paravirt functions
x86/paravirt: Remove clobber bitmask from .parainstructions
x86/debug: Include percpu.h in debugreg.h to get DECLARE_PER_CPU() et al
x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA bit
x86/Kconfig: Enable kernel IBT by default
x86,pm: Force out-of-line memcpy()
objtool: Fix weak hole vs prefix symbol
objtool: Optimize elf_dirty_reloc_sym()
x86/cfi: Add boot time hash randomization
x86/cfi: Boot time selection of CFI scheme
x86/ibt: Implement FineIBT
objtool: Add --cfi to generate the .cfi_sites section
x86: Add prefix symbols for function padding
objtool: Add option to generate prefix symbols
objtool: Avoid O(bloody terrible) behaviour -- an ode to libelf
objtool: Slice up elf_create_section_symbol()
kallsyms: Revert "Take callthunks into account"
x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces
x86/retpoline: Fix crash printing warning
x86/paravirt: Fix a !PARAVIRT build warning
...

+2709 -597
+2 -2
Makefile
··· 1006 1006 export CC_FLAGS_CFI 1007 1007 endif 1008 1008 1009 - ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B 1010 - KBUILD_CFLAGS += -falign-functions=64 1009 + ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) 1010 + KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) 1011 1011 endif 1012 1012 1013 1013 # arch Makefile may override CC so keep this after arch Makefile is included
+24
arch/Kconfig
··· 1438 1438 1439 1439 source "scripts/gcc-plugins/Kconfig" 1440 1440 1441 + config FUNCTION_ALIGNMENT_4B 1442 + bool 1443 + 1444 + config FUNCTION_ALIGNMENT_8B 1445 + bool 1446 + 1447 + config FUNCTION_ALIGNMENT_16B 1448 + bool 1449 + 1450 + config FUNCTION_ALIGNMENT_32B 1451 + bool 1452 + 1453 + config FUNCTION_ALIGNMENT_64B 1454 + bool 1455 + 1456 + config FUNCTION_ALIGNMENT 1457 + int 1458 + default 64 if FUNCTION_ALIGNMENT_64B 1459 + default 32 if FUNCTION_ALIGNMENT_32B 1460 + default 16 if FUNCTION_ALIGNMENT_16B 1461 + default 8 if FUNCTION_ALIGNMENT_8B 1462 + default 4 if FUNCTION_ALIGNMENT_4B 1463 + default 0 1464 + 1441 1465 endmenu
+1
arch/ia64/Kconfig
··· 63 63 select NUMA if !FLATMEM 64 64 select PCI_MSI_ARCH_FALLBACKS if PCI_MSI 65 65 select ZONE_DMA32 66 + select FUNCTION_ALIGNMENT_32B 66 67 default y 67 68 help 68 69 The Itanium Processor Family is Intel's 64-bit successor to
+1 -1
arch/ia64/Makefile
··· 23 23 EXTRA := 24 24 25 25 cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ 26 - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls 26 + -frename-registers -fno-optimize-sibling-calls 27 27 KBUILD_CFLAGS_KERNEL := -mconstant-gp 28 28 29 29 GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
+5
arch/um/kernel/um_arch.c
··· 444 444 { 445 445 } 446 446 447 + void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 448 + s32 *start_cfi, s32 *end_cfi) 449 + { 450 + } 451 + 447 452 void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 448 453 { 449 454 }
+74 -1
arch/x86/Kconfig
··· 292 292 select X86_FEATURE_NAMES if PROC_FS 293 293 select PROC_PID_ARCH_STATUS if PROC_FS 294 294 select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX 295 + select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 296 + select FUNCTION_ALIGNMENT_4B 295 297 imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI 296 298 select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE 297 299 ··· 1857 1855 1858 1856 config X86_KERNEL_IBT 1859 1857 prompt "Indirect Branch Tracking" 1860 - bool 1858 + def_bool y 1861 1859 depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL 1862 1860 # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f 1863 1861 depends on !LD_IS_LLD || LLD_VERSION >= 140000 ··· 2494 2492 config CC_HAS_RETURN_THUNK 2495 2493 def_bool $(cc-option,-mfunction-return=thunk-extern) 2496 2494 2495 + config CC_HAS_ENTRY_PADDING 2496 + def_bool $(cc-option,-fpatchable-function-entry=16,16) 2497 + 2498 + config FUNCTION_PADDING_CFI 2499 + int 2500 + default 59 if FUNCTION_ALIGNMENT_64B 2501 + default 27 if FUNCTION_ALIGNMENT_32B 2502 + default 11 if FUNCTION_ALIGNMENT_16B 2503 + default 3 if FUNCTION_ALIGNMENT_8B 2504 + default 0 2505 + 2506 + # Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG 2507 + # except Kconfig can't do arithmetic :/ 2508 + config FUNCTION_PADDING_BYTES 2509 + int 2510 + default FUNCTION_PADDING_CFI if CFI_CLANG 2511 + default FUNCTION_ALIGNMENT 2512 + 2513 + config CALL_PADDING 2514 + def_bool n 2515 + depends on CC_HAS_ENTRY_PADDING && OBJTOOL 2516 + select FUNCTION_ALIGNMENT_16B 2517 + 2518 + config FINEIBT 2519 + def_bool y 2520 + depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE 2521 + select CALL_PADDING 2522 + 2523 + config HAVE_CALL_THUNKS 2524 + def_bool y 2525 + depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL 2526 + 2527 + config CALL_THUNKS 2528 + def_bool n 2529 + select CALL_PADDING 2530 + 2531 + config PREFIX_SYMBOLS 2532 + def_bool y 2533 + depends on CALL_PADDING && !CFI_CLANG 2534 + 2497 2535 menuconfig SPECULATION_MITIGATIONS 
2498 2536 bool "Mitigations for speculative execution vulnerabilities" 2499 2537 default y ··· 2584 2542 default y 2585 2543 help 2586 2544 Compile the kernel with support for the retbleed=unret mitigation. 2545 + 2546 + config CALL_DEPTH_TRACKING 2547 + bool "Mitigate RSB underflow with call depth tracking" 2548 + depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS 2549 + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE 2550 + select CALL_THUNKS 2551 + default y 2552 + help 2553 + Compile the kernel with call depth tracking to mitigate the Intel 2554 + SKL Return-Speculation-Buffer (RSB) underflow issue. The 2555 + mitigation is off by default and needs to be enabled on the 2556 + kernel command line via the retbleed=stuff option. For 2557 + non-affected systems the overhead of this option is marginal as 2558 + the call depth tracking is using run-time generated call thunks 2559 + in a compiler generated padding area and call patching. This 2560 + increases text size by ~5%. For non affected systems this space 2561 + is unused. On affected SKL systems this results in a significant 2562 + performance gain over the IBRS mitigation. 2563 + 2564 + config CALL_THUNKS_DEBUG 2565 + bool "Enable call thunks and call depth tracking debugging" 2566 + depends on CALL_DEPTH_TRACKING 2567 + select FUNCTION_ALIGNMENT_32B 2568 + default n 2569 + help 2570 + Enable call/ret counters for imbalance detection and build in 2571 + a noisy dmesg about callthunks generation and call patching for 2572 + trouble shooting. The debug prints need to be enabled on the 2573 + kernel command line with 'debug-callthunks'. 2574 + Only enable this, when you are debugging call thunks as this 2575 + creates a noticable runtime overhead. If unsure say N. 2587 2576 2588 2577 config CPU_IBPB_ENTRY 2589 2578 bool "Enable IBPB on kernel entry"
+6
arch/x86/Makefile
··· 208 208 KBUILD_CFLAGS += -mharden-sls=all 209 209 endif 210 210 211 + ifdef CONFIG_CALL_PADDING 212 + PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) 213 + KBUILD_CFLAGS += $(PADDING_CFLAGS) 214 + export PADDING_CFLAGS 215 + endif 216 + 211 217 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) 212 218 213 219 ifdef CONFIG_LTO_CLANG
+8
arch/x86/boot/compressed/head_64.S
··· 38 38 #include "pgtable.h" 39 39 40 40 /* 41 + * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result 42 + * in assembly errors due to trying to move .org backward due to the excessive 43 + * alignment. 44 + */ 45 + #undef __ALIGN 46 + #define __ALIGN .balign 16, 0x90 47 + 48 + /* 41 49 * Locally defined symbols should be marked hidden: 42 50 */ 43 51 .hidden _bss
-2
arch/x86/crypto/camellia-aesni-avx-asm_64.S
··· 712 712 713 713 .text 714 714 715 - .align 8 716 715 SYM_FUNC_START_LOCAL(__camellia_enc_blk16) 717 716 /* input: 718 717 * %rdi: ctx, CTX ··· 798 799 jmp .Lenc_done; 799 800 SYM_FUNC_END(__camellia_enc_blk16) 800 801 801 - .align 8 802 802 SYM_FUNC_START_LOCAL(__camellia_dec_blk16) 803 803 /* input: 804 804 * %rdi: ctx, CTX
-4
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
··· 221 221 * Size optimization... with inlined roundsm32 binary would be over 5 times 222 222 * larger and would only marginally faster. 223 223 */ 224 - .align 8 225 224 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) 226 225 roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, 227 226 %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, ··· 228 229 RET; 229 230 SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) 230 231 231 - .align 8 232 232 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) 233 233 roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, 234 234 %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, ··· 746 748 747 749 .text 748 750 749 - .align 8 750 751 SYM_FUNC_START_LOCAL(__camellia_enc_blk32) 751 752 /* input: 752 753 * %rdi: ctx, CTX ··· 832 835 jmp .Lenc_done; 833 836 SYM_FUNC_END(__camellia_enc_blk32) 834 837 835 - .align 8 836 838 SYM_FUNC_START_LOCAL(__camellia_dec_blk32) 837 839 /* input: 838 840 * %rdi: ctx, CTX
-2
arch/x86/crypto/cast5-avx-x86_64-asm_64.S
··· 208 208 209 209 .text 210 210 211 - .align 16 212 211 SYM_FUNC_START_LOCAL(__cast5_enc_blk16) 213 212 /* input: 214 213 * %rdi: ctx ··· 281 282 RET; 282 283 SYM_FUNC_END(__cast5_enc_blk16) 283 284 284 - .align 16 285 285 SYM_FUNC_START_LOCAL(__cast5_dec_blk16) 286 286 /* input: 287 287 * %rdi: ctx
-1
arch/x86/crypto/crct10dif-pcl-asm_64.S
··· 94 94 # 95 95 # Assumes len >= 16. 96 96 # 97 - .align 16 98 97 SYM_FUNC_START(crc_t10dif_pcl) 99 98 100 99 movdqa .Lbswap_mask(%rip), BSWAP_MASK
-1
arch/x86/crypto/poly1305-x86_64-cryptogams.pl
··· 108 108 sub declare_function() { 109 109 my ($name, $align, $nargs) = @_; 110 110 if($kernel) { 111 - $code .= ".align $align\n"; 112 111 $code .= "SYM_FUNC_START($name)\n"; 113 112 $code .= ".L$name:\n"; 114 113 } else {
-2
arch/x86/crypto/serpent-avx-x86_64-asm_64.S
··· 550 550 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ 551 551 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 552 552 553 - .align 8 554 553 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) 555 554 /* input: 556 555 * %rdi: ctx, CTX ··· 603 604 RET; 604 605 SYM_FUNC_END(__serpent_enc_blk8_avx) 605 606 606 - .align 8 607 607 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) 608 608 /* input: 609 609 * %rdi: ctx, CTX
-2
arch/x86/crypto/serpent-avx2-asm_64.S
··· 550 550 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ 551 551 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) 552 552 553 - .align 8 554 553 SYM_FUNC_START_LOCAL(__serpent_enc_blk16) 555 554 /* input: 556 555 * %rdi: ctx, CTX ··· 603 604 RET; 604 605 SYM_FUNC_END(__serpent_enc_blk16) 605 606 606 - .align 8 607 607 SYM_FUNC_START_LOCAL(__serpent_dec_blk16) 608 608 /* input: 609 609 * %rdi: ctx, CTX
-1
arch/x86/crypto/sha1_ni_asm.S
··· 93 93 * numBlocks: Number of blocks to process 94 94 */ 95 95 .text 96 - .align 32 97 96 SYM_TYPED_FUNC_START(sha1_ni_transform) 98 97 push %rbp 99 98 mov %rsp, %rbp
-1
arch/x86/crypto/sha256-avx-asm.S
··· 348 348 ######################################################################## 349 349 .text 350 350 SYM_TYPED_FUNC_START(sha256_transform_avx) 351 - .align 32 352 351 pushq %rbx 353 352 pushq %r12 354 353 pushq %r13
-1
arch/x86/crypto/sha256-avx2-asm.S
··· 525 525 ######################################################################## 526 526 .text 527 527 SYM_TYPED_FUNC_START(sha256_transform_rorx) 528 - .align 32 529 528 pushq %rbx 530 529 pushq %r12 531 530 pushq %r13
-1
arch/x86/crypto/sha256-ssse3-asm.S
··· 357 357 ######################################################################## 358 358 .text 359 359 SYM_TYPED_FUNC_START(sha256_transform_ssse3) 360 - .align 32 361 360 pushq %rbx 362 361 pushq %r12 363 362 pushq %r13
-1
arch/x86/crypto/sha256_ni_asm.S
··· 97 97 */ 98 98 99 99 .text 100 - .align 32 101 100 SYM_TYPED_FUNC_START(sha256_ni_transform) 102 101 103 102 shl $6, NUM_BLKS /* convert to bytes */
-1
arch/x86/crypto/sm3-avx-asm_64.S
··· 328 328 * void sm3_transform_avx(struct sm3_state *state, 329 329 * const u8 *data, int nblocks); 330 330 */ 331 - .align 16 332 331 SYM_TYPED_FUNC_START(sm3_transform_avx) 333 332 /* input: 334 333 * %rdi: ctx, CTX
-7
arch/x86/crypto/sm4-aesni-avx-asm_64.S
··· 140 140 141 141 142 142 .text 143 - .align 16 144 143 145 144 /* 146 145 * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, 147 146 * const u8 *src, int nblocks) 148 147 */ 149 - .align 8 150 148 SYM_FUNC_START(sm4_aesni_avx_crypt4) 151 149 /* input: 152 150 * %rdi: round key array, CTX ··· 248 250 RET; 249 251 SYM_FUNC_END(sm4_aesni_avx_crypt4) 250 252 251 - .align 8 252 253 SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) 253 254 /* input: 254 255 * %rdi: round key array, CTX ··· 361 364 * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, 362 365 * const u8 *src, int nblocks) 363 366 */ 364 - .align 8 365 367 SYM_FUNC_START(sm4_aesni_avx_crypt8) 366 368 /* input: 367 369 * %rdi: round key array, CTX ··· 416 420 * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, 417 421 * const u8 *src, u8 *iv) 418 422 */ 419 - .align 8 420 423 SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) 421 424 /* input: 422 425 * %rdi: round key array, CTX ··· 490 495 * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, 491 496 * const u8 *src, u8 *iv) 492 497 */ 493 - .align 8 494 498 SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) 495 499 /* input: 496 500 * %rdi: round key array, CTX ··· 539 545 * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, 540 546 * const u8 *src, u8 *iv) 541 547 */ 542 - .align 8 543 548 SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) 544 549 /* input: 545 550 * %rdi: round key array, CTX
-6
arch/x86/crypto/sm4-aesni-avx2-asm_64.S
··· 154 154 .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef 155 155 156 156 .text 157 - .align 16 158 - 159 - .align 8 160 157 SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) 161 158 /* input: 162 159 * %rdi: round key array, CTX ··· 279 282 * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, 280 283 * const u8 *src, u8 *iv) 281 284 */ 282 - .align 8 283 285 SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) 284 286 /* input: 285 287 * %rdi: round key array, CTX ··· 391 395 * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, 392 396 * const u8 *src, u8 *iv) 393 397 */ 394 - .align 8 395 398 SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) 396 399 /* input: 397 400 * %rdi: round key array, CTX ··· 444 449 * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, 445 450 * const u8 *src, u8 *iv) 446 451 */ 447 - .align 8 448 452 SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) 449 453 /* input: 450 454 * %rdi: round key array, CTX
-2
arch/x86/crypto/twofish-avx-x86_64-asm_64.S
··· 228 228 vpxor x2, wkey, x2; \ 229 229 vpxor x3, wkey, x3; 230 230 231 - .align 8 232 231 SYM_FUNC_START_LOCAL(__twofish_enc_blk8) 233 232 /* input: 234 233 * %rdi: ctx, CTX ··· 269 270 RET; 270 271 SYM_FUNC_END(__twofish_enc_blk8) 271 272 272 - .align 8 273 273 SYM_FUNC_START_LOCAL(__twofish_dec_blk8) 274 274 /* input: 275 275 * %rdi: ctx, CTX
+2 -2
arch/x86/entry/entry_32.S
··· 1181 1181 * is using the thread stack right now, so it's safe for us to use it. 1182 1182 */ 1183 1183 movl %esp, %ebx 1184 - movl PER_CPU_VAR(cpu_current_top_of_stack), %esp 1184 + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp 1185 1185 call exc_nmi 1186 1186 movl %ebx, %esp 1187 1187 ··· 1243 1243 /* Prevent any naive code from trying to unwind to our caller. */ 1244 1244 xorl %ebp, %ebp 1245 1245 1246 - movl PER_CPU_VAR(cpu_current_top_of_stack), %esi 1246 + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi 1247 1247 leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp 1248 1248 1249 1249 call make_task_dead
+28 -22
arch/x86/entry/entry_64.S
··· 92 92 /* tss.sp2 is scratch space. */ 93 93 movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) 94 94 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp 95 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 95 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp 96 96 97 97 SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) 98 98 ANNOTATE_NOENDBR ··· 252 252 253 253 #ifdef CONFIG_STACKPROTECTOR 254 254 movq TASK_stack_canary(%rsi), %rbx 255 - movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset 255 + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary 256 256 #endif 257 257 258 258 /* ··· 284 284 * r12: kernel thread arg 285 285 */ 286 286 .pushsection .text, "ax" 287 - SYM_CODE_START(ret_from_fork) 287 + __FUNC_ALIGN 288 + SYM_CODE_START_NOALIGN(ret_from_fork) 288 289 UNWIND_HINT_EMPTY 289 290 ANNOTATE_NOENDBR // copy_thread 291 + CALL_DEPTH_ACCOUNT 290 292 movq %rax, %rdi 291 293 call schedule_tail /* rdi: 'prev' task parameter */ 292 294 ··· 328 326 #endif 329 327 .endm 330 328 331 - SYM_CODE_START_LOCAL(xen_error_entry) 329 + SYM_CODE_START(xen_error_entry) 330 + ANNOTATE_NOENDBR 332 331 UNWIND_HINT_FUNC 333 332 PUSH_AND_CLEAR_REGS save_ret=1 334 333 ENCODE_FRAME_POINTER 8 335 - UNTRAIN_RET 334 + UNTRAIN_RET_FROM_CALL 336 335 RET 337 336 SYM_CODE_END(xen_error_entry) 338 337 ··· 603 600 * shared between 32 and 64 bit and emit the __irqentry_text_* markers 604 601 * so the stacktrace boundary checks work. 
605 602 */ 606 - .align 16 603 + __ALIGN 607 604 .globl __irqentry_text_start 608 605 __irqentry_text_start: 609 606 610 607 #include <asm/idtentry.h> 611 608 612 - .align 16 609 + __ALIGN 613 610 .globl __irqentry_text_end 614 611 __irqentry_text_end: 615 612 ANNOTATE_NOENDBR ··· 831 828 * 832 829 * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) 833 830 */ 834 - SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) 831 + __FUNC_ALIGN 832 + SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback) 835 833 836 834 /* 837 835 * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will ··· 860 856 * We distinguish between categories by comparing each saved segment register 861 857 * with its current contents: any discrepancy means we in category 1. 862 858 */ 863 - SYM_CODE_START(xen_failsafe_callback) 859 + __FUNC_ALIGN 860 + SYM_CODE_START_NOALIGN(xen_failsafe_callback) 864 861 UNWIND_HINT_EMPTY 865 862 ENDBR 866 863 movl %ds, %ecx ··· 908 903 * R14 - old CR3 909 904 * R15 - old SPEC_CTRL 910 905 */ 911 - SYM_CODE_START_LOCAL(paranoid_entry) 906 + SYM_CODE_START(paranoid_entry) 907 + ANNOTATE_NOENDBR 912 908 UNWIND_HINT_FUNC 913 909 PUSH_AND_CLEAR_REGS save_ret=1 914 910 ENCODE_FRAME_POINTER 8 ··· 978 972 * CR3 above, keep the old value in a callee saved register. 979 973 */ 980 974 IBRS_ENTER save_reg=%r15 981 - UNTRAIN_RET 975 + UNTRAIN_RET_FROM_CALL 982 976 983 977 RET 984 978 SYM_CODE_END(paranoid_entry) ··· 1044 1038 /* 1045 1039 * Switch GS and CR3 if needed. 1046 1040 */ 1047 - SYM_CODE_START_LOCAL(error_entry) 1041 + SYM_CODE_START(error_entry) 1042 + ANNOTATE_NOENDBR 1048 1043 UNWIND_HINT_FUNC 1049 1044 1050 1045 PUSH_AND_CLEAR_REGS save_ret=1 ··· 1063 1056 /* We have user CR3. Change to kernel CR3. 
*/ 1064 1057 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax 1065 1058 IBRS_ENTER 1066 - UNTRAIN_RET 1059 + UNTRAIN_RET_FROM_CALL 1067 1060 1068 1061 leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ 1069 - .Lerror_entry_from_usermode_after_swapgs: 1070 - 1071 1062 /* Put us onto the real thread stack. */ 1072 - call sync_regs 1073 - RET 1063 + jmp sync_regs 1074 1064 1075 1065 /* 1076 1066 * There are two places in the kernel that can potentially fault with ··· 1098 1094 */ 1099 1095 .Lerror_entry_done_lfence: 1100 1096 FENCE_SWAPGS_KERNEL_ENTRY 1097 + CALL_DEPTH_ACCOUNT 1101 1098 leaq 8(%rsp), %rax /* return pt_regs pointer */ 1102 1099 ANNOTATE_UNRET_END 1103 1100 RET ··· 1117 1112 FENCE_SWAPGS_USER_ENTRY 1118 1113 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax 1119 1114 IBRS_ENTER 1120 - UNTRAIN_RET 1115 + UNTRAIN_RET_FROM_CALL 1121 1116 1122 1117 /* 1123 1118 * Pretend that the exception came from user mode: set up pt_regs ··· 1126 1121 leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ 1127 1122 call fixup_bad_iret 1128 1123 mov %rax, %rdi 1129 - jmp .Lerror_entry_from_usermode_after_swapgs 1124 + jmp sync_regs 1130 1125 SYM_CODE_END(error_entry) 1131 1126 1132 1127 SYM_CODE_START_LOCAL(error_return) ··· 1211 1206 FENCE_SWAPGS_USER_ENTRY 1212 1207 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx 1213 1208 movq %rsp, %rdx 1214 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 1209 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp 1215 1210 UNWIND_HINT_IRET_REGS base=%rdx offset=8 1216 1211 pushq 5*8(%rdx) /* pt_regs->ss */ 1217 1212 pushq 4*8(%rdx) /* pt_regs->rsp */ ··· 1521 1516 #endif 1522 1517 1523 1518 .pushsection .text, "ax" 1524 - SYM_CODE_START(rewind_stack_and_make_dead) 1519 + __FUNC_ALIGN 1520 + SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) 1525 1521 UNWIND_HINT_FUNC 1526 1522 /* Prevent any naive code from trying to unwind to our caller. 
*/ 1527 1523 xorl %ebp, %ebp 1528 1524 1529 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rax 1525 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax 1530 1526 leaq -PTREGS_SIZE(%rax), %rsp 1531 1527 UNWIND_HINT_REGS 1532 1528
+3 -4
arch/x86/entry/entry_64_compat.S
··· 58 58 SWITCH_TO_KERNEL_CR3 scratch_reg=%rax 59 59 popq %rax 60 60 61 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 61 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp 62 62 63 63 /* Construct struct pt_regs on stack */ 64 64 pushq $__USER_DS /* pt_regs->ss */ ··· 128 128 popfq 129 129 jmp .Lsysenter_flags_fixed 130 130 SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) 131 - ANNOTATE_NOENDBR // is_sysenter_singlestep 132 131 SYM_CODE_END(entry_SYSENTER_compat) 133 132 134 133 /* ··· 190 191 SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp 191 192 192 193 /* Switch to the kernel stack */ 193 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 194 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp 194 195 195 196 SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) 196 197 ANNOTATE_NOENDBR ··· 331 332 ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV 332 333 333 334 movq %rsp, %rax 334 - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 335 + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp 335 336 336 337 pushq 5*8(%rax) /* regs->ss */ 337 338 pushq 4*8(%rax) /* regs->rsp */
+2 -2
arch/x86/entry/thunk_64.S
··· 11 11 12 12 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ 13 13 .macro THUNK name, func 14 - SYM_FUNC_START_NOALIGN(\name) 14 + SYM_FUNC_START(\name) 15 15 pushq %rbp 16 16 movq %rsp, %rbp 17 17 ··· 36 36 EXPORT_SYMBOL(preempt_schedule_thunk) 37 37 EXPORT_SYMBOL(preempt_schedule_notrace_thunk) 38 38 39 - SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) 39 + SYM_CODE_START_LOCAL(__thunk_restore) 40 40 popq %r11 41 41 popq %r10 42 42 popq %r9
+8 -6
arch/x86/entry/vdso/Makefile
··· 33 33 vobjs-$(CONFIG_X86_SGX) += vsgx.o 34 34 35 35 # files to link into kernel 36 - obj-y += vma.o extable.o 37 - KASAN_SANITIZE_vma.o := y 38 - UBSAN_SANITIZE_vma.o := y 39 - KCSAN_SANITIZE_vma.o := y 40 - OBJECT_FILES_NON_STANDARD_vma.o := n 36 + obj-y += vma.o extable.o 37 + KASAN_SANITIZE_vma.o := y 38 + UBSAN_SANITIZE_vma.o := y 39 + KCSAN_SANITIZE_vma.o := y 40 + OBJECT_FILES_NON_STANDARD_vma.o := n 41 + OBJECT_FILES_NON_STANDARD_extable.o := n 41 42 42 43 # vDSO images to build 43 44 vdso_img-$(VDSO64-y) += 64 ··· 95 94 endif 96 95 endif 97 96 98 - $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) 97 + $(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) 99 98 $(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO 100 99 101 100 # ··· 158 157 KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) 159 158 KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) 160 159 KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) 160 + KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) 161 161 KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic 162 162 KBUILD_CFLAGS_32 += -fno-stack-protector 163 163 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+65 -3
arch/x86/include/asm/alternative.h
··· 78 78 extern void apply_retpolines(s32 *start, s32 *end); 79 79 extern void apply_returns(s32 *start, s32 *end); 80 80 extern void apply_ibt_endbr(s32 *start, s32 *end); 81 + extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, 82 + s32 *start_cfi, s32 *end_cfi); 81 83 82 84 struct module; 85 + struct paravirt_patch_site; 86 + 87 + struct callthunk_sites { 88 + s32 *call_start, *call_end; 89 + struct paravirt_patch_site *pv_start, *pv_end; 90 + }; 91 + 92 + #ifdef CONFIG_CALL_THUNKS 93 + extern void callthunks_patch_builtin_calls(void); 94 + extern void callthunks_patch_module_calls(struct callthunk_sites *sites, 95 + struct module *mod); 96 + extern void *callthunks_translate_call_dest(void *dest); 97 + extern bool is_callthunk(void *addr); 98 + extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); 99 + #else 100 + static __always_inline void callthunks_patch_builtin_calls(void) {} 101 + static __always_inline void 102 + callthunks_patch_module_calls(struct callthunk_sites *sites, 103 + struct module *mod) {} 104 + static __always_inline void *callthunks_translate_call_dest(void *dest) 105 + { 106 + return dest; 107 + } 108 + static __always_inline bool is_callthunk(void *addr) 109 + { 110 + return false; 111 + } 112 + static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, 113 + void *func) 114 + { 115 + return 0; 116 + } 117 + #endif 83 118 84 119 #ifdef CONFIG_SMP 85 120 extern void alternatives_smp_module_add(struct module *mod, char *name, ··· 382 347 #define old_len 141b-140b 383 348 #define new_len1 144f-143f 384 349 #define new_len2 145f-144f 350 + #define new_len3 146f-145f 385 351 386 352 /* 387 353 * gas compatible max based on the idea from: ··· 390 354 * 391 355 * The additional "-" is needed because gas uses a "true" value of -1. 
392 356 */ 393 - #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) 357 + #define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) 358 + #define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c)) 394 359 395 360 396 361 /* ··· 403 366 140: 404 367 \oldinstr 405 368 141: 406 - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ 407 - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 369 + .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \ 370 + (alt_max_2(new_len1, new_len2) - (old_len)),0x90 408 371 142: 409 372 410 373 .pushsection .altinstructions,"a" ··· 418 381 144: 419 382 \newinstr2 420 383 145: 384 + .popsection 385 + .endm 386 + 387 + .macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3 388 + 140: 389 + \oldinstr 390 + 141: 391 + .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \ 392 + (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90 393 + 142: 394 + 395 + .pushsection .altinstructions,"a" 396 + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f 397 + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f 398 + altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f 399 + .popsection 400 + 401 + .pushsection .altinstr_replacement,"ax" 402 + 143: 403 + \newinstr1 404 + 144: 405 + \newinstr2 406 + 145: 407 + \newinstr3 408 + 146: 421 409 .popsection 422 410 .endm 423 411
+1 -2
arch/x86/include/asm/cpufeatures.h
··· 305 305 #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ 306 306 #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ 307 307 #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ 308 - 309 - 308 + #define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ 310 309 #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ 311 310 312 311 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+29 -3
arch/x86/include/asm/current.h
··· 3 3 #define _ASM_X86_CURRENT_H 4 4 5 5 #include <linux/compiler.h> 6 - #include <asm/percpu.h> 7 6 8 7 #ifndef __ASSEMBLY__ 8 + 9 + #include <linux/cache.h> 10 + #include <asm/percpu.h> 11 + 9 12 struct task_struct; 10 13 11 - DECLARE_PER_CPU(struct task_struct *, current_task); 14 + struct pcpu_hot { 15 + union { 16 + struct { 17 + struct task_struct *current_task; 18 + int preempt_count; 19 + int cpu_number; 20 + #ifdef CONFIG_CALL_DEPTH_TRACKING 21 + u64 call_depth; 22 + #endif 23 + unsigned long top_of_stack; 24 + void *hardirq_stack_ptr; 25 + u16 softirq_pending; 26 + #ifdef CONFIG_X86_64 27 + bool hardirq_stack_inuse; 28 + #else 29 + void *softirq_stack_ptr; 30 + #endif 31 + }; 32 + u8 pad[64]; 33 + }; 34 + }; 35 + static_assert(sizeof(struct pcpu_hot) == 64); 36 + 37 + DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); 12 38 13 39 static __always_inline struct task_struct *get_current(void) 14 40 { 15 - return this_cpu_read_stable(current_task); 41 + return this_cpu_read_stable(pcpu_hot.current_task); 16 42 } 17 43 18 44 #define current get_current()
+1 -1
arch/x86/include/asm/debugreg.h
··· 2 2 #ifndef _ASM_X86_DEBUGREG_H 3 3 #define _ASM_X86_DEBUGREG_H 4 4 5 - 6 5 #include <linux/bug.h> 6 + #include <linux/percpu.h> 7 7 #include <uapi/asm/debugreg.h> 8 8 9 9 DECLARE_PER_CPU(unsigned long, cpu_dr7);
+8 -1
arch/x86/include/asm/disabled-features.h
··· 69 69 # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) 70 70 #endif 71 71 72 + #ifdef CONFIG_CALL_DEPTH_TRACKING 73 + # define DISABLE_CALL_DEPTH_TRACKING 0 74 + #else 75 + # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) 76 + #endif 77 + 72 78 #ifdef CONFIG_INTEL_IOMMU_SVM 73 79 # define DISABLE_ENQCMD 0 74 80 #else ··· 113 107 #define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST) 114 108 #define DISABLED_MASK9 (DISABLE_SGX) 115 109 #define DISABLED_MASK10 0 116 - #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) 110 + #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ 111 + DISABLE_CALL_DEPTH_TRACKING) 117 112 #define DISABLED_MASK12 0 118 113 #define DISABLED_MASK13 0 119 114 #define DISABLED_MASK14 0
+2 -1
arch/x86/include/asm/hardirq.h
··· 3 3 #define _ASM_X86_HARDIRQ_H 4 4 5 5 #include <linux/threads.h> 6 + #include <asm/current.h> 6 7 7 8 typedef struct { 8 - u16 __softirq_pending; 9 9 #if IS_ENABLED(CONFIG_KVM_INTEL) 10 10 u8 kvm_cpu_l1tf_flush_l1d; 11 11 #endif ··· 60 60 extern u64 arch_irq_stat(void); 61 61 #define arch_irq_stat arch_irq_stat 62 62 63 + #define local_softirq_pending_ref pcpu_hot.softirq_pending 63 64 64 65 #if IS_ENABLED(CONFIG_KVM_INTEL) 65 66 static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+6 -6
arch/x86/include/asm/irq_stack.h
··· 116 116 ASM_CALL_ARG2 117 117 118 118 #define call_on_irqstack(func, asm_call, argconstr...) \ 119 - call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ 119 + call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \ 120 120 func, asm_call, argconstr) 121 121 122 122 /* Macros to assert type correctness for run_*_on_irqstack macros */ ··· 135 135 * User mode entry and interrupt on the irq stack do not \ 136 136 * switch stacks. If from user mode the task stack is empty. \ 137 137 */ \ 138 - if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ 138 + if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \ 139 139 irq_enter_rcu(); \ 140 140 func(c_args); \ 141 141 irq_exit_rcu(); \ ··· 146 146 * places. Invoke the stack switch macro with the call \ 147 147 * sequence which matches the above direct invocation. \ 148 148 */ \ 149 - __this_cpu_write(hardirq_stack_inuse, true); \ 149 + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ 150 150 call_on_irqstack(func, asm_call, constr); \ 151 - __this_cpu_write(hardirq_stack_inuse, false); \ 151 + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ 152 152 } \ 153 153 } 154 154 ··· 212 212 */ 213 213 #define do_softirq_own_stack() \ 214 214 { \ 215 - __this_cpu_write(hardirq_stack_inuse, true); \ 215 + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ 216 216 call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ 217 - __this_cpu_write(hardirq_stack_inuse, false); \ 217 + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ 218 218 } 219 219 220 220 #endif
+55 -8
arch/x86/include/asm/linkage.h
··· 12 12 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) 13 13 #endif /* CONFIG_X86_32 */ 14 14 15 - #ifdef __ASSEMBLY__ 16 - 17 - #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) 18 - #define __ALIGN .p2align 4, 0x90 15 + #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; 19 16 #define __ALIGN_STR __stringify(__ALIGN) 17 + 18 + #if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) 19 + #define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; 20 + #else 21 + #define FUNCTION_PADDING 20 22 #endif 23 + 24 + #if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) 25 + # define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING 26 + #else 27 + # define __FUNC_ALIGN __ALIGN 28 + #endif 29 + 30 + #define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) 31 + #define SYM_F_ALIGN __FUNC_ALIGN 32 + 33 + #ifdef __ASSEMBLY__ 21 34 22 35 #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) 23 36 #define RET jmp __x86_return_thunk ··· 56 43 57 44 #endif /* __ASSEMBLY__ */ 58 45 46 + /* 47 + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the 48 + * CFI symbol layout changes. 49 + * 50 + * Without CALL_THUNKS: 51 + * 52 + * .align FUNCTION_ALIGNMENT 53 + * __cfi_##name: 54 + * .skip FUNCTION_PADDING, 0x90 55 + * .byte 0xb8 56 + * .long __kcfi_typeid_##name 57 + * name: 58 + * 59 + * With CALL_THUNKS: 60 + * 61 + * .align FUNCTION_ALIGNMENT 62 + * __cfi_##name: 63 + * .byte 0xb8 64 + * .long __kcfi_typeid_##name 65 + * .skip FUNCTION_PADDING, 0x90 66 + * name: 67 + * 68 + * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
69 + */ 70 + 71 + #ifdef CONFIG_CALL_PADDING 72 + #define CFI_PRE_PADDING 73 + #define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; 74 + #else 75 + #define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; 76 + #define CFI_POST_PADDING 77 + #endif 78 + 59 79 #define __CFI_TYPE(name) \ 60 80 SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ 61 - .fill 11, 1, 0x90 ASM_NL \ 81 + CFI_PRE_PADDING \ 62 82 .byte 0xb8 ASM_NL \ 63 83 .long __kcfi_typeid_##name ASM_NL \ 84 + CFI_POST_PADDING \ 64 85 SYM_FUNC_END(__cfi_##name) 65 86 66 87 /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ ··· 104 57 105 58 /* SYM_FUNC_START -- use for global functions */ 106 59 #define SYM_FUNC_START(name) \ 107 - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ 60 + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ 108 61 ENDBR 109 62 110 63 /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ ··· 114 67 115 68 /* SYM_FUNC_START_LOCAL -- use for local functions */ 116 69 #define SYM_FUNC_START_LOCAL(name) \ 117 - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ 70 + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \ 118 71 ENDBR 119 72 120 73 /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ ··· 124 77 125 78 /* SYM_FUNC_START_WEAK -- use for weak functions */ 126 79 #define SYM_FUNC_START_WEAK(name) \ 127 - SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ 80 + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \ 128 81 ENDBR 129 82 130 83 /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
+170 -6
arch/x86/include/asm/nospec-branch.h
··· 12 12 #include <asm/msr-index.h> 13 13 #include <asm/unwind_hints.h> 14 14 #include <asm/percpu.h> 15 + #include <asm/current.h> 15 16 16 - #define RETPOLINE_THUNK_SIZE 32 17 + /* 18 + * Call depth tracking for Intel SKL CPUs to address the RSB underflow 19 + * issue in software. 20 + * 21 + * The tracking does not use a counter. It uses arithmetic shift 22 + * right on call entry and logical shift left on return. 23 + * 24 + * The depth tracking variable is initialized to 0x8000.... when the call 25 + * depth is zero. The arithmetic shift right sign extends the MSB and 26 + * saturates after the 12th call. The shift count is 5 for both directions 27 + * so the tracking covers 12 nested calls. 28 + * 29 + * Call 30 + * 0: 0x8000000000000000 0x0000000000000000 31 + * 1: 0xfc00000000000000 0xf000000000000000 32 + * ... 33 + * 11: 0xfffffffffffffff8 0xfffffffffffffc00 34 + * 12: 0xffffffffffffffff 0xffffffffffffffe0 35 + * 36 + * After a return buffer fill the depth is credited 12 calls before the 37 + * next stuffing has to take place. 38 + * 39 + * There is an inaccuracy for situations like this: 40 + * 41 + * 10 calls 42 + * 5 returns 43 + * 3 calls 44 + * 4 returns 45 + * 3 calls 46 + * .... 47 + * 48 + * The shift count might cause this to be off by one in either direction, 49 + * but there is still a cushion vs. the RSB depth. The algorithm does not 50 + * claim to be perfect and it can be speculated around by the CPU, but it 51 + * is considered that it obfuscates the problem enough to make exploitation 52 + * extremely difficult.
53 + */ 54 + #define RET_DEPTH_SHIFT 5 55 + #define RSB_RET_STUFF_LOOPS 16 56 + #define RET_DEPTH_INIT 0x8000000000000000ULL 57 + #define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL 58 + #define RET_DEPTH_CREDIT 0xffffffffffffffffULL 59 + 60 + #ifdef CONFIG_CALL_THUNKS_DEBUG 61 + # define CALL_THUNKS_DEBUG_INC_CALLS \ 62 + incq %gs:__x86_call_count; 63 + # define CALL_THUNKS_DEBUG_INC_RETS \ 64 + incq %gs:__x86_ret_count; 65 + # define CALL_THUNKS_DEBUG_INC_STUFFS \ 66 + incq %gs:__x86_stuffs_count; 67 + # define CALL_THUNKS_DEBUG_INC_CTXSW \ 68 + incq %gs:__x86_ctxsw_count; 69 + #else 70 + # define CALL_THUNKS_DEBUG_INC_CALLS 71 + # define CALL_THUNKS_DEBUG_INC_RETS 72 + # define CALL_THUNKS_DEBUG_INC_STUFFS 73 + # define CALL_THUNKS_DEBUG_INC_CTXSW 74 + #endif 75 + 76 + #if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) 77 + 78 + #include <asm/asm-offsets.h> 79 + 80 + #define CREDIT_CALL_DEPTH \ 81 + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); 82 + 83 + #define ASM_CREDIT_CALL_DEPTH \ 84 + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); 85 + 86 + #define RESET_CALL_DEPTH \ 87 + mov $0x80, %rax; \ 88 + shl $56, %rax; \ 89 + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); 90 + 91 + #define RESET_CALL_DEPTH_FROM_CALL \ 92 + mov $0xfc, %rax; \ 93 + shl $56, %rax; \ 94 + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ 95 + CALL_THUNKS_DEBUG_INC_CALLS 96 + 97 + #define INCREMENT_CALL_DEPTH \ 98 + sarq $5, %gs:pcpu_hot + X86_call_depth; \ 99 + CALL_THUNKS_DEBUG_INC_CALLS 100 + 101 + #define ASM_INCREMENT_CALL_DEPTH \ 102 + sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ 103 + CALL_THUNKS_DEBUG_INC_CALLS 104 + 105 + #else 106 + #define CREDIT_CALL_DEPTH 107 + #define ASM_CREDIT_CALL_DEPTH 108 + #define RESET_CALL_DEPTH 109 + #define INCREMENT_CALL_DEPTH 110 + #define ASM_INCREMENT_CALL_DEPTH 111 + #define RESET_CALL_DEPTH_FROM_CALL 112 + #endif 17 113 18 114 /* 19 115 * Fill the CPU return stack buffer. 
··· 128 32 * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. 129 33 */ 130 34 35 + #define RETPOLINE_THUNK_SIZE 32 131 36 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ 132 37 133 38 /* ··· 157 60 dec reg; \ 158 61 jnz 771b; \ 159 62 /* barrier for jnz misprediction */ \ 160 - lfence; 63 + lfence; \ 64 + ASM_CREDIT_CALL_DEPTH \ 65 + CALL_THUNKS_DEBUG_INC_CTXSW 161 66 #else 162 67 /* 163 68 * i386 doesn't unconditionally have LFENCE, as such it can't ··· 284 185 * where we have a stack but before any RET instruction. 285 186 */ 286 187 .macro UNTRAIN_RET 287 - #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) 188 + #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ 189 + defined(CONFIG_CALL_DEPTH_TRACKING) 288 190 ANNOTATE_UNRET_END 289 - ALTERNATIVE_2 "", \ 290 - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ 291 - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB 191 + ALTERNATIVE_3 "", \ 192 + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ 193 + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ 194 + __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH 195 + #endif 196 + .endm 197 + 198 + .macro UNTRAIN_RET_FROM_CALL 199 + #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ 200 + defined(CONFIG_CALL_DEPTH_TRACKING) 201 + ANNOTATE_UNRET_END 202 + ALTERNATIVE_3 "", \ 203 + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ 204 + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ 205 + __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH 206 + #endif 207 + .endm 208 + 209 + 210 + .macro CALL_DEPTH_ACCOUNT 211 + #ifdef CONFIG_CALL_DEPTH_TRACKING 212 + ALTERNATIVE "", \ 213 + __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH 292 214 #endif 293 215 .endm 294 216 ··· 323 203 324 204 typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; 325 205 extern retpoline_thunk_t __x86_indirect_thunk_array[]; 206 + extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; 207 + 
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; 326 208 327 209 extern void __x86_return_thunk(void); 328 210 extern void zen_untrain_ret(void); 329 211 extern void entry_ibpb(void); 330 212 213 + #ifdef CONFIG_CALL_THUNKS 214 + extern void (*x86_return_thunk)(void); 215 + #else 216 + #define x86_return_thunk (&__x86_return_thunk) 217 + #endif 218 + 219 + #ifdef CONFIG_CALL_DEPTH_TRACKING 220 + extern void __x86_return_skl(void); 221 + 222 + static inline void x86_set_skl_return_thunk(void) 223 + { 224 + x86_return_thunk = &__x86_return_skl; 225 + } 226 + 227 + #define CALL_DEPTH_ACCOUNT \ 228 + ALTERNATIVE("", \ 229 + __stringify(INCREMENT_CALL_DEPTH), \ 230 + X86_FEATURE_CALL_DEPTH) 231 + 232 + #ifdef CONFIG_CALL_THUNKS_DEBUG 233 + DECLARE_PER_CPU(u64, __x86_call_count); 234 + DECLARE_PER_CPU(u64, __x86_ret_count); 235 + DECLARE_PER_CPU(u64, __x86_stuffs_count); 236 + DECLARE_PER_CPU(u64, __x86_ctxsw_count); 237 + #endif 238 + #else 239 + static inline void x86_set_skl_return_thunk(void) {} 240 + 241 + #define CALL_DEPTH_ACCOUNT "" 242 + 243 + #endif 244 + 331 245 #ifdef CONFIG_RETPOLINE 332 246 333 247 #define GEN(reg) \ 334 248 extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; 249 + #include <asm/GEN-for-each-reg.h> 250 + #undef GEN 251 + 252 + #define GEN(reg) \ 253 + extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; 254 + #include <asm/GEN-for-each-reg.h> 255 + #undef GEN 256 + 257 + #define GEN(reg) \ 258 + extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; 335 259 #include <asm/GEN-for-each-reg.h> 336 260 #undef GEN 337 261
+15 -2
arch/x86/include/asm/paravirt.h
··· 4 4 /* Various instructions on x86 need to be replaced for 5 5 * para-virtualization: those hooks are defined here. */ 6 6 7 + #include <asm/paravirt_types.h> 8 + 7 9 #ifdef CONFIG_PARAVIRT 8 10 #include <asm/pgtable_types.h> 9 11 #include <asm/asm.h> 10 12 #include <asm/nospec-branch.h> 11 - 12 - #include <asm/paravirt_types.h> 13 13 14 14 #ifndef __ASSEMBLY__ 15 15 #include <linux/bug.h> ··· 665 665 asm(".pushsection " section ", \"ax\";" \ 666 666 ".globl " PV_THUNK_NAME(func) ";" \ 667 667 ".type " PV_THUNK_NAME(func) ", @function;" \ 668 + ASM_FUNC_ALIGN \ 668 669 PV_THUNK_NAME(func) ":" \ 669 670 ASM_ENDBR \ 670 671 FRAME_BEGIN \ ··· 730 729 #undef PVOP_CALL3 731 730 #undef PVOP_VCALL4 732 731 #undef PVOP_CALL4 732 + 733 + #define DEFINE_PARAVIRT_ASM(func, instr, sec) \ 734 + asm (".pushsection " #sec ", \"ax\"\n" \ 735 + ".global " #func "\n\t" \ 736 + ".type " #func ", @function\n\t" \ 737 + ASM_FUNC_ALIGN "\n" \ 738 + #func ":\n\t" \ 739 + ASM_ENDBR \ 740 + instr "\n\t" \ 741 + ASM_RET \ 742 + ".size " #func ", . - " #func "\n\t" \ 743 + ".popsection") 733 744 734 745 extern void default_banner(void); 735 746
+19 -15
arch/x86/include/asm/paravirt_types.h
··· 3 3 #define _ASM_X86_PARAVIRT_TYPES_H 4 4 5 5 #ifndef __ASSEMBLY__ 6 + /* These all sit in the .parainstructions section to tell us what to patch. */ 7 + struct paravirt_patch_site { 8 + u8 *instr; /* original instructions */ 9 + u8 type; /* type of this instruction */ 10 + u8 len; /* length of original instruction */ 11 + }; 12 + 13 + /* Lazy mode for batching updates / context switch */ 14 + enum paravirt_lazy_mode { 15 + PARAVIRT_LAZY_NONE, 16 + PARAVIRT_LAZY_MMU, 17 + PARAVIRT_LAZY_CPU, 18 + }; 19 + #endif 20 + 21 + #ifdef CONFIG_PARAVIRT 22 + 23 + #ifndef __ASSEMBLY__ 6 24 7 25 #include <asm/desc_defs.h> 8 26 #include <asm/pgtable_types.h> ··· 552 534 __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ 553 535 PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) 554 536 555 - /* Lazy mode for batching updates / context switch */ 556 - enum paravirt_lazy_mode { 557 - PARAVIRT_LAZY_NONE, 558 - PARAVIRT_LAZY_MMU, 559 - PARAVIRT_LAZY_CPU, 560 - }; 561 - 562 537 enum paravirt_lazy_mode paravirt_get_lazy_mode(void); 563 538 void paravirt_start_context_switch(struct task_struct *prev); 564 539 void paravirt_end_context_switch(struct task_struct *next); ··· 567 556 568 557 #define paravirt_nop ((void *)_paravirt_nop) 569 558 570 - /* These all sit in the .parainstructions section to tell us what to patch. */ 571 - struct paravirt_patch_site { 572 - u8 *instr; /* original instructions */ 573 - u8 type; /* type of this instruction */ 574 - u8 len; /* length of original instruction */ 575 - }; 576 - 577 559 extern struct paravirt_patch_site __parainstructions[], 578 560 __parainstructions_end[]; 579 561 580 562 #endif /* __ASSEMBLY__ */ 581 - 563 + #endif /* CONFIG_PARAVIRT */ 582 564 #endif /* _ASM_X86_PARAVIRT_TYPES_H */
+14 -13
arch/x86/include/asm/preempt.h
··· 4 4 5 5 #include <asm/rmwcc.h> 6 6 #include <asm/percpu.h> 7 + #include <asm/current.h> 8 + 7 9 #include <linux/thread_info.h> 8 10 #include <linux/static_call_types.h> 9 - 10 - DECLARE_PER_CPU(int, __preempt_count); 11 11 12 12 /* We use the MSB mostly because its available */ 13 13 #define PREEMPT_NEED_RESCHED 0x80000000 ··· 24 24 */ 25 25 static __always_inline int preempt_count(void) 26 26 { 27 - return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; 27 + return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; 28 28 } 29 29 30 30 static __always_inline void preempt_count_set(int pc) ··· 32 32 int old, new; 33 33 34 34 do { 35 - old = raw_cpu_read_4(__preempt_count); 35 + old = raw_cpu_read_4(pcpu_hot.preempt_count); 36 36 new = (old & PREEMPT_NEED_RESCHED) | 37 37 (pc & ~PREEMPT_NEED_RESCHED); 38 - } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); 38 + } while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old); 39 39 } 40 40 41 41 /* ··· 44 44 #define init_task_preempt_count(p) do { } while (0) 45 45 46 46 #define init_idle_preempt_count(p, cpu) do { \ 47 - per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ 47 + per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ 48 48 } while (0) 49 49 50 50 /* ··· 58 58 59 59 static __always_inline void set_preempt_need_resched(void) 60 60 { 61 - raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); 61 + raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); 62 62 } 63 63 64 64 static __always_inline void clear_preempt_need_resched(void) 65 65 { 66 - raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); 66 + raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); 67 67 } 68 68 69 69 static __always_inline bool test_preempt_need_resched(void) 70 70 { 71 - return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); 71 + return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); 72 72 } 73 73 74 74 /* ··· 77 77 78 78 static 
__always_inline void __preempt_count_add(int val) 79 79 { 80 - raw_cpu_add_4(__preempt_count, val); 80 + raw_cpu_add_4(pcpu_hot.preempt_count, val); 81 81 } 82 82 83 83 static __always_inline void __preempt_count_sub(int val) 84 84 { 85 - raw_cpu_add_4(__preempt_count, -val); 85 + raw_cpu_add_4(pcpu_hot.preempt_count, -val); 86 86 } 87 87 88 88 /* ··· 92 92 */ 93 93 static __always_inline bool __preempt_count_dec_and_test(void) 94 94 { 95 - return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); 95 + return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, 96 + __percpu_arg([var])); 96 97 } 97 98 98 99 /* ··· 101 100 */ 102 101 static __always_inline bool should_resched(int preempt_offset) 103 102 { 104 - return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); 103 + return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); 105 104 } 106 105 107 106 #ifdef CONFIG_PREEMPTION
+2 -9
arch/x86/include/asm/processor.h
··· 377 377 char stack[IRQ_STACK_SIZE]; 378 378 } __aligned(IRQ_STACK_SIZE); 379 379 380 - DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); 381 - 382 380 #ifdef CONFIG_X86_64 383 381 struct fixed_percpu_data { 384 382 /* ··· 399 401 return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); 400 402 } 401 403 402 - DECLARE_PER_CPU(void *, hardirq_stack_ptr); 403 - DECLARE_PER_CPU(bool, hardirq_stack_inuse); 404 404 extern asmlinkage void ignore_sysret(void); 405 405 406 406 /* Save actual FS/GS selectors and bases to current->thread */ ··· 407 411 #ifdef CONFIG_STACKPROTECTOR 408 412 DECLARE_PER_CPU(unsigned long, __stack_chk_guard); 409 413 #endif 410 - DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); 411 - DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); 412 414 #endif /* !X86_64 */ 413 415 414 416 struct perf_event; ··· 511 517 * and around vm86 mode and sp0 on x86_64 is special because of the 512 518 * entry trampoline. 513 519 */ 514 - return this_cpu_read_stable(cpu_current_top_of_stack); 520 + return this_cpu_read_stable(pcpu_hot.top_of_stack); 515 521 } 516 522 517 523 static __always_inline bool on_thread_stack(void) ··· 548 554 /* Defined in head.S */ 549 555 extern struct desc_ptr early_gdt_descr; 550 556 551 - extern void switch_to_new_gdt(int); 557 + extern void switch_gdt_and_percpu_base(int); 552 558 extern void load_direct_gdt(int); 553 559 extern void load_fixmap_gdt(int); 554 - extern void load_percpu_segment(int); 555 560 extern void cpu_init(void); 556 561 extern void cpu_init_secondary(void); 557 562 extern void cpu_init_exception_handling(void);
+20 -27
arch/x86/include/asm/qspinlock_paravirt.h
··· 14 14 15 15 __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); 16 16 #define __pv_queued_spin_unlock __pv_queued_spin_unlock 17 - #define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" 18 - #define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" 19 17 20 18 /* 21 19 * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock ··· 35 37 * rsi = lockval (second argument) 36 38 * rdx = internal variable (set to 0) 37 39 */ 38 - asm (".pushsection .spinlock.text, \"ax\";" 39 - ".globl " PV_UNLOCK ";" 40 - ".type " PV_UNLOCK ", @function;" 41 - ".align 4,0x90;" 42 - PV_UNLOCK ": " 43 - ASM_ENDBR 44 - FRAME_BEGIN 45 - "push %rdx;" 46 - "mov $0x1,%eax;" 47 - "xor %edx,%edx;" 48 - LOCK_PREFIX "cmpxchg %dl,(%rdi);" 49 - "cmp $0x1,%al;" 50 - "jne .slowpath;" 51 - "pop %rdx;" 40 + #define PV_UNLOCK_ASM \ 41 + FRAME_BEGIN \ 42 + "push %rdx\n\t" \ 43 + "mov $0x1,%eax\n\t" \ 44 + "xor %edx,%edx\n\t" \ 45 + LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \ 46 + "cmp $0x1,%al\n\t" \ 47 + "jne .slowpath\n\t" \ 48 + "pop %rdx\n\t" \ 49 + FRAME_END \ 50 + ASM_RET \ 51 + ".slowpath:\n\t" \ 52 + "push %rsi\n\t" \ 53 + "movzbl %al,%esi\n\t" \ 54 + "call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \ 55 + "pop %rsi\n\t" \ 56 + "pop %rdx\n\t" \ 52 57 FRAME_END 53 - ASM_RET 54 - ".slowpath: " 55 - "push %rsi;" 56 - "movzbl %al,%esi;" 57 - "call " PV_UNLOCK_SLOWPATH ";" 58 - "pop %rsi;" 59 - "pop %rdx;" 60 - FRAME_END 61 - ASM_RET 62 - ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" 63 - ".popsection"); 58 + 59 + DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock, 60 + PV_UNLOCK_ASM, .spinlock.text); 64 61 65 62 #else /* CONFIG_64BIT */ 66 63
+5 -7
arch/x86/include/asm/smp.h
··· 3 3 #define _ASM_X86_SMP_H 4 4 #ifndef __ASSEMBLY__ 5 5 #include <linux/cpumask.h> 6 - #include <asm/percpu.h> 7 6 8 - #include <asm/thread_info.h> 9 7 #include <asm/cpumask.h> 8 + #include <asm/current.h> 9 + #include <asm/thread_info.h> 10 10 11 11 extern int smp_num_siblings; 12 12 extern unsigned int num_processors; ··· 19 19 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); 20 20 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); 21 21 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); 22 - DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); 23 22 24 23 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); 25 24 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); ··· 149 150 150 151 /* 151 152 * This function is needed by all SMP systems. It must _always_ be valid 152 - * from the initial startup. We map APIC_BASE very early in page_setup(), 153 - * so this is correct in the x86 case. 153 + * from the initial startup. 154 154 */ 155 - #define raw_smp_processor_id() this_cpu_read(cpu_number) 156 - #define __smp_processor_id() __this_cpu_read(cpu_number) 155 + #define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) 156 + #define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) 157 157 158 158 #ifdef CONFIG_X86_32 159 159 extern int safe_smp_processor_id(void);
+1
arch/x86/include/asm/text-patching.h
··· 45 45 extern void text_poke_sync(void); 46 46 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); 47 47 extern void *text_poke_copy(void *addr, const void *opcode, size_t len); 48 + extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); 48 49 extern void *text_poke_set(void *addr, int c, size_t len); 49 50 extern int poke_int3_handler(struct pt_regs *regs); 50 51 extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
+2
arch/x86/kernel/Makefile
··· 143 143 144 144 obj-$(CONFIG_CFI_CLANG) += cfi.o 145 145 146 + obj-$(CONFIG_CALL_THUNKS) += callthunks.o 147 + 146 148 ### 147 149 # 64 bit specific files 148 150 ifeq ($(CONFIG_X86_64),y)
+498 -37
arch/x86/kernel/alternative.c
··· 116 116 117 117 extern s32 __retpoline_sites[], __retpoline_sites_end[]; 118 118 extern s32 __return_sites[], __return_sites_end[]; 119 + extern s32 __cfi_sites[], __cfi_sites_end[]; 119 120 extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; 120 121 extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 121 122 extern s32 __smp_locks[], __smp_locks_end[]; ··· 378 377 return i; 379 378 } 380 379 380 + static inline bool is_jcc32(struct insn *insn) 381 + { 382 + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ 383 + return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80; 384 + } 385 + 386 + static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) 387 + { 388 + u8 op = insn->opcode.bytes[0]; 389 + int i = 0; 390 + 391 + /* 392 + * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional 393 + * tail-calls. Deal with them. 394 + */ 395 + if (is_jcc32(insn)) { 396 + bytes[i++] = op; 397 + op = insn->opcode.bytes[1]; 398 + goto clang_jcc; 399 + } 400 + 401 + if (insn->length == 6) 402 + bytes[i++] = 0x2e; /* CS-prefix */ 403 + 404 + switch (op) { 405 + case CALL_INSN_OPCODE: 406 + __text_gen_insn(bytes+i, op, addr+i, 407 + __x86_indirect_call_thunk_array[reg], 408 + CALL_INSN_SIZE); 409 + i += CALL_INSN_SIZE; 410 + break; 411 + 412 + case JMP32_INSN_OPCODE: 413 + clang_jcc: 414 + __text_gen_insn(bytes+i, op, addr+i, 415 + __x86_indirect_jump_thunk_array[reg], 416 + JMP32_INSN_SIZE); 417 + i += JMP32_INSN_SIZE; 418 + break; 419 + 420 + default: 421 + WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr); 422 + return -1; 423 + } 424 + 425 + WARN_ON_ONCE(i != insn->length); 426 + 427 + return i; 428 + } 429 + 381 430 /* 382 431 * Rewrite the compiler generated retpoline thunk calls. 
383 432 * ··· 460 409 BUG_ON(reg == 4); 461 410 462 411 if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && 463 - !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) 412 + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { 413 + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) 414 + return emit_call_track_retpoline(addr, insn, reg, bytes); 415 + 464 416 return -1; 417 + } 465 418 466 419 op = insn->opcode.bytes[0]; 467 420 ··· 482 427 * [ NOP ] 483 428 * 1: 484 429 */ 485 - /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ 486 - if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { 430 + if (is_jcc32(insn)) { 487 431 cc = insn->opcode.bytes[1] & 0xf; 488 432 cc ^= 1; /* invert condition */ 489 433 ··· 572 518 } 573 519 574 520 #ifdef CONFIG_RETHUNK 521 + 522 + #ifdef CONFIG_CALL_THUNKS 523 + void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; 524 + #endif 525 + 575 526 /* 576 527 * Rewrite the compiler generated return thunk tail-calls. 577 528 * ··· 592 533 { 593 534 int i = 0; 594 535 595 - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) 596 - return -1; 536 + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { 537 + if (x86_return_thunk == __x86_return_thunk) 538 + return -1; 597 539 598 - bytes[i++] = RET_INSN_OPCODE; 540 + i = JMP32_INSN_SIZE; 541 + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); 542 + } else { 543 + bytes[i++] = RET_INSN_OPCODE; 544 + } 599 545 600 546 for (; i < insn->length;) 601 547 bytes[i++] = INT3_INSN_OPCODE; 602 - 603 548 return i; 604 549 } 605 550 ··· 657 594 658 595 #ifdef CONFIG_X86_KERNEL_IBT 659 596 597 + static void poison_endbr(void *addr, bool warn) 598 + { 599 + u32 endbr, poison = gen_endbr_poison(); 600 + 601 + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) 602 + return; 603 + 604 + if (!is_endbr(endbr)) { 605 + WARN_ON_ONCE(warn); 606 + return; 607 + } 608 + 609 + DPRINTK("ENDBR at: %pS (%px)", addr, addr); 610 + 611 + /* 612 + * When we have IBT, the lack of ENDBR 
will trigger #CP 613 + */ 614 + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); 615 + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); 616 + text_poke_early(addr, &poison, 4); 617 + } 618 + 660 619 /* 661 620 * Generated by: objtool --ibt 662 621 */ ··· 687 602 s32 *s; 688 603 689 604 for (s = start; s < end; s++) { 690 - u32 endbr, poison = gen_endbr_poison(); 691 605 void *addr = (void *)s + *s; 692 606 693 - if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) 694 - continue; 695 - 696 - if (WARN_ON_ONCE(!is_endbr(endbr))) 697 - continue; 698 - 699 - DPRINTK("ENDBR at: %pS (%px)", addr, addr); 700 - 701 - /* 702 - * When we have IBT, the lack of ENDBR will trigger #CP 703 - */ 704 - DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); 705 - DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); 706 - text_poke_early(addr, &poison, 4); 607 + poison_endbr(addr, true); 608 + if (IS_ENABLED(CONFIG_FINEIBT)) 609 + poison_endbr(addr - 16, false); 707 610 } 708 611 } 709 612 ··· 700 627 void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } 701 628 702 629 #endif /* CONFIG_X86_KERNEL_IBT */ 630 + 631 + #ifdef CONFIG_FINEIBT 632 + 633 + enum cfi_mode { 634 + CFI_DEFAULT, 635 + CFI_OFF, 636 + CFI_KCFI, 637 + CFI_FINEIBT, 638 + }; 639 + 640 + static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT; 641 + static bool cfi_rand __ro_after_init = true; 642 + static u32 cfi_seed __ro_after_init; 643 + 644 + /* 645 + * Re-hash the CFI hash with a boot-time seed while making sure the result is 646 + * not a valid ENDBR instruction. 
647 + */ 648 + static u32 cfi_rehash(u32 hash) 649 + { 650 + hash ^= cfi_seed; 651 + while (unlikely(is_endbr(hash) || is_endbr(-hash))) { 652 + bool lsb = hash & 1; 653 + hash >>= 1; 654 + if (lsb) 655 + hash ^= 0x80200003; 656 + } 657 + return hash; 658 + } 659 + 660 + static __init int cfi_parse_cmdline(char *str) 661 + { 662 + if (!str) 663 + return -EINVAL; 664 + 665 + while (str) { 666 + char *next = strchr(str, ','); 667 + if (next) { 668 + *next = 0; 669 + next++; 670 + } 671 + 672 + if (!strcmp(str, "auto")) { 673 + cfi_mode = CFI_DEFAULT; 674 + } else if (!strcmp(str, "off")) { 675 + cfi_mode = CFI_OFF; 676 + cfi_rand = false; 677 + } else if (!strcmp(str, "kcfi")) { 678 + cfi_mode = CFI_KCFI; 679 + } else if (!strcmp(str, "fineibt")) { 680 + cfi_mode = CFI_FINEIBT; 681 + } else if (!strcmp(str, "norand")) { 682 + cfi_rand = false; 683 + } else { 684 + pr_err("Ignoring unknown cfi option (%s).", str); 685 + } 686 + 687 + str = next; 688 + } 689 + 690 + return 0; 691 + } 692 + early_param("cfi", cfi_parse_cmdline); 693 + 694 + /* 695 + * kCFI FineIBT 696 + * 697 + * __cfi_\func: __cfi_\func: 698 + * movl $0x12345678,%eax // 5 endbr64 // 4 699 + * nop subl $0x12345678,%r10d // 7 700 + * nop jz 1f // 2 701 + * nop ud2 // 2 702 + * nop 1: nop // 1 703 + * nop 704 + * nop 705 + * nop 706 + * nop 707 + * nop 708 + * nop 709 + * nop 710 + * 711 + * 712 + * caller: caller: 713 + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 714 + * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4 715 + * je 1f // 2 nop4 // 4 716 + * ud2 // 2 717 + * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5 718 + * 719 + */ 720 + 721 + asm( ".pushsection .rodata \n" 722 + "fineibt_preamble_start: \n" 723 + " endbr64 \n" 724 + " subl $0x12345678, %r10d \n" 725 + " je fineibt_preamble_end \n" 726 + " ud2 \n" 727 + " nop \n" 728 + "fineibt_preamble_end: \n" 729 + ".popsection\n" 730 + ); 731 + 732 + extern u8 fineibt_preamble_start[]; 733 + extern u8 
fineibt_preamble_end[]; 734 + 735 + #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) 736 + #define fineibt_preamble_hash 7 737 + 738 + asm( ".pushsection .rodata \n" 739 + "fineibt_caller_start: \n" 740 + " movl $0x12345678, %r10d \n" 741 + " sub $16, %r11 \n" 742 + ASM_NOP4 743 + "fineibt_caller_end: \n" 744 + ".popsection \n" 745 + ); 746 + 747 + extern u8 fineibt_caller_start[]; 748 + extern u8 fineibt_caller_end[]; 749 + 750 + #define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) 751 + #define fineibt_caller_hash 2 752 + 753 + #define fineibt_caller_jmp (fineibt_caller_size - 2) 754 + 755 + static u32 decode_preamble_hash(void *addr) 756 + { 757 + u8 *p = addr; 758 + 759 + /* b8 78 56 34 12 mov $0x12345678,%eax */ 760 + if (p[0] == 0xb8) 761 + return *(u32 *)(addr + 1); 762 + 763 + return 0; /* invalid hash value */ 764 + } 765 + 766 + static u32 decode_caller_hash(void *addr) 767 + { 768 + u8 *p = addr; 769 + 770 + /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */ 771 + if (p[0] == 0x41 && p[1] == 0xba) 772 + return -*(u32 *)(addr + 2); 773 + 774 + /* eb 0c 78 56 34 12 jmp.d8 +12 */ 775 + if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp) 776 + return -*(u32 *)(addr + 2); 777 + 778 + return 0; /* invalid hash value */ 779 + } 780 + 781 + /* .retpoline_sites */ 782 + static int cfi_disable_callers(s32 *start, s32 *end) 783 + { 784 + /* 785 + * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate 786 + * intact for later usage. Also see decode_caller_hash() and 787 + * cfi_rewrite_callers(). 
788 + */ 789 + const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp }; 790 + s32 *s; 791 + 792 + for (s = start; s < end; s++) { 793 + void *addr = (void *)s + *s; 794 + u32 hash; 795 + 796 + addr -= fineibt_caller_size; 797 + hash = decode_caller_hash(addr); 798 + if (!hash) /* nocfi callers */ 799 + continue; 800 + 801 + text_poke_early(addr, jmp, 2); 802 + } 803 + 804 + return 0; 805 + } 806 + 807 + static int cfi_enable_callers(s32 *start, s32 *end) 808 + { 809 + /* 810 + * Re-enable kCFI, undo what cfi_disable_callers() did. 811 + */ 812 + const u8 mov[] = { 0x41, 0xba }; 813 + s32 *s; 814 + 815 + for (s = start; s < end; s++) { 816 + void *addr = (void *)s + *s; 817 + u32 hash; 818 + 819 + addr -= fineibt_caller_size; 820 + hash = decode_caller_hash(addr); 821 + if (!hash) /* nocfi callers */ 822 + continue; 823 + 824 + text_poke_early(addr, mov, 2); 825 + } 826 + 827 + return 0; 828 + } 829 + 830 + /* .cfi_sites */ 831 + static int cfi_rand_preamble(s32 *start, s32 *end) 832 + { 833 + s32 *s; 834 + 835 + for (s = start; s < end; s++) { 836 + void *addr = (void *)s + *s; 837 + u32 hash; 838 + 839 + hash = decode_preamble_hash(addr); 840 + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", 841 + addr, addr, 5, addr)) 842 + return -EINVAL; 843 + 844 + hash = cfi_rehash(hash); 845 + text_poke_early(addr + 1, &hash, 4); 846 + } 847 + 848 + return 0; 849 + } 850 + 851 + static int cfi_rewrite_preamble(s32 *start, s32 *end) 852 + { 853 + s32 *s; 854 + 855 + for (s = start; s < end; s++) { 856 + void *addr = (void *)s + *s; 857 + u32 hash; 858 + 859 + hash = decode_preamble_hash(addr); 860 + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", 861 + addr, addr, 5, addr)) 862 + return -EINVAL; 863 + 864 + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); 865 + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); 866 + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); 867 + } 868 + 869 + return 0; 870 + } 871 + 872 + /* 
.retpoline_sites */ 873 + static int cfi_rand_callers(s32 *start, s32 *end) 874 + { 875 + s32 *s; 876 + 877 + for (s = start; s < end; s++) { 878 + void *addr = (void *)s + *s; 879 + u32 hash; 880 + 881 + addr -= fineibt_caller_size; 882 + hash = decode_caller_hash(addr); 883 + if (hash) { 884 + hash = -cfi_rehash(hash); 885 + text_poke_early(addr + 2, &hash, 4); 886 + } 887 + } 888 + 889 + return 0; 890 + } 891 + 892 + static int cfi_rewrite_callers(s32 *start, s32 *end) 893 + { 894 + s32 *s; 895 + 896 + for (s = start; s < end; s++) { 897 + void *addr = (void *)s + *s; 898 + u32 hash; 899 + 900 + addr -= fineibt_caller_size; 901 + hash = decode_caller_hash(addr); 902 + if (hash) { 903 + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); 904 + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); 905 + text_poke_early(addr + fineibt_caller_hash, &hash, 4); 906 + } 907 + /* rely on apply_retpolines() */ 908 + } 909 + 910 + return 0; 911 + } 912 + 913 + static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 914 + s32 *start_cfi, s32 *end_cfi, bool builtin) 915 + { 916 + int ret; 917 + 918 + if (WARN_ONCE(fineibt_preamble_size != 16, 919 + "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) 920 + return; 921 + 922 + if (cfi_mode == CFI_DEFAULT) { 923 + cfi_mode = CFI_KCFI; 924 + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) 925 + cfi_mode = CFI_FINEIBT; 926 + } 927 + 928 + /* 929 + * Rewrite the callers to not use the __cfi_ stubs, such that we might 930 + * rewrite them. This disables all CFI. If this succeeds but any of the 931 + * later stages fails, we're without CFI. 
932 + */ 933 + ret = cfi_disable_callers(start_retpoline, end_retpoline); 934 + if (ret) 935 + goto err; 936 + 937 + if (cfi_rand) { 938 + if (builtin) 939 + cfi_seed = get_random_u32(); 940 + 941 + ret = cfi_rand_preamble(start_cfi, end_cfi); 942 + if (ret) 943 + goto err; 944 + 945 + ret = cfi_rand_callers(start_retpoline, end_retpoline); 946 + if (ret) 947 + goto err; 948 + } 949 + 950 + switch (cfi_mode) { 951 + case CFI_OFF: 952 + if (builtin) 953 + pr_info("Disabling CFI\n"); 954 + return; 955 + 956 + case CFI_KCFI: 957 + ret = cfi_enable_callers(start_retpoline, end_retpoline); 958 + if (ret) 959 + goto err; 960 + 961 + if (builtin) 962 + pr_info("Using kCFI\n"); 963 + return; 964 + 965 + case CFI_FINEIBT: 966 + ret = cfi_rewrite_preamble(start_cfi, end_cfi); 967 + if (ret) 968 + goto err; 969 + 970 + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); 971 + if (ret) 972 + goto err; 973 + 974 + if (builtin) 975 + pr_info("Using FineIBT CFI\n"); 976 + return; 977 + 978 + default: 979 + break; 980 + } 981 + 982 + err: 983 + pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); 984 + } 985 + 986 + #else 987 + 988 + static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 989 + s32 *start_cfi, s32 *end_cfi, bool builtin) 990 + { 991 + } 992 + 993 + #endif 994 + 995 + void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 996 + s32 *start_cfi, s32 *end_cfi) 997 + { 998 + return __apply_fineibt(start_retpoline, end_retpoline, 999 + start_cfi, end_cfi, 1000 + /* .builtin = */ false); 1001 + } 703 1002 704 1003 #ifdef CONFIG_SMP 705 1004 static void alternatives_smp_lock(const s32 *start, const s32 *end, ··· 1379 934 */ 1380 935 apply_paravirt(__parainstructions, __parainstructions_end); 1381 936 937 + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, 938 + __cfi_sites, __cfi_sites_end, true); 939 + 1382 940 /* 1383 941 * Rewrite the retpolines, must be done before alternatives since 1384 942 * those 
can rewrite the retpoline thunks. ··· 1394 946 * alternatives can be overwritten by their immediate fragments. 1395 947 */ 1396 948 apply_alternatives(__alt_instructions, __alt_instructions_end); 949 + 950 + /* 951 + * Now all calls are established. Apply the call thunks if 952 + * required. 953 + */ 954 + callthunks_patch_builtin_calls(); 1397 955 1398 956 apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); 1399 957 ··· 1690 1236 return __text_poke(text_poke_memcpy, addr, opcode, len); 1691 1237 } 1692 1238 1239 + void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, 1240 + bool core_ok) 1241 + { 1242 + unsigned long start = (unsigned long)addr; 1243 + size_t patched = 0; 1244 + 1245 + if (WARN_ON_ONCE(!core_ok && core_kernel_text(start))) 1246 + return NULL; 1247 + 1248 + while (patched < len) { 1249 + unsigned long ptr = start + patched; 1250 + size_t s; 1251 + 1252 + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); 1253 + 1254 + __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); 1255 + patched += s; 1256 + } 1257 + return addr; 1258 + } 1259 + 1693 1260 /** 1694 1261 * text_poke_copy - Copy instructions into (an unused part of) RX memory 1695 1262 * @addr: address to modify ··· 1725 1250 */ 1726 1251 void *text_poke_copy(void *addr, const void *opcode, size_t len) 1727 1252 { 1728 - unsigned long start = (unsigned long)addr; 1729 - size_t patched = 0; 1730 - 1731 - if (WARN_ON_ONCE(core_kernel_text(start))) 1732 - return NULL; 1733 - 1734 1253 mutex_lock(&text_mutex); 1735 - while (patched < len) { 1736 - unsigned long ptr = start + patched; 1737 - size_t s; 1738 - 1739 - s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); 1740 - 1741 - __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); 1742 - patched += s; 1743 - } 1254 + addr = text_poke_copy_locked(addr, opcode, len, false); 1744 1255 mutex_unlock(&text_mutex); 1745 1256 return addr; 1746 1257 }
+5
arch/x86/kernel/asm-offsets.c
··· 107 107 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); 108 108 OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); 109 109 OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); 110 + OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); 111 + #ifdef CONFIG_CALL_DEPTH_TRACKING 112 + OFFSET(X86_call_depth, pcpu_hot, call_depth); 113 + #endif 114 + 110 115 }
+1 -1
arch/x86/kernel/asm-offsets_64.c
··· 57 57 BLANK(); 58 58 59 59 #ifdef CONFIG_STACKPROTECTOR 60 - DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); 60 + OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary); 61 61 BLANK(); 62 62 #endif 63 63 return 0;
+388
arch/x86/kernel/callthunks.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + #define pr_fmt(fmt) "callthunks: " fmt 4 + 5 + #include <linux/debugfs.h> 6 + #include <linux/kallsyms.h> 7 + #include <linux/memory.h> 8 + #include <linux/moduleloader.h> 9 + #include <linux/static_call.h> 10 + 11 + #include <asm/alternative.h> 12 + #include <asm/asm-offsets.h> 13 + #include <asm/cpu.h> 14 + #include <asm/ftrace.h> 15 + #include <asm/insn.h> 16 + #include <asm/kexec.h> 17 + #include <asm/nospec-branch.h> 18 + #include <asm/paravirt.h> 19 + #include <asm/sections.h> 20 + #include <asm/switch_to.h> 21 + #include <asm/sync_core.h> 22 + #include <asm/text-patching.h> 23 + #include <asm/xen/hypercall.h> 24 + 25 + static int __initdata_or_module debug_callthunks; 26 + 27 + #define prdbg(fmt, args...) \ 28 + do { \ 29 + if (debug_callthunks) \ 30 + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ 31 + } while(0) 32 + 33 + static int __init debug_thunks(char *str) 34 + { 35 + debug_callthunks = 1; 36 + return 1; 37 + } 38 + __setup("debug-callthunks", debug_thunks); 39 + 40 + #ifdef CONFIG_CALL_THUNKS_DEBUG 41 + DEFINE_PER_CPU(u64, __x86_call_count); 42 + DEFINE_PER_CPU(u64, __x86_ret_count); 43 + DEFINE_PER_CPU(u64, __x86_stuffs_count); 44 + DEFINE_PER_CPU(u64, __x86_ctxsw_count); 45 + EXPORT_SYMBOL_GPL(__x86_ctxsw_count); 46 + EXPORT_SYMBOL_GPL(__x86_call_count); 47 + #endif 48 + 49 + extern s32 __call_sites[], __call_sites_end[]; 50 + 51 + struct thunk_desc { 52 + void *template; 53 + unsigned int template_size; 54 + }; 55 + 56 + struct core_text { 57 + unsigned long base; 58 + unsigned long end; 59 + const char *name; 60 + }; 61 + 62 + static bool thunks_initialized __ro_after_init; 63 + 64 + static const struct core_text builtin_coretext = { 65 + .base = (unsigned long)_text, 66 + .end = (unsigned long)_etext, 67 + .name = "builtin", 68 + }; 69 + 70 + asm ( 71 + ".pushsection .rodata \n" 72 + ".global skl_call_thunk_template \n" 73 + "skl_call_thunk_template: \n" 74 + 
__stringify(INCREMENT_CALL_DEPTH)" \n" 75 + ".global skl_call_thunk_tail \n" 76 + "skl_call_thunk_tail: \n" 77 + ".popsection \n" 78 + ); 79 + 80 + extern u8 skl_call_thunk_template[]; 81 + extern u8 skl_call_thunk_tail[]; 82 + 83 + #define SKL_TMPL_SIZE \ 84 + ((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template)) 85 + 86 + extern void error_entry(void); 87 + extern void xen_error_entry(void); 88 + extern void paranoid_entry(void); 89 + 90 + static inline bool within_coretext(const struct core_text *ct, void *addr) 91 + { 92 + unsigned long p = (unsigned long)addr; 93 + 94 + return ct->base <= p && p < ct->end; 95 + } 96 + 97 + static inline bool within_module_coretext(void *addr) 98 + { 99 + bool ret = false; 100 + 101 + #ifdef CONFIG_MODULES 102 + struct module *mod; 103 + 104 + preempt_disable(); 105 + mod = __module_address((unsigned long)addr); 106 + if (mod && within_module_core((unsigned long)addr, mod)) 107 + ret = true; 108 + preempt_enable(); 109 + #endif 110 + return ret; 111 + } 112 + 113 + static bool is_coretext(const struct core_text *ct, void *addr) 114 + { 115 + if (ct && within_coretext(ct, addr)) 116 + return true; 117 + if (within_coretext(&builtin_coretext, addr)) 118 + return true; 119 + return within_module_coretext(addr); 120 + } 121 + 122 + static __init_or_module bool skip_addr(void *dest) 123 + { 124 + if (dest == error_entry) 125 + return true; 126 + if (dest == paranoid_entry) 127 + return true; 128 + if (dest == xen_error_entry) 129 + return true; 130 + /* Does FILL_RSB... 
*/ 131 + if (dest == __switch_to_asm) 132 + return true; 133 + /* Accounts directly */ 134 + if (dest == ret_from_fork) 135 + return true; 136 + #ifdef CONFIG_HOTPLUG_CPU 137 + if (dest == start_cpu0) 138 + return true; 139 + #endif 140 + #ifdef CONFIG_FUNCTION_TRACER 141 + if (dest == __fentry__) 142 + return true; 143 + #endif 144 + #ifdef CONFIG_KEXEC_CORE 145 + if (dest >= (void *)relocate_kernel && 146 + dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE) 147 + return true; 148 + #endif 149 + #ifdef CONFIG_XEN 150 + if (dest >= (void *)hypercall_page && 151 + dest < (void*)hypercall_page + PAGE_SIZE) 152 + return true; 153 + #endif 154 + return false; 155 + } 156 + 157 + static __init_or_module void *call_get_dest(void *addr) 158 + { 159 + struct insn insn; 160 + void *dest; 161 + int ret; 162 + 163 + ret = insn_decode_kernel(&insn, addr); 164 + if (ret) 165 + return ERR_PTR(ret); 166 + 167 + /* Patched out call? */ 168 + if (insn.opcode.bytes[0] != CALL_INSN_OPCODE) 169 + return NULL; 170 + 171 + dest = addr + insn.length + insn.immediate.value; 172 + if (skip_addr(dest)) 173 + return NULL; 174 + return dest; 175 + } 176 + 177 + static const u8 nops[] = { 178 + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 179 + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 180 + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 181 + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 182 + }; 183 + 184 + static __init_or_module void *patch_dest(void *dest, bool direct) 185 + { 186 + unsigned int tsize = SKL_TMPL_SIZE; 187 + u8 *pad = dest - tsize; 188 + 189 + /* Already patched? 
*/ 190 + if (!bcmp(pad, skl_call_thunk_template, tsize)) 191 + return pad; 192 + 193 + /* Ensure there are nops */ 194 + if (bcmp(pad, nops, tsize)) { 195 + pr_warn_once("Invalid padding area for %pS\n", dest); 196 + return NULL; 197 + } 198 + 199 + if (direct) 200 + memcpy(pad, skl_call_thunk_template, tsize); 201 + else 202 + text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true); 203 + return pad; 204 + } 205 + 206 + static __init_or_module void patch_call(void *addr, const struct core_text *ct) 207 + { 208 + void *pad, *dest; 209 + u8 bytes[8]; 210 + 211 + if (!within_coretext(ct, addr)) 212 + return; 213 + 214 + dest = call_get_dest(addr); 215 + if (!dest || WARN_ON_ONCE(IS_ERR(dest))) 216 + return; 217 + 218 + if (!is_coretext(ct, dest)) 219 + return; 220 + 221 + pad = patch_dest(dest, within_coretext(ct, dest)); 222 + if (!pad) 223 + return; 224 + 225 + prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr, 226 + dest, dest, pad); 227 + __text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE); 228 + text_poke_early(addr, bytes, CALL_INSN_SIZE); 229 + } 230 + 231 + static __init_or_module void 232 + patch_call_sites(s32 *start, s32 *end, const struct core_text *ct) 233 + { 234 + s32 *s; 235 + 236 + for (s = start; s < end; s++) 237 + patch_call((void *)s + *s, ct); 238 + } 239 + 240 + static __init_or_module void 241 + patch_paravirt_call_sites(struct paravirt_patch_site *start, 242 + struct paravirt_patch_site *end, 243 + const struct core_text *ct) 244 + { 245 + struct paravirt_patch_site *p; 246 + 247 + for (p = start; p < end; p++) 248 + patch_call(p->instr, ct); 249 + } 250 + 251 + static __init_or_module void 252 + callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct) 253 + { 254 + prdbg("Patching call sites %s\n", ct->name); 255 + patch_call_sites(cs->call_start, cs->call_end, ct); 256 + patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct); 257 + prdbg("Patching call sites done%s\n", ct->name); 258 + 
} 259 + 260 + void __init callthunks_patch_builtin_calls(void) 261 + { 262 + struct callthunk_sites cs = { 263 + .call_start = __call_sites, 264 + .call_end = __call_sites_end, 265 + .pv_start = __parainstructions, 266 + .pv_end = __parainstructions_end 267 + }; 268 + 269 + if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) 270 + return; 271 + 272 + pr_info("Setting up call depth tracking\n"); 273 + mutex_lock(&text_mutex); 274 + callthunks_setup(&cs, &builtin_coretext); 275 + static_call_force_reinit(); 276 + thunks_initialized = true; 277 + mutex_unlock(&text_mutex); 278 + } 279 + 280 + void *callthunks_translate_call_dest(void *dest) 281 + { 282 + void *target; 283 + 284 + lockdep_assert_held(&text_mutex); 285 + 286 + if (!thunks_initialized || skip_addr(dest)) 287 + return dest; 288 + 289 + if (!is_coretext(NULL, dest)) 290 + return dest; 291 + 292 + target = patch_dest(dest, false); 293 + return target ? : dest; 294 + } 295 + 296 + bool is_callthunk(void *addr) 297 + { 298 + unsigned int tmpl_size = SKL_TMPL_SIZE; 299 + void *tmpl = skl_call_thunk_template; 300 + unsigned long dest; 301 + 302 + dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT); 303 + if (!thunks_initialized || skip_addr((void *)dest)) 304 + return false; 305 + 306 + return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); 307 + } 308 + 309 + #ifdef CONFIG_BPF_JIT 310 + int x86_call_depth_emit_accounting(u8 **pprog, void *func) 311 + { 312 + unsigned int tmpl_size = SKL_TMPL_SIZE; 313 + void *tmpl = skl_call_thunk_template; 314 + 315 + if (!thunks_initialized) 316 + return 0; 317 + 318 + /* Is function call target a thunk? 
*/ 319 + if (func && is_callthunk(func)) 320 + return 0; 321 + 322 + memcpy(*pprog, tmpl, tmpl_size); 323 + *pprog += tmpl_size; 324 + return tmpl_size; 325 + } 326 + #endif 327 + 328 + #ifdef CONFIG_MODULES 329 + void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, 330 + struct module *mod) 331 + { 332 + struct core_text ct = { 333 + .base = (unsigned long)mod->core_layout.base, 334 + .end = (unsigned long)mod->core_layout.base + mod->core_layout.size, 335 + .name = mod->name, 336 + }; 337 + 338 + if (!thunks_initialized) 339 + return; 340 + 341 + mutex_lock(&text_mutex); 342 + callthunks_setup(cs, &ct); 343 + mutex_unlock(&text_mutex); 344 + } 345 + #endif /* CONFIG_MODULES */ 346 + 347 + #if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS) 348 + static int callthunks_debug_show(struct seq_file *m, void *p) 349 + { 350 + unsigned long cpu = (unsigned long)m->private; 351 + 352 + seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,", 353 + per_cpu(__x86_call_count, cpu), 354 + per_cpu(__x86_ret_count, cpu), 355 + per_cpu(__x86_stuffs_count, cpu), 356 + per_cpu(__x86_ctxsw_count, cpu)); 357 + return 0; 358 + } 359 + 360 + static int callthunks_debug_open(struct inode *inode, struct file *file) 361 + { 362 + return single_open(file, callthunks_debug_show, inode->i_private); 363 + } 364 + 365 + static const struct file_operations dfs_ops = { 366 + .open = callthunks_debug_open, 367 + .read = seq_read, 368 + .llseek = seq_lseek, 369 + .release = single_release, 370 + }; 371 + 372 + static int __init callthunks_debugfs_init(void) 373 + { 374 + struct dentry *dir; 375 + unsigned long cpu; 376 + 377 + dir = debugfs_create_dir("callthunks", NULL); 378 + for_each_possible_cpu(cpu) { 379 + void *arg = (void *)cpu; 380 + char name [10]; 381 + 382 + sprintf(name, "cpu%lu", cpu); 383 + debugfs_create_file(name, 0644, dir, arg, &dfs_ops); 384 + } 385 + return 0; 386 + } 387 + __initcall(callthunks_debugfs_init); 388 + #endif
-3
arch/x86/kernel/cpu/Makefile
··· 17 17 # As above, instrumenting secondary CPU boot code causes boot hangs. 18 18 KCSAN_SANITIZE_common.o := n 19 19 20 - # Make sure load_percpu_segment has no stackprotector 21 - CFLAGS_common.o := -fno-stack-protector 22 - 23 20 obj-y := cacheinfo.o scattered.o topology.o 24 21 obj-y += common.o 25 22 obj-y += rdrand.o
+32 -2
arch/x86/kernel/cpu/bugs.c
··· 787 787 RETBLEED_MITIGATION_IBPB, 788 788 RETBLEED_MITIGATION_IBRS, 789 789 RETBLEED_MITIGATION_EIBRS, 790 + RETBLEED_MITIGATION_STUFF, 790 791 }; 791 792 792 793 enum retbleed_mitigation_cmd { ··· 795 794 RETBLEED_CMD_AUTO, 796 795 RETBLEED_CMD_UNRET, 797 796 RETBLEED_CMD_IBPB, 797 + RETBLEED_CMD_STUFF, 798 798 }; 799 799 800 800 static const char * const retbleed_strings[] = { ··· 804 802 [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", 805 803 [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", 806 804 [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", 805 + [RETBLEED_MITIGATION_STUFF] = "Mitigation: Stuffing", 807 806 }; 808 807 809 808 static enum retbleed_mitigation retbleed_mitigation __ro_after_init = ··· 834 831 retbleed_cmd = RETBLEED_CMD_UNRET; 835 832 } else if (!strcmp(str, "ibpb")) { 836 833 retbleed_cmd = RETBLEED_CMD_IBPB; 834 + } else if (!strcmp(str, "stuff")) { 835 + retbleed_cmd = RETBLEED_CMD_STUFF; 837 836 } else if (!strcmp(str, "nosmt")) { 838 837 retbleed_nosmt = true; 838 + } else if (!strcmp(str, "force")) { 839 + setup_force_cpu_bug(X86_BUG_RETBLEED); 839 840 } else { 840 841 pr_err("Ignoring unknown retbleed option (%s).", str); 841 842 } ··· 886 879 } 887 880 break; 888 881 882 + case RETBLEED_CMD_STUFF: 883 + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) && 884 + spectre_v2_enabled == SPECTRE_V2_RETPOLINE) { 885 + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; 886 + 887 + } else { 888 + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING)) 889 + pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); 890 + else 891 + pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n"); 892 + 893 + goto do_cmd_auto; 894 + } 895 + break; 896 + 889 897 do_cmd_auto: 890 898 case RETBLEED_CMD_AUTO: 891 899 default: ··· 938 916 mitigate_smt = true; 939 917 break; 940 918 919 + case RETBLEED_MITIGATION_STUFF: 920 + setup_force_cpu_cap(X86_FEATURE_RETHUNK); 921 + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); 922 + 
x86_set_skl_return_thunk(); 923 + break; 924 + 941 925 default: 942 926 break; 943 927 } ··· 954 926 955 927 /* 956 928 * Let IBRS trump all on Intel without affecting the effects of the 957 - * retbleed= cmdline option. 929 + * retbleed= cmdline option except for call depth based stuffing 958 930 */ 959 931 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { 960 932 switch (spectre_v2_enabled) { ··· 967 939 retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; 968 940 break; 969 941 default: 970 - pr_err(RETBLEED_INTEL_MSG); 942 + if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) 943 + pr_err(RETBLEED_INTEL_MSG); 971 944 } 972 945 } 973 946 ··· 1442 1413 if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && 1443 1414 boot_cpu_has_bug(X86_BUG_RETBLEED) && 1444 1415 retbleed_cmd != RETBLEED_CMD_OFF && 1416 + retbleed_cmd != RETBLEED_CMD_STUFF && 1445 1417 boot_cpu_has(X86_FEATURE_IBRS) && 1446 1418 boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { 1447 1419 mode = SPECTRE_V2_IBRS;
+44 -53
arch/x86/kernel/cpu/common.c
··· 610 610 611 611 if (!ibt_selftest()) { 612 612 pr_err("IBT selftest: Failed!\n"); 613 + wrmsrl(MSR_IA32_S_CET, 0); 613 614 setup_clear_cpu_cap(X86_FEATURE_IBT); 614 615 return; 615 616 } ··· 703 702 __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); 704 703 __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); 705 704 706 - void load_percpu_segment(int cpu) 707 - { 708 - #ifdef CONFIG_X86_32 709 - loadsegment(fs, __KERNEL_PERCPU); 710 - #else 711 - __loadsegment_simple(gs, 0); 712 - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); 713 - #endif 714 - } 715 - 716 705 #ifdef CONFIG_X86_32 717 706 /* The 32-bit entry code needs to find cpu_entry_area. */ 718 707 DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); ··· 730 739 } 731 740 EXPORT_SYMBOL_GPL(load_fixmap_gdt); 732 741 733 - /* 734 - * Current gdt points %fs at the "master" per-cpu area: after this, 735 - * it's on the real one. 742 + /** 743 + * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base 744 + * @cpu: The CPU number for which this is invoked 745 + * 746 + * Invoked during early boot to switch from early GDT and early per CPU to 747 + * the direct GDT and the runtime per CPU area. On 32-bit the percpu base 748 + * switch is implicit by loading the direct GDT. On 64bit this requires 749 + * to update GSBASE. 736 750 */ 737 - void switch_to_new_gdt(int cpu) 751 + void __init switch_gdt_and_percpu_base(int cpu) 738 752 { 739 - /* Load the original GDT */ 740 753 load_direct_gdt(cpu); 741 - /* Reload the per-cpu base */ 742 - load_percpu_segment(cpu); 754 + 755 + #ifdef CONFIG_X86_64 756 + /* 757 + * No need to load %gs. It is already correct. 758 + * 759 + * Writing %gs on 64bit would zero GSBASE which would make any per 760 + * CPU operation up to the point of the wrmsrl() fault. 761 + * 762 + * Set GSBASE to the new offset. Until the wrmsrl() happens the 763 + * early mapping is still valid. 
That means the GSBASE update will 764 + * lose any prior per CPU data which was not copied over in 765 + * setup_per_cpu_areas(). 766 + * 767 + * This works even with stackprotector enabled because the 768 + * per CPU stack canary is 0 in both per CPU areas. 769 + */ 770 + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); 771 + #else 772 + /* 773 + * %fs is already set to __KERNEL_PERCPU, but after switching GDT 774 + * it is required to load FS again so that the 'hidden' part is 775 + * updated from the new GDT. Up to this point the early per CPU 776 + * translation is active. Any content of the early per CPU data 777 + * which was not copied over in setup_per_cpu_areas() is lost. 778 + */ 779 + loadsegment(fs, __KERNEL_PERCPU); 780 + #endif 743 781 } 744 782 745 783 static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ··· 2013 1993 } 2014 1994 __setup("clearcpuid=", setup_clearcpuid); 2015 1995 1996 + DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { 1997 + .current_task = &init_task, 1998 + .preempt_count = INIT_PREEMPT_COUNT, 1999 + .top_of_stack = TOP_OF_INIT_STACK, 2000 + }; 2001 + EXPORT_PER_CPU_SYMBOL(pcpu_hot); 2002 + 2016 2003 #ifdef CONFIG_X86_64 2017 2004 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, 2018 2005 fixed_percpu_data) __aligned(PAGE_SIZE) __visible; 2019 2006 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); 2020 - 2021 - /* 2022 - * The following percpu variables are hot. Align current_task to 2023 - * cacheline size such that they fall in the same cacheline. 
2024 - */ 2025 - DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = 2026 - &init_task; 2027 - EXPORT_PER_CPU_SYMBOL(current_task); 2028 - 2029 - DEFINE_PER_CPU(void *, hardirq_stack_ptr); 2030 - DEFINE_PER_CPU(bool, hardirq_stack_inuse); 2031 - 2032 - DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 2033 - EXPORT_PER_CPU_SYMBOL(__preempt_count); 2034 - 2035 - DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; 2036 2007 2037 2008 static void wrmsrl_cstar(unsigned long val) 2038 2009 { ··· 2074 2063 } 2075 2064 2076 2065 #else /* CONFIG_X86_64 */ 2077 - 2078 - DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 2079 - EXPORT_PER_CPU_SYMBOL(current_task); 2080 - DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 2081 - EXPORT_PER_CPU_SYMBOL(__preempt_count); 2082 - 2083 - /* 2084 - * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find 2085 - * the top of the kernel stack. Use an extra percpu variable to track the 2086 - * top of the kernel stack directly. 2087 - */ 2088 - DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = 2089 - (unsigned long)&init_thread_union + THREAD_SIZE; 2090 - EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); 2091 2066 2092 2067 #ifdef CONFIG_STACKPROTECTOR 2093 2068 DEFINE_PER_CPU(unsigned long, __stack_chk_guard); ··· 2244 2247 if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) || 2245 2248 boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) 2246 2249 cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 2247 - 2248 - /* 2249 - * Initialize the per-CPU GDT with the boot GDT, 2250 - * and set up the GDT descriptor: 2251 - */ 2252 - switch_to_new_gdt(cpu); 2253 2250 2254 2251 if (IS_ENABLED(CONFIG_X86_64)) { 2255 2252 loadsegment(fs, 0);
+2 -2
arch/x86/kernel/dumpstack_32.c
··· 37 37 38 38 static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) 39 39 { 40 - unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); 40 + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); 41 41 unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); 42 42 43 43 /* ··· 62 62 63 63 static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) 64 64 { 65 - unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); 65 + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr); 66 66 unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); 67 67 68 68 /*
+1 -1
arch/x86/kernel/dumpstack_64.c
··· 134 134 135 135 static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) 136 136 { 137 - unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); 137 + unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); 138 138 unsigned long *begin; 139 139 140 140 /*
+13 -7
arch/x86/kernel/ftrace.c
··· 69 69 70 70 static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) 71 71 { 72 + /* 73 + * No need to translate into a callthunk. The trampoline does 74 + * the depth accounting itself. 75 + */ 72 76 return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); 73 77 } 74 78 ··· 321 317 unsigned long size; 322 318 unsigned long *ptr; 323 319 void *trampoline; 324 - void *ip; 320 + void *ip, *dest; 325 321 /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */ 326 322 unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; 327 323 unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; ··· 363 359 364 360 ip = trampoline + size; 365 361 if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) 366 - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); 362 + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); 367 363 else 368 364 memcpy(ip, retq, sizeof(retq)); 369 365 ··· 408 404 /* put in the call to the function */ 409 405 mutex_lock(&text_mutex); 410 406 call_offset -= start_offset; 407 + /* 408 + * No need to translate into a callthunk. The trampoline does 409 + * the depth accounting before the call already. 410 + */ 411 + dest = ftrace_ops_get_func(ops); 411 412 memcpy(trampoline + call_offset, 412 - text_gen_insn(CALL_INSN_OPCODE, 413 - trampoline + call_offset, 414 - ftrace_ops_get_func(ops)), CALL_INSN_SIZE); 413 + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), 414 + CALL_INSN_SIZE); 415 415 mutex_unlock(&text_mutex); 416 416 417 417 /* ALLOC_TRAMP flags lets us know we created it */ 418 418 ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; 419 - 420 - set_vm_flush_reset_perms(trampoline); 421 419 422 420 if (likely(system_state != SYSTEM_BOOTING)) 423 421 set_memory_ro((unsigned long)trampoline, npages);
+34 -3
arch/x86/kernel/ftrace_64.S
··· 3 3 * Copyright (C) 2014 Steven Rostedt, Red Hat Inc 4 4 */ 5 5 6 - #include <linux/linkage.h> 7 6 #include <linux/cfi_types.h> 7 + #include <linux/linkage.h> 8 + #include <asm/asm-offsets.h> 8 9 #include <asm/ptrace.h> 9 10 #include <asm/ftrace.h> 10 11 #include <asm/export.h> ··· 132 131 .endm 133 132 134 133 SYM_TYPED_FUNC_START(ftrace_stub) 134 + CALL_DEPTH_ACCOUNT 135 135 RET 136 136 SYM_FUNC_END(ftrace_stub) 137 137 138 138 SYM_TYPED_FUNC_START(ftrace_stub_graph) 139 + CALL_DEPTH_ACCOUNT 139 140 RET 140 141 SYM_FUNC_END(ftrace_stub_graph) 141 142 142 143 #ifdef CONFIG_DYNAMIC_FTRACE 143 144 144 145 SYM_FUNC_START(__fentry__) 146 + CALL_DEPTH_ACCOUNT 145 147 RET 146 148 SYM_FUNC_END(__fentry__) 147 149 EXPORT_SYMBOL(__fentry__) ··· 152 148 SYM_FUNC_START(ftrace_caller) 153 149 /* save_mcount_regs fills in first two parameters */ 154 150 save_mcount_regs 151 + 152 + CALL_DEPTH_ACCOUNT 155 153 156 154 /* Stack - skipping return address of ftrace_caller */ 157 155 leaq MCOUNT_REG_SIZE+8(%rsp), %rcx ··· 169 163 170 164 /* Only ops with REGS flag set should have CS register set */ 171 165 movq $0, CS(%rsp) 166 + 167 + /* Account for the function call below */ 168 + CALL_DEPTH_ACCOUNT 172 169 173 170 SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) 174 171 ANNOTATE_NOENDBR ··· 202 193 save_mcount_regs 8 203 194 /* save_mcount_regs fills in first two parameters */ 204 195 196 + CALL_DEPTH_ACCOUNT 197 + 205 198 SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) 206 199 ANNOTATE_NOENDBR 207 200 /* Load the ftrace_ops into the 3rd parameter */ ··· 233 222 234 223 /* regs go into 4th parameter */ 235 224 leaq (%rsp), %rcx 225 + 226 + /* Account for the function call below */ 227 + CALL_DEPTH_ACCOUNT 236 228 237 229 SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) 238 230 ANNOTATE_NOENDBR ··· 289 275 /* Restore flags */ 290 276 popfq 291 277 UNWIND_HINT_FUNC 292 - RET 278 + 279 + /* 280 + * The above left an extra return value on the stack; effectively 281 + * doing a 
tail-call without using a register. This PUSH;RET 282 + * pattern unbalances the RSB, inject a pointless CALL to rebalance. 283 + */ 284 + ANNOTATE_INTRA_FUNCTION_CALL 285 + CALL .Ldo_rebalance 286 + int3 287 + .Ldo_rebalance: 288 + add $8, %rsp 289 + ALTERNATIVE __stringify(RET), \ 290 + __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ 291 + X86_FEATURE_CALL_DEPTH 293 292 294 293 SYM_FUNC_END(ftrace_regs_caller) 295 294 STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) ··· 311 284 #else /* ! CONFIG_DYNAMIC_FTRACE */ 312 285 313 286 SYM_FUNC_START(__fentry__) 287 + CALL_DEPTH_ACCOUNT 288 + 314 289 cmpq $ftrace_stub, ftrace_trace_function 315 290 jnz trace 316 291 RET ··· 366 337 int3 367 338 .Ldo_rop: 368 339 mov %rdi, (%rsp) 369 - RET 340 + ALTERNATIVE __stringify(RET), \ 341 + __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ 342 + X86_FEATURE_CALL_DEPTH 370 343 SYM_CODE_END(return_to_handler) 371 344 #endif
+1
arch/x86/kernel/head_64.S
··· 370 370 * start_secondary() via .Ljump_to_C_code. 371 371 */ 372 372 SYM_CODE_START(start_cpu0) 373 + ANNOTATE_NOENDBR 373 374 UNWIND_HINT_EMPTY 374 375 movq initial_stack(%rip), %rsp 375 376 jmp .Ljump_to_C_code
+5 -8
arch/x86/kernel/irq_32.c
··· 52 52 static inline void print_stack_overflow(void) { } 53 53 #endif 54 54 55 - DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); 56 - DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); 57 - 58 55 static void call_on_stack(void *func, void *stack) 59 56 { 60 57 asm volatile("xchgl %%ebx,%%esp \n" ··· 74 77 u32 *isp, *prev_esp, arg1; 75 78 76 79 curstk = (struct irq_stack *) current_stack(); 77 - irqstk = __this_cpu_read(hardirq_stack_ptr); 80 + irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); 78 81 79 82 /* 80 83 * this is where we switch to the IRQ stack. However, if we are ··· 112 115 int node = cpu_to_node(cpu); 113 116 struct page *ph, *ps; 114 117 115 - if (per_cpu(hardirq_stack_ptr, cpu)) 118 + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) 116 119 return 0; 117 120 118 121 ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); ··· 124 127 return -ENOMEM; 125 128 } 126 129 127 - per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); 128 - per_cpu(softirq_stack_ptr, cpu) = page_address(ps); 130 + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph); 131 + per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps); 129 132 return 0; 130 133 } 131 134 ··· 135 138 struct irq_stack *irqstk; 136 139 u32 *isp, *prev_esp; 137 140 138 - irqstk = __this_cpu_read(softirq_stack_ptr); 141 + irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr); 139 142 140 143 /* build the stack frame on the softirq stack */ 141 144 isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
+3 -3
arch/x86/kernel/irq_64.c
··· 50 50 return -ENOMEM; 51 51 52 52 /* Store actual TOS to avoid adjustment in the hotpath */ 53 - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; 53 + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; 54 54 return 0; 55 55 } 56 56 #else ··· 63 63 void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); 64 64 65 65 /* Store actual TOS to avoid adjustment in the hotpath */ 66 - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; 66 + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; 67 67 return 0; 68 68 } 69 69 #endif 70 70 71 71 int irq_init_percpu_irqstack(unsigned int cpu) 72 72 { 73 - if (per_cpu(hardirq_stack_ptr, cpu)) 73 + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) 74 74 return 0; 75 75 return map_irq_stack(cpu); 76 76 }
-1
arch/x86/kernel/kprobes/core.c
··· 414 414 if (!page) 415 415 return NULL; 416 416 417 - set_vm_flush_reset_perms(page); 418 417 /* 419 418 * First make the page read-only, and only then make it executable to 420 419 * prevent it from being W+X in between.
+6 -12
arch/x86/kernel/kvm.c
··· 798 798 * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and 799 799 * restoring to/from the stack. 800 800 */ 801 - asm( 802 - ".pushsection .text;" 803 - ".global __raw_callee_save___kvm_vcpu_is_preempted;" 804 - ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" 805 - "__raw_callee_save___kvm_vcpu_is_preempted:" 806 - ASM_ENDBR 807 - "movq __per_cpu_offset(,%rdi,8), %rax;" 808 - "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" 809 - "setne %al;" 810 - ASM_RET 811 - ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" 812 - ".popsection"); 801 + #define PV_VCPU_PREEMPTED_ASM \ 802 + "movq __per_cpu_offset(,%rdi,8), %rax\n\t" \ 803 + "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \ 804 + "setne %al\n\t" 813 805 806 + DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted, 807 + PV_VCPU_PREEMPTED_ASM, .text); 814 808 #endif 815 809 816 810 static void __init kvm_guest_init(void)
+42 -5
arch/x86/kernel/module.c
··· 74 74 return NULL; 75 75 76 76 p = __vmalloc_node_range(size, MODULE_ALIGN, 77 - MODULES_VADDR + get_module_load_offset(), 78 - MODULES_END, gfp_mask, 79 - PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, 80 - __builtin_return_address(0)); 77 + MODULES_VADDR + get_module_load_offset(), 78 + MODULES_END, gfp_mask, PAGE_KERNEL, 79 + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, 80 + NUMA_NO_NODE, __builtin_return_address(0)); 81 + 81 82 if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { 82 83 vfree(p); 83 84 return NULL; ··· 254 253 { 255 254 const Elf_Shdr *s, *alt = NULL, *locks = NULL, 256 255 *para = NULL, *orc = NULL, *orc_ip = NULL, 257 - *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; 256 + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, 257 + *calls = NULL, *cfi = NULL; 258 258 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 259 259 260 260 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { ··· 273 271 retpolines = s; 274 272 if (!strcmp(".return_sites", secstrings + s->sh_name)) 275 273 returns = s; 274 + if (!strcmp(".call_sites", secstrings + s->sh_name)) 275 + calls = s; 276 + if (!strcmp(".cfi_sites", secstrings + s->sh_name)) 277 + cfi = s; 276 278 if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) 277 279 ibt_endbr = s; 278 280 } ··· 288 282 if (para) { 289 283 void *pseg = (void *)para->sh_addr; 290 284 apply_paravirt(pseg, pseg + para->sh_size); 285 + } 286 + if (retpolines || cfi) { 287 + void *rseg = NULL, *cseg = NULL; 288 + unsigned int rsize = 0, csize = 0; 289 + 290 + if (retpolines) { 291 + rseg = (void *)retpolines->sh_addr; 292 + rsize = retpolines->sh_size; 293 + } 294 + 295 + if (cfi) { 296 + cseg = (void *)cfi->sh_addr; 297 + csize = cfi->sh_size; 298 + } 299 + 300 + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); 291 301 } 292 302 if (retpolines) { 293 303 void *rseg = (void *)retpolines->sh_addr; ··· 317 295 /* patch .altinstructions */ 318 296 void *aseg = (void 
*)alt->sh_addr; 319 297 apply_alternatives(aseg, aseg + alt->sh_size); 298 + } 299 + if (calls || para) { 300 + struct callthunk_sites cs = {}; 301 + 302 + if (calls) { 303 + cs.call_start = (void *)calls->sh_addr; 304 + cs.call_end = (void *)calls->sh_addr + calls->sh_size; 305 + } 306 + 307 + if (para) { 308 + cs.pv_start = (void *)para->sh_addr; 309 + cs.pv_end = (void *)para->sh_addr + para->sh_size; 310 + } 311 + 312 + callthunks_patch_module_calls(&cs, me); 320 313 } 321 314 if (ibt_endbr) { 322 315 void *iseg = (void *)ibt_endbr->sh_addr;
+2 -19
arch/x86/kernel/paravirt.c
··· 37 37 * nop stub, which must not clobber anything *including the stack* to 38 38 * avoid confusing the entry prologues. 39 39 */ 40 - extern void _paravirt_nop(void); 41 - asm (".pushsection .entry.text, \"ax\"\n" 42 - ".global _paravirt_nop\n" 43 - "_paravirt_nop:\n\t" 44 - ASM_ENDBR 45 - ASM_RET 46 - ".size _paravirt_nop, . - _paravirt_nop\n\t" 47 - ".type _paravirt_nop, @function\n\t" 48 - ".popsection"); 40 + DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text); 49 41 50 42 /* stub always returning 0. */ 51 - asm (".pushsection .entry.text, \"ax\"\n" 52 - ".global paravirt_ret0\n" 53 - "paravirt_ret0:\n\t" 54 - ASM_ENDBR 55 - "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" 56 - ASM_RET 57 - ".size paravirt_ret0, . - paravirt_ret0\n\t" 58 - ".type paravirt_ret0, @function\n\t" 59 - ".popsection"); 60 - 43 + DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text); 61 44 62 45 void __init default_banner(void) 63 46 {
+3 -3
arch/x86/kernel/process_32.c
··· 191 191 arch_end_context_switch(next_p); 192 192 193 193 /* 194 - * Reload esp0 and cpu_current_top_of_stack. This changes 194 + * Reload esp0 and pcpu_hot.top_of_stack. This changes 195 195 * current_thread_info(). Refresh the SYSENTER configuration in 196 196 * case prev or next is vm86. 197 197 */ 198 198 update_task_stack(next_p); 199 199 refresh_sysenter_cs(next); 200 - this_cpu_write(cpu_current_top_of_stack, 200 + this_cpu_write(pcpu_hot.top_of_stack, 201 201 (unsigned long)task_stack_page(next_p) + 202 202 THREAD_SIZE); 203 203 ··· 207 207 if (prev->gs | next->gs) 208 208 loadsegment(gs, next->gs); 209 209 210 - this_cpu_write(current_task, next_p); 210 + raw_cpu_write(pcpu_hot.current_task, next_p); 211 211 212 212 switch_fpu_finish(); 213 213
+3 -3
arch/x86/kernel/process_64.c
··· 563 563 int cpu = smp_processor_id(); 564 564 565 565 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 566 - this_cpu_read(hardirq_stack_inuse)); 566 + this_cpu_read(pcpu_hot.hardirq_stack_inuse)); 567 567 568 568 if (!test_thread_flag(TIF_NEED_FPU_LOAD)) 569 569 switch_fpu_prepare(prev_fpu, cpu); ··· 617 617 /* 618 618 * Switch the PDA and FPU contexts. 619 619 */ 620 - this_cpu_write(current_task, next_p); 621 - this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); 620 + raw_cpu_write(pcpu_hot.current_task, next_p); 621 + raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); 622 622 623 623 switch_fpu_finish(); 624 624
+3 -2
arch/x86/kernel/relocate_kernel_64.S
··· 41 41 .text 42 42 .align PAGE_SIZE 43 43 .code64 44 + SYM_CODE_START_NOALIGN(relocate_range) 44 45 SYM_CODE_START_NOALIGN(relocate_kernel) 45 46 UNWIND_HINT_EMPTY 46 47 ANNOTATE_NOENDBR ··· 313 312 int3 314 313 SYM_CODE_END(swap_pages) 315 314 316 - .globl kexec_control_code_size 317 - .set kexec_control_code_size, . - relocate_kernel 315 + .skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc 316 + SYM_CODE_END(relocate_range);
+2 -5
arch/x86/kernel/setup_percpu.c
··· 23 23 #include <asm/cpumask.h> 24 24 #include <asm/cpu.h> 25 25 26 - DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); 27 - EXPORT_PER_CPU_SYMBOL(cpu_number); 28 - 29 26 #ifdef CONFIG_X86_64 30 27 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) 31 28 #else ··· 169 172 for_each_possible_cpu(cpu) { 170 173 per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; 171 174 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); 172 - per_cpu(cpu_number, cpu) = cpu; 175 + per_cpu(pcpu_hot.cpu_number, cpu) = cpu; 173 176 setup_percpu_segment(cpu); 174 177 /* 175 178 * Copy data used in early init routines from the ··· 208 211 * area. Reload any changed state for the boot CPU. 209 212 */ 210 213 if (!cpu) 211 - switch_to_new_gdt(cpu); 214 + switch_gdt_and_percpu_base(cpu); 212 215 } 213 216 214 217 /* indicate the early static arrays will soon be gone */
+7 -3
arch/x86/kernel/smpboot.c
··· 1048 1048 /* Just in case we booted with a single CPU. */ 1049 1049 alternatives_enable_smp(); 1050 1050 1051 - per_cpu(current_task, cpu) = idle; 1051 + per_cpu(pcpu_hot.current_task, cpu) = idle; 1052 1052 cpu_init_stack_canary(cpu, idle); 1053 1053 1054 1054 /* Initialize the interrupt stack(s) */ ··· 1058 1058 1059 1059 #ifdef CONFIG_X86_32 1060 1060 /* Stack for startup_32 can be just as for start_secondary onwards */ 1061 - per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); 1061 + per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); 1062 1062 #else 1063 1063 initial_gs = per_cpu_offset(cpu); 1064 1064 #endif ··· 1453 1453 void __init native_smp_prepare_boot_cpu(void) 1454 1454 { 1455 1455 int me = smp_processor_id(); 1456 - switch_to_new_gdt(me); 1456 + 1457 + /* SMP handles this from setup_per_cpu_areas() */ 1458 + if (!IS_ENABLED(CONFIG_SMP)) 1459 + switch_gdt_and_percpu_base(me); 1460 + 1457 1461 /* already set me in cpu_online_mask in boot_cpu_init() */ 1458 1462 cpumask_set_cpu(me, cpu_callout_mask); 1459 1463 cpu_set_state_online(me);
+2 -1
arch/x86/kernel/static_call.c
··· 34 34 35 35 switch (type) { 36 36 case CALL: 37 + func = callthunks_translate_call_dest(func); 37 38 code = text_gen_insn(CALL_INSN_OPCODE, insn, func); 38 39 if (func == &__static_call_return0) { 39 40 emulate = code; ··· 53 52 54 53 case RET: 55 54 if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) 56 - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); 55 + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); 57 56 else 58 57 code = &retinsn; 59 58 break;
+2 -2
arch/x86/kernel/traps.c
··· 858 858 */ 859 859 asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) 860 860 { 861 - struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; 861 + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1; 862 862 if (regs != eregs) 863 863 *regs = *eregs; 864 864 return regs; ··· 876 876 * trust it and switch to the current kernel stack 877 877 */ 878 878 if (ip_within_syscall_gap(regs)) { 879 - sp = this_cpu_read(cpu_current_top_of_stack); 879 + sp = this_cpu_read(pcpu_hot.top_of_stack); 880 880 goto sync; 881 881 } 882 882
+20 -1
arch/x86/kernel/unwind_orc.c
··· 136 136 .type = UNWIND_HINT_TYPE_CALL 137 137 }; 138 138 139 + #ifdef CONFIG_CALL_THUNKS 140 + static struct orc_entry *orc_callthunk_find(unsigned long ip) 141 + { 142 + if (!is_callthunk((void *)ip)) 143 + return NULL; 144 + 145 + return &null_orc_entry; 146 + } 147 + #else 148 + static struct orc_entry *orc_callthunk_find(unsigned long ip) 149 + { 150 + return NULL; 151 + } 152 + #endif 153 + 139 154 /* Fake frame pointer entry -- used as a fallback for generated code */ 140 155 static struct orc_entry orc_fp_entry = { 141 156 .type = UNWIND_HINT_TYPE_CALL, ··· 204 189 if (orc) 205 190 return orc; 206 191 207 - return orc_ftrace_find(ip); 192 + orc = orc_ftrace_find(ip); 193 + if (orc) 194 + return orc; 195 + 196 + return orc_callthunk_find(ip); 208 197 } 209 198 210 199 #ifdef CONFIG_MODULES
+23 -14
arch/x86/kernel/vmlinux.lds.S
··· 132 132 CPUIDLE_TEXT 133 133 LOCK_TEXT 134 134 KPROBES_TEXT 135 - ALIGN_ENTRY_TEXT_BEGIN 136 - ENTRY_TEXT 137 - ALIGN_ENTRY_TEXT_END 138 135 SOFTIRQENTRY_TEXT 139 - STATIC_CALL_TEXT 140 - *(.gnu.warning) 141 - 142 136 #ifdef CONFIG_RETPOLINE 143 137 __indirect_thunk_start = .; 144 138 *(.text.__x86.*) 145 139 __indirect_thunk_end = .; 146 140 #endif 141 + STATIC_CALL_TEXT 142 + 143 + ALIGN_ENTRY_TEXT_BEGIN 144 + ENTRY_TEXT 145 + ALIGN_ENTRY_TEXT_END 146 + *(.gnu.warning) 147 + 147 148 } :text =0xcccc 148 149 149 150 /* End of text section, which should occupy whole number of pages */ ··· 291 290 *(.return_sites) 292 291 __return_sites_end = .; 293 292 } 293 + 294 + . = ALIGN(8); 295 + .call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) { 296 + __call_sites = .; 297 + *(.call_sites) 298 + __call_sites_end = .; 299 + } 294 300 #endif 295 301 296 302 #ifdef CONFIG_X86_KERNEL_IBT ··· 306 298 __ibt_endbr_seal = .; 307 299 *(.ibt_endbr_seal) 308 300 __ibt_endbr_seal_end = .; 301 + } 302 + #endif 303 + 304 + #ifdef CONFIG_FINEIBT 305 + . = ALIGN(8); 306 + .cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) { 307 + __cfi_sites = .; 308 + *(.cfi_sites) 309 + __cfi_sites_end = .; 309 310 } 310 311 #endif 311 312 ··· 510 493 #endif 511 494 512 495 #endif /* CONFIG_X86_64 */ 513 - 514 - #ifdef CONFIG_KEXEC_CORE 515 - #include <asm/kexec.h> 516 - 517 - . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, 518 - "kexec control code size is too big"); 519 - #endif 520 -
+1
arch/x86/kvm/svm/vmenter.S
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 2 #include <linux/linkage.h> 3 3 #include <asm/asm.h> 4 + #include <asm/asm-offsets.h> 4 5 #include <asm/bitsperlong.h> 5 6 #include <asm/kvm_vcpu_regs.h> 6 7 #include <asm/nospec-branch.h>
+1
arch/x86/lib/error-inject.c
··· 11 11 ".text\n" 12 12 ".type just_return_func, @function\n" 13 13 ".globl just_return_func\n" 14 + ASM_FUNC_ALIGN 14 15 "just_return_func:\n" 15 16 ANNOTATE_NOENDBR 16 17 ASM_RET
+49 -13
arch/x86/lib/putuser.S
··· 47 47 LOAD_TASK_SIZE_MINUS_N(0) 48 48 cmp %_ASM_BX,%_ASM_CX 49 49 jae .Lbad_put_user 50 - SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) 51 - ENDBR 52 50 ASM_STAC 53 51 1: movb %al,(%_ASM_CX) 54 52 xor %ecx,%ecx ··· 54 56 RET 55 57 SYM_FUNC_END(__put_user_1) 56 58 EXPORT_SYMBOL(__put_user_1) 59 + 60 + SYM_FUNC_START(__put_user_nocheck_1) 61 + ENDBR 62 + ASM_STAC 63 + 2: movb %al,(%_ASM_CX) 64 + xor %ecx,%ecx 65 + ASM_CLAC 66 + RET 67 + SYM_FUNC_END(__put_user_nocheck_1) 57 68 EXPORT_SYMBOL(__put_user_nocheck_1) 58 69 59 70 SYM_FUNC_START(__put_user_2) 60 71 LOAD_TASK_SIZE_MINUS_N(1) 61 72 cmp %_ASM_BX,%_ASM_CX 62 73 jae .Lbad_put_user 63 - SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) 64 - ENDBR 65 74 ASM_STAC 66 - 2: movw %ax,(%_ASM_CX) 75 + 3: movw %ax,(%_ASM_CX) 67 76 xor %ecx,%ecx 68 77 ASM_CLAC 69 78 RET 70 79 SYM_FUNC_END(__put_user_2) 71 80 EXPORT_SYMBOL(__put_user_2) 81 + 82 + SYM_FUNC_START(__put_user_nocheck_2) 83 + ENDBR 84 + ASM_STAC 85 + 4: movw %ax,(%_ASM_CX) 86 + xor %ecx,%ecx 87 + ASM_CLAC 88 + RET 89 + SYM_FUNC_END(__put_user_nocheck_2) 72 90 EXPORT_SYMBOL(__put_user_nocheck_2) 73 91 74 92 SYM_FUNC_START(__put_user_4) 75 93 LOAD_TASK_SIZE_MINUS_N(3) 76 94 cmp %_ASM_BX,%_ASM_CX 77 95 jae .Lbad_put_user 78 - SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) 79 - ENDBR 80 96 ASM_STAC 81 - 3: movl %eax,(%_ASM_CX) 97 + 5: movl %eax,(%_ASM_CX) 82 98 xor %ecx,%ecx 83 99 ASM_CLAC 84 100 RET 85 101 SYM_FUNC_END(__put_user_4) 86 102 EXPORT_SYMBOL(__put_user_4) 103 + 104 + SYM_FUNC_START(__put_user_nocheck_4) 105 + ENDBR 106 + ASM_STAC 107 + 6: movl %eax,(%_ASM_CX) 108 + xor %ecx,%ecx 109 + ASM_CLAC 110 + RET 111 + SYM_FUNC_END(__put_user_nocheck_4) 87 112 EXPORT_SYMBOL(__put_user_nocheck_4) 88 113 89 114 SYM_FUNC_START(__put_user_8) 90 115 LOAD_TASK_SIZE_MINUS_N(7) 91 116 cmp %_ASM_BX,%_ASM_CX 92 117 jae .Lbad_put_user 93 - SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL) 94 - ENDBR 95 118 ASM_STAC 96 - 4: mov %_ASM_AX,(%_ASM_CX) 119 + 7: 
mov %_ASM_AX,(%_ASM_CX) 97 120 #ifdef CONFIG_X86_32 98 - 5: movl %edx,4(%_ASM_CX) 121 + 8: movl %edx,4(%_ASM_CX) 99 122 #endif 100 123 xor %ecx,%ecx 101 124 ASM_CLAC 102 125 RET 103 126 SYM_FUNC_END(__put_user_8) 104 127 EXPORT_SYMBOL(__put_user_8) 128 + 129 + SYM_FUNC_START(__put_user_nocheck_8) 130 + ENDBR 131 + ASM_STAC 132 + 9: mov %_ASM_AX,(%_ASM_CX) 133 + #ifdef CONFIG_X86_32 134 + 10: movl %edx,4(%_ASM_CX) 135 + #endif 136 + xor %ecx,%ecx 137 + ASM_CLAC 138 + RET 139 + SYM_FUNC_END(__put_user_nocheck_8) 105 140 EXPORT_SYMBOL(__put_user_nocheck_8) 106 141 107 142 SYM_CODE_START_LOCAL(.Lbad_put_user_clac) ··· 148 117 _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) 149 118 _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) 150 119 _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) 151 - #ifdef CONFIG_X86_32 152 120 _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) 121 + _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac) 122 + _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac) 123 + _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac) 124 + #ifdef CONFIG_X86_32 125 + _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac) 126 + _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac) 153 127 #endif
+99 -8
arch/x86/lib/retpoline.S
··· 5 5 #include <asm/dwarf2.h> 6 6 #include <asm/cpufeatures.h> 7 7 #include <asm/alternative.h> 8 + #include <asm/asm-offsets.h> 8 9 #include <asm/export.h> 9 10 #include <asm/nospec-branch.h> 10 11 #include <asm/unwind_hints.h> 12 + #include <asm/percpu.h> 11 13 #include <asm/frame.h> 12 14 13 15 .section .text.__x86.indirect_thunk 14 16 15 - .macro RETPOLINE reg 17 + 18 + .macro POLINE reg 16 19 ANNOTATE_INTRA_FUNCTION_CALL 17 20 call .Ldo_rop_\@ 18 - .Lspec_trap_\@: 19 - UNWIND_HINT_EMPTY 20 - pause 21 - lfence 22 - jmp .Lspec_trap_\@ 21 + int3 23 22 .Ldo_rop_\@: 24 23 mov %\reg, (%_ASM_SP) 25 24 UNWIND_HINT_FUNC 25 + .endm 26 + 27 + .macro RETPOLINE reg 28 + POLINE \reg 26 29 RET 27 30 .endm 28 31 ··· 55 52 */ 56 53 57 54 #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) 58 - #define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) 59 55 60 56 .align RETPOLINE_THUNK_SIZE 61 57 SYM_CODE_START(__x86_indirect_thunk_array) ··· 66 64 .align RETPOLINE_THUNK_SIZE 67 65 SYM_CODE_END(__x86_indirect_thunk_array) 68 66 69 - #define GEN(reg) EXPORT_THUNK(reg) 67 + #define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) 70 68 #include <asm/GEN-for-each-reg.h> 71 69 #undef GEN 72 70 71 + #ifdef CONFIG_CALL_DEPTH_TRACKING 72 + .macro CALL_THUNK reg 73 + .align RETPOLINE_THUNK_SIZE 74 + 75 + SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) 76 + UNWIND_HINT_EMPTY 77 + ANNOTATE_NOENDBR 78 + 79 + CALL_DEPTH_ACCOUNT 80 + POLINE \reg 81 + ANNOTATE_UNRET_SAFE 82 + ret 83 + int3 84 + .endm 85 + 86 + .align RETPOLINE_THUNK_SIZE 87 + SYM_CODE_START(__x86_indirect_call_thunk_array) 88 + 89 + #define GEN(reg) CALL_THUNK reg 90 + #include <asm/GEN-for-each-reg.h> 91 + #undef GEN 92 + 93 + .align RETPOLINE_THUNK_SIZE 94 + SYM_CODE_END(__x86_indirect_call_thunk_array) 95 + 96 + #define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg) 97 + #include <asm/GEN-for-each-reg.h> 98 + #undef GEN 99 + 100 + .macro JUMP_THUNK reg 101 + 
.align RETPOLINE_THUNK_SIZE 102 + 103 + SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) 104 + UNWIND_HINT_EMPTY 105 + ANNOTATE_NOENDBR 106 + POLINE \reg 107 + ANNOTATE_UNRET_SAFE 108 + ret 109 + int3 110 + .endm 111 + 112 + .align RETPOLINE_THUNK_SIZE 113 + SYM_CODE_START(__x86_indirect_jump_thunk_array) 114 + 115 + #define GEN(reg) JUMP_THUNK reg 116 + #include <asm/GEN-for-each-reg.h> 117 + #undef GEN 118 + 119 + .align RETPOLINE_THUNK_SIZE 120 + SYM_CODE_END(__x86_indirect_jump_thunk_array) 121 + 122 + #define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg) 123 + #include <asm/GEN-for-each-reg.h> 124 + #undef GEN 125 + #endif 73 126 /* 74 127 * This function name is magical and is used by -mfunction-return=thunk-extern 75 128 * for the compiler to generate JMPs to it. ··· 197 140 EXPORT_SYMBOL(__x86_return_thunk) 198 141 199 142 #endif /* CONFIG_RETHUNK */ 143 + 144 + #ifdef CONFIG_CALL_DEPTH_TRACKING 145 + 146 + .align 64 147 + SYM_FUNC_START(__x86_return_skl) 148 + ANNOTATE_NOENDBR 149 + /* 150 + * Keep the hotpath in a 16byte I-fetch for the non-debug 151 + * case. 152 + */ 153 + CALL_THUNKS_DEBUG_INC_RETS 154 + shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) 155 + jz 1f 156 + ANNOTATE_UNRET_SAFE 157 + ret 158 + int3 159 + 1: 160 + CALL_THUNKS_DEBUG_INC_STUFFS 161 + .rept 16 162 + ANNOTATE_INTRA_FUNCTION_CALL 163 + call 2f 164 + int3 165 + 2: 166 + .endr 167 + add $(8*16), %rsp 168 + 169 + CREDIT_CALL_DEPTH 170 + 171 + ANNOTATE_UNRET_SAFE 172 + ret 173 + int3 174 + SYM_FUNC_END(__x86_return_skl) 175 + 176 + #endif /* CONFIG_CALL_DEPTH_TRACKING */
+34 -11
arch/x86/net/bpf_jit_comp.c
··· 12 12 #include <linux/memory.h> 13 13 #include <linux/sort.h> 14 14 #include <asm/extable.h> 15 + #include <asm/ftrace.h> 15 16 #include <asm/set_memory.h> 16 17 #include <asm/nospec-branch.h> 17 18 #include <asm/text-patching.h> ··· 341 340 return emit_patch(pprog, func, ip, 0xE8); 342 341 } 343 342 343 + static int emit_rsb_call(u8 **pprog, void *func, void *ip) 344 + { 345 + OPTIMIZER_HIDE_VAR(func); 346 + x86_call_depth_emit_accounting(pprog, func); 347 + return emit_patch(pprog, func, ip, 0xE8); 348 + } 349 + 344 350 static int emit_jump(u8 **pprog, void *func, void *ip) 345 351 { 346 352 return emit_patch(pprog, func, ip, 0xE9); ··· 425 417 EMIT2(0xFF, 0xE0 + reg); 426 418 } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { 427 419 OPTIMIZER_HIDE_VAR(reg); 428 - emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); 420 + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) 421 + emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); 422 + else 423 + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); 429 424 } else { 430 425 EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ 431 426 if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) ··· 443 432 u8 *prog = *pprog; 444 433 445 434 if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { 446 - emit_jump(&prog, &__x86_return_thunk, ip); 435 + emit_jump(&prog, x86_return_thunk, ip); 447 436 } else { 448 437 EMIT1(0xC3); /* ret */ 449 438 if (IS_ENABLED(CONFIG_SLS)) ··· 1525 1514 break; 1526 1515 1527 1516 /* call */ 1528 - case BPF_JMP | BPF_CALL: 1517 + case BPF_JMP | BPF_CALL: { 1518 + int offs; 1519 + 1529 1520 func = (u8 *) __bpf_call_base + imm32; 1530 1521 if (tail_call_reachable) { 1531 1522 /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ 1532 1523 EMIT3_off32(0x48, 0x8B, 0x85, 1533 1524 -round_up(bpf_prog->aux->stack_depth, 8) - 8); 1534 - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) 1525 + if (!imm32) 1535 1526 return -EINVAL; 1527 + offs = 7 + 
x86_call_depth_emit_accounting(&prog, func); 1536 1528 } else { 1537 - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) 1529 + if (!imm32) 1538 1530 return -EINVAL; 1531 + offs = x86_call_depth_emit_accounting(&prog, func); 1539 1532 } 1533 + if (emit_call(&prog, func, image + addrs[i - 1] + offs)) 1534 + return -EINVAL; 1540 1535 break; 1536 + } 1541 1537 1542 1538 case BPF_JMP | BPF_TAIL_CALL: 1543 1539 if (imm32) ··· 1935 1917 /* arg2: lea rsi, [rbp - ctx_cookie_off] */ 1936 1918 EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); 1937 1919 1938 - if (emit_call(&prog, bpf_trampoline_enter(p), prog)) 1920 + if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) 1939 1921 return -EINVAL; 1940 1922 /* remember prog start time returned by __bpf_prog_enter */ 1941 1923 emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); ··· 1956 1938 (long) p->insnsi >> 32, 1957 1939 (u32) (long) p->insnsi); 1958 1940 /* call JITed bpf program or interpreter */ 1959 - if (emit_call(&prog, p->bpf_func, prog)) 1941 + if (emit_rsb_call(&prog, p->bpf_func, prog)) 1960 1942 return -EINVAL; 1961 1943 1962 1944 /* ··· 1980 1962 emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); 1981 1963 /* arg3: lea rdx, [rbp - run_ctx_off] */ 1982 1964 EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); 1983 - if (emit_call(&prog, bpf_trampoline_exit(p), prog)) 1965 + if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) 1984 1966 return -EINVAL; 1985 1967 1986 1968 *pprog = prog; ··· 2202 2184 prog = image; 2203 2185 2204 2186 EMIT_ENDBR(); 2187 + /* 2188 + * This is the direct-call trampoline, as such it needs accounting 2189 + * for the __fentry__ call. 
2190 + */ 2191 + x86_call_depth_emit_accounting(&prog, NULL); 2205 2192 EMIT1(0x55); /* push rbp */ 2206 2193 EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 2207 2194 EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ ··· 2233 2210 if (flags & BPF_TRAMP_F_CALL_ORIG) { 2234 2211 /* arg1: mov rdi, im */ 2235 2212 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); 2236 - if (emit_call(&prog, __bpf_tramp_enter, prog)) { 2213 + if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) { 2237 2214 ret = -EINVAL; 2238 2215 goto cleanup; 2239 2216 } ··· 2265 2242 EMIT2(0xff, 0xd0); /* call *rax */ 2266 2243 } else { 2267 2244 /* call original function */ 2268 - if (emit_call(&prog, orig_call, prog)) { 2245 + if (emit_rsb_call(&prog, orig_call, prog)) { 2269 2246 ret = -EINVAL; 2270 2247 goto cleanup; 2271 2248 } ··· 2309 2286 im->ip_epilogue = prog; 2310 2287 /* arg1: mov rdi, im */ 2311 2288 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); 2312 - if (emit_call(&prog, __bpf_tramp_exit, prog)) { 2289 + if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) { 2313 2290 ret = -EINVAL; 2314 2291 goto cleanup; 2315 2292 }
+1 -1
arch/x86/power/hibernate.c
··· 159 159 if (!relocated_restore_code) 160 160 return -ENOMEM; 161 161 162 - memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); 162 + __memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); 163 163 164 164 /* Make the page containing the relocated code executable */ 165 165 pgd = (pgd_t *)__va(read_cr3_pa()) +
+1 -1
arch/x86/xen/enlighten_pv.c
··· 1210 1210 pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; 1211 1211 pv_ops.cpu.load_gdt = xen_load_gdt_boot; 1212 1212 1213 - switch_to_new_gdt(cpu); 1213 + switch_gdt_and_percpu_base(cpu); 1214 1214 1215 1215 pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; 1216 1216 pv_ops.cpu.load_gdt = xen_load_gdt;
+2 -2
include/asm-generic/vmlinux.lds.h
··· 81 81 #define RO_EXCEPTION_TABLE 82 82 #endif 83 83 84 - /* Align . to a 8 byte boundary equals to maximum function alignment. */ 85 - #define ALIGN_FUNCTION() . = ALIGN(8) 84 + /* Align . function alignment. */ 85 + #define ALIGN_FUNCTION() . = ALIGN(CONFIG_FUNCTION_ALIGNMENT) 86 86 87 87 /* 88 88 * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
+2 -2
include/linux/linkage.h
··· 69 69 #endif 70 70 71 71 #ifndef __ALIGN 72 - #define __ALIGN .align 4,0x90 73 - #define __ALIGN_STR ".align 4,0x90" 72 + #define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT 73 + #define __ALIGN_STR __stringify(__ALIGN) 74 74 #endif 75 75 76 76 #ifdef __ASSEMBLY__
+2
include/linux/static_call.h
··· 162 162 163 163 extern int __init static_call_init(void); 164 164 165 + extern void static_call_force_reinit(void); 166 + 165 167 struct static_call_mod { 166 168 struct static_call_mod *next; 167 169 struct module *mod; /* for vmlinux, mod == NULL */
+18 -5
kernel/static_call_inline.c
··· 15 15 extern struct static_call_tramp_key __start_static_call_tramp_key[], 16 16 __stop_static_call_tramp_key[]; 17 17 18 - static bool static_call_initialized; 18 + static int static_call_initialized; 19 + 20 + /* 21 + * Must be called before early_initcall() to be effective. 22 + */ 23 + void static_call_force_reinit(void) 24 + { 25 + if (WARN_ON_ONCE(!static_call_initialized)) 26 + return; 27 + 28 + static_call_initialized++; 29 + } 19 30 20 31 /* mutex to protect key modules/sites */ 21 32 static DEFINE_MUTEX(static_call_mutex); ··· 486 475 { 487 476 int ret; 488 477 489 - if (static_call_initialized) 478 + /* See static_call_force_reinit(). */ 479 + if (static_call_initialized == 1) 490 480 return 0; 491 481 492 482 cpus_read_lock(); ··· 502 490 BUG(); 503 491 } 504 492 505 - static_call_initialized = true; 506 - 507 493 #ifdef CONFIG_MODULES 508 - register_module_notifier(&static_call_module_nb); 494 + if (!static_call_initialized) 495 + register_module_notifier(&static_call_module_nb); 509 496 #endif 497 + 498 + static_call_initialized = 1; 510 499 return 0; 511 500 } 512 501 early_initcall(static_call_init);
+8 -1
kernel/trace/trace_selftest.c
··· 785 785 }; 786 786 787 787 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS 788 - noinline __noclone static void trace_direct_tramp(void) { } 788 + #ifndef CALL_DEPTH_ACCOUNT 789 + #define CALL_DEPTH_ACCOUNT "" 790 + #endif 791 + 792 + noinline __noclone static void trace_direct_tramp(void) 793 + { 794 + asm(CALL_DEPTH_ACCOUNT); 795 + } 789 796 #endif 790 797 791 798 /*
+1
lib/Kconfig.debug
··· 469 469 config DEBUG_FORCE_FUNCTION_ALIGN_64B 470 470 bool "Force all function address 64B aligned" 471 471 depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC) 472 + select FUNCTION_ALIGNMENT_64B 472 473 help 473 474 There are cases that a commit from one domain changes the function 474 475 address alignment of other domains, and cause magic performance
+3
samples/ftrace/ftrace-direct-modify.c
··· 3 3 #include <linux/kthread.h> 4 4 #include <linux/ftrace.h> 5 5 #include <asm/asm-offsets.h> 6 + #include <asm/nospec-branch.h> 6 7 7 8 extern void my_direct_func1(void); 8 9 extern void my_direct_func2(void); ··· 35 34 ASM_ENDBR 36 35 " pushq %rbp\n" 37 36 " movq %rsp, %rbp\n" 37 + CALL_DEPTH_ACCOUNT 38 38 " call my_direct_func1\n" 39 39 " leave\n" 40 40 " .size my_tramp1, .-my_tramp1\n" ··· 47 45 ASM_ENDBR 48 46 " pushq %rbp\n" 49 47 " movq %rsp, %rbp\n" 48 + CALL_DEPTH_ACCOUNT 50 49 " call my_direct_func2\n" 51 50 " leave\n" 52 51 ASM_RET
+3
samples/ftrace/ftrace-direct-multi-modify.c
··· 3 3 #include <linux/kthread.h> 4 4 #include <linux/ftrace.h> 5 5 #include <asm/asm-offsets.h> 6 + #include <asm/nospec-branch.h> 6 7 7 8 extern void my_direct_func1(unsigned long ip); 8 9 extern void my_direct_func2(unsigned long ip); ··· 33 32 ASM_ENDBR 34 33 " pushq %rbp\n" 35 34 " movq %rsp, %rbp\n" 35 + CALL_DEPTH_ACCOUNT 36 36 " pushq %rdi\n" 37 37 " movq 8(%rbp), %rdi\n" 38 38 " call my_direct_func1\n" ··· 48 46 ASM_ENDBR 49 47 " pushq %rbp\n" 50 48 " movq %rsp, %rbp\n" 49 + CALL_DEPTH_ACCOUNT 51 50 " pushq %rdi\n" 52 51 " movq 8(%rbp), %rdi\n" 53 52 " call my_direct_func2\n"
+2
samples/ftrace/ftrace-direct-multi.c
··· 5 5 #include <linux/ftrace.h> 6 6 #include <linux/sched/stat.h> 7 7 #include <asm/asm-offsets.h> 8 + #include <asm/nospec-branch.h> 8 9 9 10 extern void my_direct_func(unsigned long ip); 10 11 ··· 28 27 ASM_ENDBR 29 28 " pushq %rbp\n" 30 29 " movq %rsp, %rbp\n" 30 + CALL_DEPTH_ACCOUNT 31 31 " pushq %rdi\n" 32 32 " movq 8(%rbp), %rdi\n" 33 33 " call my_direct_func\n"
+2
samples/ftrace/ftrace-direct-too.c
··· 4 4 #include <linux/mm.h> /* for handle_mm_fault() */ 5 5 #include <linux/ftrace.h> 6 6 #include <asm/asm-offsets.h> 7 + #include <asm/nospec-branch.h> 7 8 8 9 extern void my_direct_func(struct vm_area_struct *vma, 9 10 unsigned long address, unsigned int flags); ··· 30 29 ASM_ENDBR 31 30 " pushq %rbp\n" 32 31 " movq %rsp, %rbp\n" 32 + CALL_DEPTH_ACCOUNT 33 33 " pushq %rdi\n" 34 34 " pushq %rsi\n" 35 35 " pushq %rdx\n"
+2
samples/ftrace/ftrace-direct.c
··· 4 4 #include <linux/sched.h> /* for wake_up_process() */ 5 5 #include <linux/ftrace.h> 6 6 #include <asm/asm-offsets.h> 7 + #include <asm/nospec-branch.h> 7 8 8 9 extern void my_direct_func(struct task_struct *p); 9 10 ··· 27 26 ASM_ENDBR 28 27 " pushq %rbp\n" 29 28 " movq %rsp, %rbp\n" 29 + CALL_DEPTH_ACCOUNT 30 30 " pushq %rdi\n" 31 31 " call my_direct_func\n" 32 32 " popq %rdi\n"
+3
scripts/Makefile.lib
··· 254 254 255 255 objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label 256 256 objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr 257 + objtool-args-$(CONFIG_CALL_DEPTH_TRACKING) += --hacks=skylake 257 258 objtool-args-$(CONFIG_X86_KERNEL_IBT) += --ibt 259 + objtool-args-$(CONFIG_FINEIBT) += --cfi 258 260 objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL) += --mcount 259 261 objtool-args-$(CONFIG_UNWINDER_ORC) += --orc 260 262 objtool-args-$(CONFIG_RETPOLINE) += --retpoline ··· 266 264 objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call 267 265 objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess 268 266 objtool-args-$(CONFIG_GCOV_KERNEL) += --no-unreachable 267 + objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) 269 268 270 269 objtool-args = $(objtool-args-y) \ 271 270 $(if $(delay-objtool), --link) \
+187
tools/include/linux/interval_tree_generic.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-or-later */ 2 + /* 3 + Interval Trees 4 + (C) 2012 Michel Lespinasse <walken@google.com> 5 + 6 + 7 + include/linux/interval_tree_generic.h 8 + */ 9 + 10 + #include <linux/rbtree_augmented.h> 11 + 12 + /* 13 + * Template for implementing interval trees 14 + * 15 + * ITSTRUCT: struct type of the interval tree nodes 16 + * ITRB: name of struct rb_node field within ITSTRUCT 17 + * ITTYPE: type of the interval endpoints 18 + * ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree 19 + * ITSTART(n): start endpoint of ITSTRUCT node n 20 + * ITLAST(n): last endpoint of ITSTRUCT node n 21 + * ITSTATIC: 'static' or empty 22 + * ITPREFIX: prefix to use for the inline tree definitions 23 + * 24 + * Note - before using this, please consider if generic version 25 + * (interval_tree.h) would work for you... 26 + */ 27 + 28 + #define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, \ 29 + ITSTART, ITLAST, ITSTATIC, ITPREFIX) \ 30 + \ 31 + /* Callbacks for augmented rbtree insert and remove */ \ 32 + \ 33 + RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment, \ 34 + ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST) \ 35 + \ 36 + /* Insert / remove interval nodes from the tree */ \ 37 + \ 38 + ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ 39 + struct rb_root_cached *root) \ 40 + { \ 41 + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ 42 + ITTYPE start = ITSTART(node), last = ITLAST(node); \ 43 + ITSTRUCT *parent; \ 44 + bool leftmost = true; \ 45 + \ 46 + while (*link) { \ 47 + rb_parent = *link; \ 48 + parent = rb_entry(rb_parent, ITSTRUCT, ITRB); \ 49 + if (parent->ITSUBTREE < last) \ 50 + parent->ITSUBTREE = last; \ 51 + if (start < ITSTART(parent)) \ 52 + link = &parent->ITRB.rb_left; \ 53 + else { \ 54 + link = &parent->ITRB.rb_right; \ 55 + leftmost = false; \ 56 + } \ 57 + } \ 58 + \ 59 + node->ITSUBTREE = last; \ 60 + rb_link_node(&node->ITRB, rb_parent, link); \ 61 + 
rb_insert_augmented_cached(&node->ITRB, root, \ 62 + leftmost, &ITPREFIX ## _augment); \ 63 + } \ 64 + \ 65 + ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ 66 + struct rb_root_cached *root) \ 67 + { \ 68 + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ 69 + } \ 70 + \ 71 + /* \ 72 + * Iterate over intervals intersecting [start;last] \ 73 + * \ 74 + * Note that a node's interval intersects [start;last] iff: \ 75 + * Cond1: ITSTART(node) <= last \ 76 + * and \ 77 + * Cond2: start <= ITLAST(node) \ 78 + */ \ 79 + \ 80 + static ITSTRUCT * \ 81 + ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ 82 + { \ 83 + while (true) { \ 84 + /* \ 85 + * Loop invariant: start <= node->ITSUBTREE \ 86 + * (Cond2 is satisfied by one of the subtree nodes) \ 87 + */ \ 88 + if (node->ITRB.rb_left) { \ 89 + ITSTRUCT *left = rb_entry(node->ITRB.rb_left, \ 90 + ITSTRUCT, ITRB); \ 91 + if (start <= left->ITSUBTREE) { \ 92 + /* \ 93 + * Some nodes in left subtree satisfy Cond2. \ 94 + * Iterate to find the leftmost such node N. \ 95 + * If it also satisfies Cond1, that's the \ 96 + * match we are looking for. Otherwise, there \ 97 + * is no matching interval as nodes to the \ 98 + * right of N can't satisfy Cond1 either. 
\ 99 + */ \ 100 + node = left; \ 101 + continue; \ 102 + } \ 103 + } \ 104 + if (ITSTART(node) <= last) { /* Cond1 */ \ 105 + if (start <= ITLAST(node)) /* Cond2 */ \ 106 + return node; /* node is leftmost match */ \ 107 + if (node->ITRB.rb_right) { \ 108 + node = rb_entry(node->ITRB.rb_right, \ 109 + ITSTRUCT, ITRB); \ 110 + if (start <= node->ITSUBTREE) \ 111 + continue; \ 112 + } \ 113 + } \ 114 + return NULL; /* No match */ \ 115 + } \ 116 + } \ 117 + \ 118 + ITSTATIC ITSTRUCT * \ 119 + ITPREFIX ## _iter_first(struct rb_root_cached *root, \ 120 + ITTYPE start, ITTYPE last) \ 121 + { \ 122 + ITSTRUCT *node, *leftmost; \ 123 + \ 124 + if (!root->rb_root.rb_node) \ 125 + return NULL; \ 126 + \ 127 + /* \ 128 + * Fastpath range intersection/overlap between A: [a0, a1] and \ 129 + * B: [b0, b1] is given by: \ 130 + * \ 131 + * a0 <= b1 && b0 <= a1 \ 132 + * \ 133 + * ... where A holds the lock range and B holds the smallest \ 134 + * 'start' and largest 'last' in the tree. For the later, we \ 135 + * rely on the root node, which by augmented interval tree \ 136 + * property, holds the largest value in its last-in-subtree. \ 137 + * This allows mitigating some of the tree walk overhead for \ 138 + * for non-intersecting ranges, maintained and consulted in O(1). 
\ 139 + */ \ 140 + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ 141 + if (node->ITSUBTREE < start) \ 142 + return NULL; \ 143 + \ 144 + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ 145 + if (ITSTART(leftmost) > last) \ 146 + return NULL; \ 147 + \ 148 + return ITPREFIX ## _subtree_search(node, start, last); \ 149 + } \ 150 + \ 151 + ITSTATIC ITSTRUCT * \ 152 + ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ 153 + { \ 154 + struct rb_node *rb = node->ITRB.rb_right, *prev; \ 155 + \ 156 + while (true) { \ 157 + /* \ 158 + * Loop invariants: \ 159 + * Cond1: ITSTART(node) <= last \ 160 + * rb == node->ITRB.rb_right \ 161 + * \ 162 + * First, search right subtree if suitable \ 163 + */ \ 164 + if (rb) { \ 165 + ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB); \ 166 + if (start <= right->ITSUBTREE) \ 167 + return ITPREFIX ## _subtree_search(right, \ 168 + start, last); \ 169 + } \ 170 + \ 171 + /* Move up the tree until we come from a node's left child */ \ 172 + do { \ 173 + rb = rb_parent(&node->ITRB); \ 174 + if (!rb) \ 175 + return NULL; \ 176 + prev = &node->ITRB; \ 177 + node = rb_entry(rb, ITSTRUCT, ITRB); \ 178 + rb = node->ITRB.rb_right; \ 179 + } while (prev == rb); \ 180 + \ 181 + /* Check if the node intersects [start;last] */ \ 182 + if (last < ITSTART(node)) /* !Cond1 */ \ 183 + return NULL; \ 184 + else if (start <= ITLAST(node)) /* Cond2 */ \ 185 + return node; \ 186 + } \ 187 + }
+24
tools/objtool/arch/x86/decode.c
··· 73 73 return insn->offset + insn->len + insn->immediate; 74 74 } 75 75 76 + bool arch_pc_relative_reloc(struct reloc *reloc) 77 + { 78 + /* 79 + * All relocation types where P (the address of the target) 80 + * is included in the computation. 81 + */ 82 + switch (reloc->type) { 83 + case R_X86_64_PC8: 84 + case R_X86_64_PC16: 85 + case R_X86_64_PC32: 86 + case R_X86_64_PC64: 87 + 88 + case R_X86_64_PLT32: 89 + case R_X86_64_GOTPC32: 90 + case R_X86_64_GOTPCREL: 91 + return true; 92 + 93 + default: 94 + break; 95 + } 96 + 97 + return false; 98 + } 99 + 76 100 #define ADD_OP(op) \ 77 101 if (!(op = calloc(1, sizeof(*op)))) \ 78 102 return -1; \
+8 -1
tools/objtool/builtin-check.c
··· 57 57 found = true; 58 58 } 59 59 60 + if (!str || strstr(str, "skylake")) { 61 + opts.hack_skylake = true; 62 + found = true; 63 + } 64 + 60 65 return found ? 0 : -1; 61 66 } 62 67 63 68 const struct option check_options[] = { 64 69 OPT_GROUP("Actions:"), 65 - OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks), 70 + OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks), 66 71 OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"), 67 72 OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"), 68 73 OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), ··· 75 70 OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), 76 71 OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), 77 72 OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), 73 + OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"), 78 74 OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), 79 75 OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), 80 76 OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), 81 77 OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"), 78 + OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"), 82 79 OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump), 83 80 84 81 OPT_GROUP("Options:"),
+310 -81
tools/objtool/check.c
··· 62 62 struct instruction *insn) 63 63 { 64 64 struct instruction *next = list_next_entry(insn, list); 65 - struct symbol *func = insn->func; 65 + struct symbol *func = insn_func(insn); 66 66 67 67 if (!func) 68 68 return NULL; 69 69 70 - if (&next->list != &file->insn_list && next->func == func) 70 + if (&next->list != &file->insn_list && insn_func(next) == func) 71 71 return next; 72 72 73 73 /* Check if we're already in the subfunction: */ ··· 83 83 { 84 84 struct instruction *prev = list_prev_entry(insn, list); 85 85 86 - if (&prev->list != &file->insn_list && prev->func == insn->func) 86 + if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn)) 87 87 return prev; 88 88 89 89 return NULL; ··· 129 129 static bool is_sibling_call(struct instruction *insn) 130 130 { 131 131 /* 132 - * Assume only ELF functions can make sibling calls. This ensures 133 - * sibling call detection consistency between vmlinux.o and individual 134 - * objects. 132 + * Assume only STT_FUNC calls have jump-tables. 135 133 */ 136 - if (!insn->func) 137 - return false; 138 - 139 - /* An indirect jump is either a sibling call or a jump to a table. */ 140 - if (insn->type == INSN_JUMP_DYNAMIC) 141 - return !is_jump_table_jump(insn); 134 + if (insn_func(insn)) { 135 + /* An indirect jump is either a sibling call or a jump to a table. */ 136 + if (insn->type == INSN_JUMP_DYNAMIC) 137 + return !is_jump_table_jump(insn); 138 + } 142 139 143 140 /* add_jump_destinations() sets insn->call_dest for sibling calls. 
*/ 144 141 return (is_static_jump(insn) && insn->call_dest); ··· 204 207 return false; 205 208 206 209 insn = find_insn(file, func->sec, func->offset); 207 - if (!insn->func) 210 + if (!insn_func(insn)) 208 211 return false; 209 212 210 213 func_for_each_insn(file, func, insn) { ··· 240 243 return false; 241 244 } 242 245 243 - return __dead_end_function(file, dest->func, recursion+1); 246 + return __dead_end_function(file, insn_func(dest), recursion+1); 244 247 } 245 248 } 246 249 ··· 379 382 !strncmp(sec->name, ".text.__x86.", 12)) 380 383 sec->noinstr = true; 381 384 385 + /* 386 + * .init.text code is ran before userspace and thus doesn't 387 + * strictly need retpolines, except for modules which are 388 + * loaded late, they very much do need retpoline in their 389 + * .init.text 390 + */ 391 + if (!strcmp(sec->name, ".init.text") && !opts.module) 392 + sec->init = true; 393 + 382 394 for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { 383 395 insn = malloc(sizeof(*insn)); 384 396 if (!insn) { ··· 424 418 } 425 419 426 420 list_for_each_entry(func, &sec->symbol_list, list) { 427 - if (func->type != STT_FUNC || func->alias != func) 421 + if (func->type != STT_NOTYPE && func->type != STT_FUNC) 422 + continue; 423 + 424 + if (func->return_thunk || func->alias != func) 428 425 continue; 429 426 430 427 if (!find_insn(file, sec, func->offset)) { ··· 437 428 } 438 429 439 430 sym_for_each_insn(file, func, insn) { 440 - insn->func = func; 441 - if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { 442 - if (insn->offset == insn->func->offset) { 431 + insn->sym = func; 432 + if (func->type == STT_FUNC && 433 + insn->type == INSN_ENDBR && 434 + list_empty(&insn->call_node)) { 435 + if (insn->offset == func->offset) { 443 436 list_add_tail(&insn->call_node, &file->endbr_list); 444 437 file->nr_endbr++; 445 438 } else { ··· 861 850 return 0; 862 851 } 863 852 853 + static int create_cfi_sections(struct objtool_file *file) 854 + { 855 + struct 
section *sec, *s; 856 + struct symbol *sym; 857 + unsigned int *loc; 858 + int idx; 859 + 860 + sec = find_section_by_name(file->elf, ".cfi_sites"); 861 + if (sec) { 862 + INIT_LIST_HEAD(&file->call_list); 863 + WARN("file already has .cfi_sites section, skipping"); 864 + return 0; 865 + } 866 + 867 + idx = 0; 868 + for_each_sec(file, s) { 869 + if (!s->text) 870 + continue; 871 + 872 + list_for_each_entry(sym, &s->symbol_list, list) { 873 + if (sym->type != STT_FUNC) 874 + continue; 875 + 876 + if (strncmp(sym->name, "__cfi_", 6)) 877 + continue; 878 + 879 + idx++; 880 + } 881 + } 882 + 883 + sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx); 884 + if (!sec) 885 + return -1; 886 + 887 + idx = 0; 888 + for_each_sec(file, s) { 889 + if (!s->text) 890 + continue; 891 + 892 + list_for_each_entry(sym, &s->symbol_list, list) { 893 + if (sym->type != STT_FUNC) 894 + continue; 895 + 896 + if (strncmp(sym->name, "__cfi_", 6)) 897 + continue; 898 + 899 + loc = (unsigned int *)sec->data->d_buf + idx; 900 + memset(loc, 0, sizeof(unsigned int)); 901 + 902 + if (elf_add_reloc_to_insn(file->elf, sec, 903 + idx * sizeof(unsigned int), 904 + R_X86_64_PC32, 905 + s, sym->offset)) 906 + return -1; 907 + 908 + idx++; 909 + } 910 + } 911 + 912 + return 0; 913 + } 914 + 864 915 static int create_mcount_loc_sections(struct objtool_file *file) 865 916 { 866 917 struct section *sec; ··· 957 884 if (elf_add_reloc_to_insn(file->elf, sec, 958 885 idx * sizeof(unsigned long), 959 886 R_X86_64_64, 887 + insn->sec, insn->offset)) 888 + return -1; 889 + 890 + idx++; 891 + } 892 + 893 + return 0; 894 + } 895 + 896 + static int create_direct_call_sections(struct objtool_file *file) 897 + { 898 + struct instruction *insn; 899 + struct section *sec; 900 + unsigned int *loc; 901 + int idx; 902 + 903 + sec = find_section_by_name(file->elf, ".call_sites"); 904 + if (sec) { 905 + INIT_LIST_HEAD(&file->call_list); 906 + WARN("file already has .call_sites section, skipping"); 
907 + return 0; 908 + } 909 + 910 + if (list_empty(&file->call_list)) 911 + return 0; 912 + 913 + idx = 0; 914 + list_for_each_entry(insn, &file->call_list, call_node) 915 + idx++; 916 + 917 + sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx); 918 + if (!sec) 919 + return -1; 920 + 921 + idx = 0; 922 + list_for_each_entry(insn, &file->call_list, call_node) { 923 + 924 + loc = (unsigned int *)sec->data->d_buf + idx; 925 + memset(loc, 0, sizeof(unsigned int)); 926 + 927 + if (elf_add_reloc_to_insn(file->elf, sec, 928 + idx * sizeof(unsigned int), 929 + R_X86_64_PC32, 960 930 insn->sec, insn->offset)) 961 931 return -1; 962 932 ··· 1396 1280 return; 1397 1281 } 1398 1282 1283 + if (insn->type == INSN_CALL && !insn->sec->init) 1284 + list_add_tail(&insn->call_node, &file->call_list); 1285 + 1399 1286 if (!sibling && dead_end_function(file, sym)) 1400 1287 insn->dead_end = true; 1401 1288 } ··· 1469 1350 list_add_tail(&insn->call_node, &file->return_thunk_list); 1470 1351 } 1471 1352 1472 - static bool same_function(struct instruction *insn1, struct instruction *insn2) 1353 + static bool is_first_func_insn(struct objtool_file *file, 1354 + struct instruction *insn, struct symbol *sym) 1473 1355 { 1474 - return insn1->func->pfunc == insn2->func->pfunc; 1475 - } 1476 - 1477 - static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn) 1478 - { 1479 - if (insn->offset == insn->func->offset) 1356 + if (insn->offset == sym->offset) 1480 1357 return true; 1481 1358 1359 + /* Allow direct CALL/JMP past ENDBR */ 1482 1360 if (opts.ibt) { 1483 1361 struct instruction *prev = prev_insn_same_sym(file, insn); 1484 1362 1485 1363 if (prev && prev->type == INSN_ENDBR && 1486 - insn->offset == insn->func->offset + prev->len) 1364 + insn->offset == sym->offset + prev->len) 1487 1365 return true; 1488 1366 } 1489 1367 1490 1368 return false; 1369 + } 1370 + 1371 + /* 1372 + * A sibling call is a tail-call to another symbol -- to 
differentiate from a 1373 + * recursive tail-call which is to the same symbol. 1374 + */ 1375 + static bool jump_is_sibling_call(struct objtool_file *file, 1376 + struct instruction *from, struct instruction *to) 1377 + { 1378 + struct symbol *fs = from->sym; 1379 + struct symbol *ts = to->sym; 1380 + 1381 + /* Not a sibling call if from/to a symbol hole */ 1382 + if (!fs || !ts) 1383 + return false; 1384 + 1385 + /* Not a sibling call if not targeting the start of a symbol. */ 1386 + if (!is_first_func_insn(file, to, ts)) 1387 + return false; 1388 + 1389 + /* Disallow sibling calls into STT_NOTYPE */ 1390 + if (ts->type == STT_NOTYPE) 1391 + return false; 1392 + 1393 + /* Must not be self to be a sibling */ 1394 + return fs->pfunc != ts->pfunc; 1491 1395 } 1492 1396 1493 1397 /* ··· 1547 1405 } else if (reloc->sym->return_thunk) { 1548 1406 add_return_call(file, insn, true); 1549 1407 continue; 1550 - } else if (insn->func) { 1408 + } else if (insn_func(insn)) { 1551 1409 /* 1552 1410 * External sibling call or internal sibling call with 1553 1411 * STT_FUNC reloc. ··· 1589 1447 /* 1590 1448 * Cross-function jump. 1591 1449 */ 1592 - if (insn->func && jump_dest->func && 1593 - insn->func != jump_dest->func) { 1450 + if (insn_func(insn) && insn_func(jump_dest) && 1451 + insn_func(insn) != insn_func(jump_dest)) { 1594 1452 1595 1453 /* 1596 1454 * For GCC 8+, create parent/child links for any cold ··· 1607 1465 * case where the parent function's only reference to a 1608 1466 * subfunction is through a jump table. 1609 1467 */ 1610 - if (!strstr(insn->func->name, ".cold") && 1611 - strstr(jump_dest->func->name, ".cold")) { 1612 - insn->func->cfunc = jump_dest->func; 1613 - jump_dest->func->pfunc = insn->func; 1614 - 1615 - } else if (!same_function(insn, jump_dest) && 1616 - is_first_func_insn(file, jump_dest)) { 1617 - /* 1618 - * Internal sibling call without reloc or with 1619 - * STT_SECTION reloc. 
1620 - */ 1621 - add_call_dest(file, insn, jump_dest->func, true); 1622 - continue; 1468 + if (!strstr(insn_func(insn)->name, ".cold") && 1469 + strstr(insn_func(jump_dest)->name, ".cold")) { 1470 + insn_func(insn)->cfunc = insn_func(jump_dest); 1471 + insn_func(jump_dest)->pfunc = insn_func(insn); 1623 1472 } 1473 + } 1474 + 1475 + if (jump_is_sibling_call(file, insn, jump_dest)) { 1476 + /* 1477 + * Internal sibling call without reloc or with 1478 + * STT_SECTION reloc. 1479 + */ 1480 + add_call_dest(file, insn, insn_func(jump_dest), true); 1481 + continue; 1624 1482 } 1625 1483 1626 1484 insn->jump_dest = jump_dest; ··· 1669 1527 return -1; 1670 1528 } 1671 1529 1672 - if (insn->func && insn->call_dest->type != STT_FUNC) { 1530 + if (insn_func(insn) && insn->call_dest->type != STT_FUNC) { 1673 1531 WARN_FUNC("unsupported call to non-function", 1674 1532 insn->sec, insn->offset); 1675 1533 return -1; ··· 1765 1623 nop->offset = special_alt->new_off + special_alt->new_len; 1766 1624 nop->len = special_alt->orig_len - special_alt->new_len; 1767 1625 nop->type = INSN_NOP; 1768 - nop->func = orig_insn->func; 1626 + nop->sym = orig_insn->sym; 1769 1627 nop->alt_group = new_alt_group; 1770 1628 nop->ignore = orig_insn->ignore_alts; 1771 1629 } ··· 1785 1643 last_new_insn = insn; 1786 1644 1787 1645 insn->ignore = orig_insn->ignore_alts; 1788 - insn->func = orig_insn->func; 1646 + insn->sym = orig_insn->sym; 1789 1647 insn->alt_group = new_alt_group; 1790 1648 1791 1649 /* ··· 1797 1655 * accordingly. 
1798 1656 */ 1799 1657 alt_reloc = insn_reloc(file, insn); 1800 - if (alt_reloc && 1658 + if (alt_reloc && arch_pc_relative_reloc(alt_reloc) && 1801 1659 !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { 1802 1660 1803 1661 WARN_FUNC("unsupported relocation in alternatives section", ··· 1979 1837 struct reloc *reloc = table; 1980 1838 struct instruction *dest_insn; 1981 1839 struct alternative *alt; 1982 - struct symbol *pfunc = insn->func->pfunc; 1840 + struct symbol *pfunc = insn_func(insn)->pfunc; 1983 1841 unsigned int prev_offset = 0; 1984 1842 1985 1843 /* ··· 2006 1864 break; 2007 1865 2008 1866 /* Make sure the destination is in the same function: */ 2009 - if (!dest_insn->func || dest_insn->func->pfunc != pfunc) 1867 + if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc) 2010 1868 break; 2011 1869 2012 1870 alt = malloc(sizeof(*alt)); ··· 2046 1904 * it. 2047 1905 */ 2048 1906 for (; 2049 - insn && insn->func && insn->func->pfunc == func; 1907 + insn && insn_func(insn) && insn_func(insn)->pfunc == func; 2050 1908 insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { 2051 1909 2052 1910 if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) ··· 2063 1921 if (!table_reloc) 2064 1922 continue; 2065 1923 dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend); 2066 - if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func) 1924 + if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) 2067 1925 continue; 2068 1926 2069 1927 return table_reloc; ··· 2512 2370 if (ret) 2513 2371 return ret; 2514 2372 2373 + /* 2374 + * Must be before add_{jump_call}_destination. 2375 + */ 2376 + ret = classify_symbols(file); 2377 + if (ret) 2378 + return ret; 2379 + 2515 2380 ret = decode_instructions(file); 2516 2381 if (ret) 2517 2382 return ret; ··· 2534 2385 * Must be before read_unwind_hints() since that needs insn->noendbr. 
2535 2386 */ 2536 2387 ret = read_noendbr_hints(file); 2537 - if (ret) 2538 - return ret; 2539 - 2540 - /* 2541 - * Must be before add_{jump_call}_destination. 2542 - */ 2543 - ret = classify_symbols(file); 2544 2388 if (ret) 2545 2389 return ret; 2546 2390 ··· 2745 2603 2746 2604 /* stack operations don't make sense with an undefined CFA */ 2747 2605 if (cfa->base == CFI_UNDEFINED) { 2748 - if (insn->func) { 2606 + if (insn_func(insn)) { 2749 2607 WARN_FUNC("undefined stack state", insn->sec, insn->offset); 2750 2608 return -1; 2751 2609 } ··· 3091 2949 } 3092 2950 3093 2951 /* detect when asm code uses rbp as a scratch register */ 3094 - if (opts.stackval && insn->func && op->src.reg == CFI_BP && 2952 + if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP && 3095 2953 cfa->base != CFI_BP) 3096 2954 cfi->bp_scratch = true; 3097 2955 break; ··· 3401 3259 struct instruction *insn, 3402 3260 struct insn_state *state) 3403 3261 { 3404 - if (has_modified_stack_frame(insn, state)) { 3262 + if (insn_func(insn) && has_modified_stack_frame(insn, state)) { 3405 3263 WARN_FUNC("sibling call from callable instruction with modified stack frame", 3406 3264 insn->sec, insn->offset); 3407 3265 return 1; ··· 3487 3345 while (1) { 3488 3346 next_insn = next_insn_to_validate(file, insn); 3489 3347 3490 - if (func && insn->func && func != insn->func->pfunc) { 3348 + if (func && insn_func(insn) && func != insn_func(insn)->pfunc) { 3491 3349 /* Ignore KCFI type preambles, which always fall through */ 3492 - if (!strncmp(func->name, "__cfi_", 6)) 3350 + if (!strncmp(func->name, "__cfi_", 6) || 3351 + !strncmp(func->name, "__pfx_", 6)) 3493 3352 return 0; 3494 3353 3495 3354 WARN("%s() falls through to next function %s()", 3496 - func->name, insn->func->name); 3355 + func->name, insn_func(insn)->name); 3497 3356 return 1; 3498 3357 } 3499 3358 ··· 3736 3593 3737 3594 while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { 3738 3595 if (insn->hint && 
!insn->visited && !insn->ignore) { 3739 - ret = validate_branch(file, insn->func, insn, state); 3596 + ret = validate_branch(file, insn_func(insn), insn, state); 3740 3597 if (ret && opts.backtrace) 3741 3598 BT_FUNC("<=== (hint)", insn); 3742 3599 warnings += ret; ··· 3901 3758 if (insn->retpoline_safe) 3902 3759 continue; 3903 3760 3904 - /* 3905 - * .init.text code is ran before userspace and thus doesn't 3906 - * strictly need retpolines, except for modules which are 3907 - * loaded late, they very much do need retpoline in their 3908 - * .init.text 3909 - */ 3910 - if (!strcmp(insn->sec->name, ".init.text") && !opts.module) 3761 + if (insn->sec->init) 3911 3762 continue; 3912 3763 3913 3764 if (insn->type == INSN_RETURN) { ··· 3959 3822 * In this case we'll find a piece of code (whole function) that is not 3960 3823 * covered by a !section symbol. Ignore them. 3961 3824 */ 3962 - if (opts.link && !insn->func) { 3825 + if (opts.link && !insn_func(insn)) { 3963 3826 int size = find_symbol_hole_containing(insn->sec, insn->offset); 3964 3827 unsigned long end = insn->offset + size; 3965 3828 ··· 3983 3846 /* 3984 3847 * If this hole jumps to a .cold function, mark it ignore too. 
3985 3848 */ 3986 - if (insn->jump_dest && insn->jump_dest->func && 3987 - strstr(insn->jump_dest->func->name, ".cold")) { 3849 + if (insn->jump_dest && insn_func(insn->jump_dest) && 3850 + strstr(insn_func(insn->jump_dest)->name, ".cold")) { 3988 3851 struct instruction *dest = insn->jump_dest; 3989 - func_for_each_insn(file, dest->func, dest) 3852 + func_for_each_insn(file, insn_func(dest), dest) 3990 3853 dest->ignore = true; 3991 3854 } 3992 3855 } ··· 3994 3857 return false; 3995 3858 } 3996 3859 3997 - if (!insn->func) 3860 + if (!insn_func(insn)) 3998 3861 return false; 3999 3862 4000 - if (insn->func->static_call_tramp) 3863 + if (insn_func(insn)->static_call_tramp) 4001 3864 return true; 4002 3865 4003 3866 /* ··· 4028 3891 4029 3892 if (insn->type == INSN_JUMP_UNCONDITIONAL) { 4030 3893 if (insn->jump_dest && 4031 - insn->jump_dest->func == insn->func) { 3894 + insn_func(insn->jump_dest) == insn_func(insn)) { 4032 3895 insn = insn->jump_dest; 4033 3896 continue; 4034 3897 } ··· 4036 3899 break; 4037 3900 } 4038 3901 4039 - if (insn->offset + insn->len >= insn->func->offset + insn->func->len) 3902 + if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len) 4040 3903 break; 4041 3904 4042 3905 insn = list_next_entry(insn, list); 4043 3906 } 4044 3907 4045 3908 return false; 3909 + } 3910 + 3911 + static int add_prefix_symbol(struct objtool_file *file, struct symbol *func, 3912 + struct instruction *insn) 3913 + { 3914 + if (!opts.prefix) 3915 + return 0; 3916 + 3917 + for (;;) { 3918 + struct instruction *prev = list_prev_entry(insn, list); 3919 + u64 offset; 3920 + 3921 + if (&prev->list == &file->insn_list) 3922 + break; 3923 + 3924 + if (prev->type != INSN_NOP) 3925 + break; 3926 + 3927 + offset = func->offset - prev->offset; 3928 + if (offset >= opts.prefix) { 3929 + if (offset == opts.prefix) { 3930 + /* 3931 + * Since the sec->symbol_list is ordered by 3932 + * offset (see elf_add_symbol()) the added 3933 + * symbol will not be 
seen by the iteration in 3934 + * validate_section(). 3935 + * 3936 + * Hence the lack of list_for_each_entry_safe() 3937 + * there. 3938 + * 3939 + * The direct concequence is that prefix symbols 3940 + * don't get visited (because pointless), except 3941 + * for the logic in ignore_unreachable_insn() 3942 + * that needs the terminating insn to be visited 3943 + * otherwise it will report the hole. 3944 + * 3945 + * Hence mark the first instruction of the 3946 + * prefix symbol as visisted. 3947 + */ 3948 + prev->visited |= VISITED_BRANCH; 3949 + elf_create_prefix_symbol(file->elf, func, opts.prefix); 3950 + } 3951 + break; 3952 + } 3953 + insn = prev; 3954 + } 3955 + 3956 + return 0; 4046 3957 } 4047 3958 4048 3959 static int validate_symbol(struct objtool_file *file, struct section *sec, ··· 4111 3926 if (!insn || insn->ignore || insn->visited) 4112 3927 return 0; 4113 3928 3929 + add_prefix_symbol(file, sym, insn); 3930 + 4114 3931 state->uaccess = sym->uaccess_safe; 4115 3932 4116 - ret = validate_branch(file, insn->func, insn, *state); 3933 + ret = validate_branch(file, insn_func(insn), insn, *state); 4117 3934 if (ret && opts.backtrace) 4118 3935 BT_FUNC("<=== (sym)", insn); 4119 3936 return ret; ··· 4181 3994 list_del_init(&insn->call_node); 4182 3995 } 4183 3996 3997 + static bool noendbr_range(struct objtool_file *file, struct instruction *insn) 3998 + { 3999 + struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1); 4000 + struct instruction *first; 4001 + 4002 + if (!sym) 4003 + return false; 4004 + 4005 + first = find_insn(file, sym->sec, sym->offset); 4006 + if (!first) 4007 + return false; 4008 + 4009 + if (first->type != INSN_ENDBR && !first->noendbr) 4010 + return false; 4011 + 4012 + return insn->offset == sym->offset + sym->len; 4013 + } 4014 + 4184 4015 static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) 4185 4016 { 4186 4017 struct instruction *dest; ··· 4252 4047 continue; 4253 4048 } 4254 4049 4255 
- if (dest->func && dest->func == insn->func) { 4050 + if (insn_func(dest) && insn_func(dest) == insn_func(insn)) { 4256 4051 /* 4257 4052 * Anything from->to self is either _THIS_IP_ or 4258 4053 * IRET-to-self. ··· 4271 4066 continue; 4272 4067 } 4273 4068 4069 + /* 4070 + * Accept anything ANNOTATE_NOENDBR. 4071 + */ 4274 4072 if (dest->noendbr) 4073 + continue; 4074 + 4075 + /* 4076 + * Accept if this is the instruction after a symbol 4077 + * that is (no)endbr -- typical code-range usage. 4078 + */ 4079 + if (noendbr_range(file, dest)) 4275 4080 continue; 4276 4081 4277 4082 WARN_FUNC("relocation to !ENDBR: %s", ··· 4522 4307 warnings += ret; 4523 4308 } 4524 4309 4310 + if (opts.cfi) { 4311 + ret = create_cfi_sections(file); 4312 + if (ret < 0) 4313 + goto out; 4314 + warnings += ret; 4315 + } 4316 + 4525 4317 if (opts.rethunk) { 4526 4318 ret = create_return_sites_sections(file); 4527 4319 if (ret < 0) 4528 4320 goto out; 4529 4321 warnings += ret; 4322 + 4323 + if (opts.hack_skylake) { 4324 + ret = create_direct_call_sections(file); 4325 + if (ret < 0) 4326 + goto out; 4327 + warnings += ret; 4328 + } 4530 4329 } 4531 4330 4532 4331 if (opts.mcount) {
+200 -98
tools/objtool/elf.c
··· 16 16 #include <string.h> 17 17 #include <unistd.h> 18 18 #include <errno.h> 19 + #include <linux/interval_tree_generic.h> 19 20 #include <objtool/builtin.h> 20 21 21 22 #include <objtool/elf.h> ··· 51 50 __elf_table(name); \ 52 51 }) 53 52 54 - static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b) 53 + static inline unsigned long __sym_start(struct symbol *s) 55 54 { 56 - struct symbol *sa = rb_entry(a, struct symbol, node); 57 - struct symbol *sb = rb_entry(b, struct symbol, node); 58 - 59 - if (sa->offset < sb->offset) 60 - return true; 61 - if (sa->offset > sb->offset) 62 - return false; 63 - 64 - if (sa->len < sb->len) 65 - return true; 66 - if (sa->len > sb->len) 67 - return false; 68 - 69 - sa->alias = sb; 70 - 71 - return false; 55 + return s->offset; 72 56 } 73 57 74 - static int symbol_by_offset(const void *key, const struct rb_node *node) 58 + static inline unsigned long __sym_last(struct symbol *s) 75 59 { 76 - const struct symbol *s = rb_entry(node, struct symbol, node); 77 - const unsigned long *o = key; 78 - 79 - if (*o < s->offset) 80 - return -1; 81 - if (*o >= s->offset + s->len) 82 - return 1; 83 - 84 - return 0; 60 + return s->offset + s->len - 1; 85 61 } 62 + 63 + INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last, 64 + __sym_start, __sym_last, static, __sym) 65 + 66 + #define __sym_for_each(_iter, _tree, _start, _end) \ 67 + for (_iter = __sym_iter_first((_tree), (_start), (_end)); \ 68 + _iter; _iter = __sym_iter_next(_iter, (_start), (_end))) 86 69 87 70 struct symbol_hole { 88 71 unsigned long key; ··· 132 147 133 148 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) 134 149 { 135 - struct rb_node *node; 150 + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; 151 + struct symbol *iter; 136 152 137 - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { 138 - struct symbol *s = rb_entry(node, struct symbol, node); 139 - 140 - if 
(s->offset == offset && s->type != STT_SECTION) 141 - return s; 153 + __sym_for_each(iter, tree, offset, offset) { 154 + if (iter->offset == offset && iter->type != STT_SECTION) 155 + return iter; 142 156 } 143 157 144 158 return NULL; ··· 145 161 146 162 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) 147 163 { 148 - struct rb_node *node; 164 + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; 165 + struct symbol *iter; 149 166 150 - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { 151 - struct symbol *s = rb_entry(node, struct symbol, node); 152 - 153 - if (s->offset == offset && s->type == STT_FUNC) 154 - return s; 167 + __sym_for_each(iter, tree, offset, offset) { 168 + if (iter->offset == offset && iter->type == STT_FUNC) 169 + return iter; 155 170 } 156 171 157 172 return NULL; ··· 158 175 159 176 struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset) 160 177 { 161 - struct rb_node *node; 178 + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; 179 + struct symbol *iter; 162 180 163 - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { 164 - struct symbol *s = rb_entry(node, struct symbol, node); 165 - 166 - if (s->type != STT_SECTION) 167 - return s; 181 + __sym_for_each(iter, tree, offset, offset) { 182 + if (iter->type != STT_SECTION) 183 + return iter; 168 184 } 169 185 170 186 return NULL; ··· 184 202 /* 185 203 * Find the rightmost symbol for which @offset is after it. 
186 204 */ 187 - n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset); 205 + n = rb_find(&hole, &sec->symbol_tree.rb_root, symbol_hole_by_offset); 188 206 189 207 /* found a symbol that contains @offset */ 190 208 if (n) ··· 206 224 207 225 struct symbol *find_func_containing(struct section *sec, unsigned long offset) 208 226 { 209 - struct rb_node *node; 227 + struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree; 228 + struct symbol *iter; 210 229 211 - rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) { 212 - struct symbol *s = rb_entry(node, struct symbol, node); 213 - 214 - if (s->type == STT_FUNC) 215 - return s; 230 + __sym_for_each(iter, tree, offset, offset) { 231 + if (iter->type == STT_FUNC) 232 + return iter; 216 233 } 217 234 218 235 return NULL; ··· 354 373 { 355 374 struct list_head *entry; 356 375 struct rb_node *pnode; 376 + struct symbol *iter; 357 377 378 + INIT_LIST_HEAD(&sym->reloc_list); 358 379 INIT_LIST_HEAD(&sym->pv_target); 359 380 sym->alias = sym; 360 381 ··· 369 386 sym->offset = sym->sym.st_value; 370 387 sym->len = sym->sym.st_size; 371 388 372 - rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); 389 + __sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) { 390 + if (iter->offset == sym->offset && iter->type == sym->type) 391 + iter->alias = sym; 392 + } 393 + 394 + __sym_insert(sym, &sym->sec->symbol_tree); 373 395 pnode = rb_prev(&sym->node); 374 396 if (pnode) 375 397 entry = &rb_entry(pnode, struct symbol, node)->list; ··· 389 401 * can exist within a function, confusing the sorting. 
390 402 */ 391 403 if (!sym->len) 392 - rb_erase(&sym->node, &sym->sec->symbol_tree); 404 + __sym_remove(sym, &sym->sec->symbol_tree); 393 405 } 394 406 395 407 static int read_symbols(struct elf *elf) ··· 558 570 reloc->sym = sym; 559 571 reloc->addend = addend; 560 572 573 + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); 561 574 list_add_tail(&reloc->list, &sec->reloc->reloc_list); 562 575 elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); 563 576 ··· 575 586 */ 576 587 static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym) 577 588 { 578 - struct section *sec; 589 + struct reloc *reloc; 579 590 580 - list_for_each_entry(sec, &elf->sections, list) { 581 - struct reloc *reloc; 582 - 583 - if (sec->changed) 584 - continue; 585 - 586 - list_for_each_entry(reloc, &sec->reloc_list, list) { 587 - if (reloc->sym == sym) { 588 - sec->changed = true; 589 - break; 590 - } 591 - } 592 - } 591 + list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry) 592 + reloc->sec->changed = true; 593 593 } 594 594 595 595 /* ··· 625 647 626 648 /* end-of-list */ 627 649 if (!symtab_data) { 650 + /* 651 + * Over-allocate to avoid O(n^2) symbol creation 652 + * behaviour. The down side is that libelf doesn't 653 + * like this; see elf_truncate_section() for the fixup. 
654 + */ 655 + int num = max(1U, sym->idx/3); 628 656 void *buf; 629 657 630 658 if (idx) { ··· 644 660 if (t) 645 661 shndx_data = elf_newdata(t); 646 662 647 - buf = calloc(1, entsize); 663 + buf = calloc(num, entsize); 648 664 if (!buf) { 649 665 WARN("malloc"); 650 666 return -1; 651 667 } 652 668 653 669 symtab_data->d_buf = buf; 654 - symtab_data->d_size = entsize; 670 + symtab_data->d_size = num * entsize; 655 671 symtab_data->d_align = 1; 656 672 symtab_data->d_type = ELF_T_SYM; 657 673 658 - symtab->sh.sh_size += entsize; 659 674 symtab->changed = true; 675 + symtab->truncate = true; 660 676 661 677 if (t) { 662 - shndx_data->d_buf = &sym->sec->idx; 663 - shndx_data->d_size = sizeof(Elf32_Word); 678 + buf = calloc(num, sizeof(Elf32_Word)); 679 + if (!buf) { 680 + WARN("malloc"); 681 + return -1; 682 + } 683 + 684 + shndx_data->d_buf = buf; 685 + shndx_data->d_size = num * sizeof(Elf32_Word); 664 686 shndx_data->d_align = sizeof(Elf32_Word); 665 687 shndx_data->d_type = ELF_T_WORD; 666 688 667 - symtab_shndx->sh.sh_size += sizeof(Elf32_Word); 668 689 symtab_shndx->changed = true; 690 + symtab_shndx->truncate = true; 669 691 } 670 692 671 693 break; ··· 720 730 } 721 731 722 732 static struct symbol * 723 - elf_create_section_symbol(struct elf *elf, struct section *sec) 733 + __elf_create_symbol(struct elf *elf, struct symbol *sym) 724 734 { 725 735 struct section *symtab, *symtab_shndx; 726 736 Elf32_Word first_non_local, new_idx; 727 - struct symbol *sym, *old; 737 + struct symbol *old; 728 738 729 739 symtab = find_section_by_name(elf, ".symtab"); 730 740 if (symtab) { ··· 734 744 return NULL; 735 745 } 736 746 737 - sym = calloc(1, sizeof(*sym)); 738 - if (!sym) { 739 - perror("malloc"); 740 - return NULL; 741 - } 747 + new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; 742 748 743 - sym->name = sec->name; 744 - sym->sec = sec; 745 - 746 - // st_name 0 747 - sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); 748 - // st_other 0 749 - // st_value 
0 750 - // st_size 0 749 + if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL) 750 + goto non_local; 751 751 752 752 /* 753 753 * Move the first global symbol, as per sh_info, into a new, higher 754 754 * symbol index. This fees up a spot for a new local symbol. 755 755 */ 756 756 first_non_local = symtab->sh.sh_info; 757 - new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize; 758 757 old = find_symbol_by_index(elf, first_non_local); 759 758 if (old) { 760 759 old->idx = new_idx; ··· 761 782 new_idx = first_non_local; 762 783 } 763 784 785 + /* 786 + * Either way, we will add a LOCAL symbol. 787 + */ 788 + symtab->sh.sh_info += 1; 789 + 790 + non_local: 764 791 sym->idx = new_idx; 765 792 if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) { 766 793 WARN("elf_update_symbol"); 767 794 return NULL; 768 795 } 769 796 770 - /* 771 - * Either way, we added a LOCAL symbol. 772 - */ 773 - symtab->sh.sh_info += 1; 797 + symtab->sh.sh_size += symtab->sh.sh_entsize; 798 + symtab->changed = true; 774 799 775 - elf_add_symbol(elf, sym); 800 + if (symtab_shndx) { 801 + symtab_shndx->sh.sh_size += sizeof(Elf32_Word); 802 + symtab_shndx->changed = true; 803 + } 804 + 805 + return sym; 806 + } 807 + 808 + static struct symbol * 809 + elf_create_section_symbol(struct elf *elf, struct section *sec) 810 + { 811 + struct symbol *sym = calloc(1, sizeof(*sym)); 812 + 813 + if (!sym) { 814 + perror("malloc"); 815 + return NULL; 816 + } 817 + 818 + sym->name = sec->name; 819 + sym->sec = sec; 820 + 821 + // st_name 0 822 + sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION); 823 + // st_other 0 824 + // st_value 0 825 + // st_size 0 826 + 827 + sym = __elf_create_symbol(elf, sym); 828 + if (sym) 829 + elf_add_symbol(elf, sym); 830 + 831 + return sym; 832 + } 833 + 834 + static int elf_add_string(struct elf *elf, struct section *strtab, char *str); 835 + 836 + struct symbol * 837 + elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size) 838 + { 839 + struct symbol 
*sym = calloc(1, sizeof(*sym)); 840 + size_t namelen = strlen(orig->name) + sizeof("__pfx_"); 841 + char *name = malloc(namelen); 842 + 843 + if (!sym || !name) { 844 + perror("malloc"); 845 + return NULL; 846 + } 847 + 848 + snprintf(name, namelen, "__pfx_%s", orig->name); 849 + 850 + sym->name = name; 851 + sym->sec = orig->sec; 852 + 853 + sym->sym.st_name = elf_add_string(elf, NULL, name); 854 + sym->sym.st_info = orig->sym.st_info; 855 + sym->sym.st_value = orig->sym.st_value - size; 856 + sym->sym.st_size = size; 857 + 858 + sym = __elf_create_symbol(elf, sym); 859 + if (sym) 860 + elf_add_symbol(elf, sym); 776 861 777 862 return sym; 778 863 } ··· 893 850 894 851 static int read_relocs(struct elf *elf) 895 852 { 853 + unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; 896 854 struct section *sec; 897 855 struct reloc *reloc; 898 - int i; 899 856 unsigned int symndx; 900 - unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; 857 + struct symbol *sym; 858 + int i; 901 859 902 860 if (!elf_alloc_hash(reloc, elf->text_size / 16)) 903 861 return -1; ··· 939 895 940 896 reloc->sec = sec; 941 897 reloc->idx = i; 942 - reloc->sym = find_symbol_by_index(elf, symndx); 898 + reloc->sym = sym = find_symbol_by_index(elf, symndx); 943 899 if (!reloc->sym) { 944 900 WARN("can't find reloc entry symbol %d for %s", 945 901 symndx, sec->name); 946 902 return -1; 947 903 } 948 904 905 + list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list); 949 906 list_add_tail(&reloc->list, &sec->reloc_list); 950 907 elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); 951 908 ··· 1330 1285 return 0; 1331 1286 } 1332 1287 1288 + /* 1289 + * When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size 1290 + * do you: 1291 + * 1292 + * A) adhere to the section header and truncate the data, or 1293 + * B) ignore the section header and write out all the data you've got? 1294 + * 1295 + * Yes, libelf sucks and we need to manually truncate if we over-allocate data. 
1296 + */ 1297 + static int elf_truncate_section(struct elf *elf, struct section *sec) 1298 + { 1299 + u64 size = sec->sh.sh_size; 1300 + bool truncated = false; 1301 + Elf_Data *data = NULL; 1302 + Elf_Scn *s; 1303 + 1304 + s = elf_getscn(elf->elf, sec->idx); 1305 + if (!s) { 1306 + WARN_ELF("elf_getscn"); 1307 + return -1; 1308 + } 1309 + 1310 + for (;;) { 1311 + /* get next data descriptor for the relevant section */ 1312 + data = elf_getdata(s, data); 1313 + 1314 + if (!data) { 1315 + if (size) { 1316 + WARN("end of section data but non-zero size left\n"); 1317 + return -1; 1318 + } 1319 + return 0; 1320 + } 1321 + 1322 + if (truncated) { 1323 + /* when we remove symbols */ 1324 + WARN("truncated; but more data\n"); 1325 + return -1; 1326 + } 1327 + 1328 + if (!data->d_size) { 1329 + WARN("zero size data"); 1330 + return -1; 1331 + } 1332 + 1333 + if (data->d_size > size) { 1334 + truncated = true; 1335 + data->d_size = size; 1336 + } 1337 + 1338 + size -= data->d_size; 1339 + } 1340 + } 1341 + 1333 1342 int elf_write(struct elf *elf) 1334 1343 { 1335 1344 struct section *sec; ··· 1394 1295 1395 1296 /* Update changed relocation sections and section headers: */ 1396 1297 list_for_each_entry(sec, &elf->sections, list) { 1298 + if (sec->truncate) 1299 + elf_truncate_section(elf, sec); 1300 + 1397 1301 if (sec->changed) { 1398 1302 s = elf_getscn(elf->elf, sec->idx); 1399 1303 if (!s) {
+2
tools/objtool/include/objtool/arch.h
··· 93 93 94 94 int arch_rewrite_retpolines(struct objtool_file *file); 95 95 96 + bool arch_pc_relative_reloc(struct reloc *reloc); 97 + 96 98 #endif /* _ARCH_H */
+3
tools/objtool/include/objtool/builtin.h
··· 14 14 bool dump_orc; 15 15 bool hack_jump_label; 16 16 bool hack_noinstr; 17 + bool hack_skylake; 17 18 bool ibt; 18 19 bool mcount; 19 20 bool noinstr; ··· 26 25 bool stackval; 27 26 bool static_call; 28 27 bool uaccess; 28 + int prefix; 29 + bool cfi; 29 30 30 31 /* options: */ 31 32 bool backtrace;
+11 -1
tools/objtool/include/objtool/check.h
··· 67 67 struct reloc *jump_table; 68 68 struct reloc *reloc; 69 69 struct list_head alts; 70 - struct symbol *func; 70 + struct symbol *sym; 71 71 struct list_head stack_ops; 72 72 struct cfi_state *cfi; 73 73 }; 74 + 75 + static inline struct symbol *insn_func(struct instruction *insn) 76 + { 77 + struct symbol *sym = insn->sym; 78 + 79 + if (sym && sym->type != STT_FUNC) 80 + sym = NULL; 81 + 82 + return sym; 83 + } 74 84 75 85 #define VISITED_BRANCH 0x01 76 86 #define VISITED_BRANCH_UACCESS 0x02
+7 -2
tools/objtool/include/objtool/elf.h
··· 30 30 struct hlist_node hash; 31 31 struct hlist_node name_hash; 32 32 GElf_Shdr sh; 33 - struct rb_root symbol_tree; 33 + struct rb_root_cached symbol_tree; 34 34 struct list_head symbol_list; 35 35 struct list_head reloc_list; 36 36 struct section *base, *reloc; ··· 38 38 Elf_Data *data; 39 39 char *name; 40 40 int idx; 41 - bool changed, text, rodata, noinstr; 41 + bool changed, text, rodata, noinstr, init, truncate; 42 42 }; 43 43 44 44 struct symbol { ··· 53 53 unsigned char bind, type; 54 54 unsigned long offset; 55 55 unsigned int len; 56 + unsigned long __subtree_last; 56 57 struct symbol *pfunc, *cfunc, *alias; 57 58 u8 uaccess_safe : 1; 58 59 u8 static_call_tramp : 1; ··· 62 61 u8 fentry : 1; 63 62 u8 profiling_func : 1; 64 63 struct list_head pv_target; 64 + struct list_head reloc_list; 65 65 }; 66 66 67 67 struct reloc { ··· 74 72 }; 75 73 struct section *sec; 76 74 struct symbol *sym; 75 + struct list_head sym_reloc_entry; 77 76 unsigned long offset; 78 77 unsigned int type; 79 78 s64 addend; ··· 147 144 148 145 struct elf *elf_open_read(const char *name, int flags); 149 146 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); 147 + 148 + struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size); 150 149 151 150 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, 152 151 unsigned int type, struct symbol *sym, s64 addend);
+1
tools/objtool/include/objtool/objtool.h
··· 28 28 struct list_head static_call_list; 29 29 struct list_head mcount_loc_list; 30 30 struct list_head endbr_list; 31 + struct list_head call_list; 31 32 bool ignore_unreachables, hints, rodata; 32 33 33 34 unsigned int nr_endbr;
+1
tools/objtool/objtool.c
··· 106 106 INIT_LIST_HEAD(&file.static_call_list); 107 107 INIT_LIST_HEAD(&file.mcount_loc_list); 108 108 INIT_LIST_HEAD(&file.endbr_list); 109 + INIT_LIST_HEAD(&file.call_list); 109 110 file.ignore_unreachables = opts.no_unreachable; 110 111 file.hints = false; 111 112