x86,ibt: Use UDB instead of 0xEA

A while ago [0] FineIBT started using the 0xEA instruction to raise #UD.
All existing parts will generate #UD in 64bit mode on that instruction.

However, Intel/AMD have not blessed using this instruction, it is on
their 'reserved' opcode list for future use.

Peter Anvin worked the committees and got use of 0xD6 blessed, it
shall be called UDB (per the next SDM or so), and it being a single
byte instruction is easy to slip into a single byte immediate -- as
is done by this very patch.

Reworking the FineIBT code to use UDB wasn't entirely trivial. Notably
the FineIBT-BHI1 case ran out of bytes. In order to condense the
encoding some it was required to move the hash register from R10D to
EAX (thanks hpa!).

Per the x86_64 ABI, RAX is used to pass the number of vector registers
for vararg function calls -- something that should not happen in the
kernel. More so, the kernel is built with -mskip-rax-setup, which
should leave RAX completely unused, allowing its re-use.

[ For BPF; while the bpf2bpf tail-call uses RAX in its calling
convention, that does not use CFI and is unaffected. Only the
'regular' C->BPF transition is covered by CFI. ]

The ENDBR poison value is changed from 'OSP NOP3' to 'NOPL -42(%RAX)',
this is basically NOP4 but with UDB as its immediate. As such it is
still a non-standard NOP value unique to prior ENDBR sites, but now
also provides UDB.

Per Agner Fog's optimization guide, Jcc is assumed not-taken. That is,
the expected path should be the fallthrough case for improved
throughput.

Since the preamble now relies on the ENDBR poison to provide UDB, the
code is changed to write the poison right along with the initial
preamble -- this is possible because the ITS mitigation already
disabled IBT over rewriting the CFI scheme.

The scheme in detail:

Preamble:

FineIBT FineIBT-BHI1 FineIBT-BHI

__cfi_\func: __cfi_\func: __cfi_\func:
endbr endbr endbr
subl $0x12345678, %eax subl $0x12345678, %eax subl $0x12345678, %eax
jne.d32,pn \func+3 cmovne %rax, %rdi cs cs call __bhi_args_N
jne.d8,pn \func+3
\func: \func: \func:
nopl -42(%rax) nopl -42(%rax) nopl -42(%rax)

Notably there are 7 bytes available after the SUBL; this enables the
BHI1 case to fit without the nasty overlapping case it had previously.
The !BHI case uses Jcc.d32,pn to consume all 7 bytes without the need
for an additional NOP, while the BHI case uses CS padding to align the
CALL with the end of the preamble such that it returns to \func+0.

Caller:

FineIBT Paranoid-FineIBT

fineibt_caller: fineibt_caller:
mov $0x12345678, %eax mov $0x12345678, %eax
lea -10(%r11), %r11 cmp -0x11(%r11), %eax
nop5 cs lea -0x10(%r11), %r11
retpoline: retpoline:
cs call __x86_indirect_thunk_r11 jne fineibt_caller+0xd
call *%r11
nop

Notably this is before apply_retpolines() which will fix up the
retpoline call -- since all parts with IBT also have eIBRS (let's
ignore ITS). Typically the retpoline site is rewritten (when still
intact) into:

call *%r11
nop3

[0] 06926c6cdb95 ("x86/ibt: Optimize the FineIBT instruction sequence")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250901191307.GI4067720@noisy.programming.kicks-ass.net

+178 -148
+7 -2
arch/x86/include/asm/bug.h
··· 5 5 #include <linux/stringify.h> 6 6 #include <linux/instrumentation.h> 7 7 #include <linux/objtool.h> 8 + #include <asm/asm.h> 8 9 9 10 /* 10 11 * Despite that some emulators terminate on UD2, we use it for WARN(). 11 12 */ 12 - #define ASM_UD2 ".byte 0x0f, 0x0b" 13 + #define ASM_UD2 _ASM_BYTES(0x0f, 0x0b) 13 14 #define INSN_UD2 0x0b0f 14 15 #define LEN_UD2 2 16 + 17 + #define ASM_UDB _ASM_BYTES(0xd6) 18 + #define INSN_UDB 0xd6 19 + #define LEN_UDB 1 15 20 16 21 /* 17 22 * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit. ··· 31 26 #define BUG_UD2 0xfffe 32 27 #define BUG_UD1 0xfffd 33 28 #define BUG_UD1_UBSAN 0xfffc 34 - #define BUG_EA 0xffea 29 + #define BUG_UDB 0xffd6 35 30 #define BUG_LOCK 0xfff0 36 31 37 32 #ifdef CONFIG_GENERIC_BUG
+6 -8
arch/x86/include/asm/cfi.h
··· 71 71 * 72 72 * __cfi_foo: 73 73 * endbr64 74 - * subl 0x12345678, %r10d 75 - * jz foo 76 - * ud2 77 - * nop 74 + * subl 0x12345678, %eax 75 + * jne.32,pn foo+3 78 76 * foo: 79 - * osp nop3 # was endbr64 77 + * nopl -42(%rax) # was endbr64 80 78 * ... code here ... 81 79 * ret 82 80 * ··· 84 86 * indirect caller: 85 87 * lea foo(%rip), %r11 86 88 * ... 87 - * movl $0x12345678, %r10d 88 - * subl $16, %r11 89 - * nop4 89 + * movl $0x12345678, %eax 90 + * lea -0x10(%r11), %r11 91 + * nop5 90 92 * call *%r11 91 93 * 92 94 */
+3 -7
arch/x86/include/asm/ibt.h
··· 59 59 static __always_inline __attribute_const__ u32 gen_endbr_poison(void) 60 60 { 61 61 /* 62 - * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it 63 - * will be unique to (former) ENDBR sites. 62 + * 4 byte NOP that isn't NOP4, such that it will be unique to (former) 63 + * ENDBR sites. Additionally it carries UDB as immediate. 64 64 */ 65 - return 0x001f0f66; /* osp nopl (%rax) */ 65 + return 0xd6401f0f; /* nopl -42(%rax) */ 66 66 } 67 67 68 68 static inline bool __is_endbr(u32 val) 69 69 { 70 70 if (val == gen_endbr_poison()) 71 - return true; 72 - 73 - /* See cfi_fineibt_bhi_preamble() */ 74 - if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5) 75 71 return true; 76 72 77 73 val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
+124 -93
arch/x86/kernel/alternative.c
··· 147 147 /* 148 148 * When ITS uses indirect branch thunk the fineibt_paranoid 149 149 * caller sequence doesn't fit in the caller site. So put the 150 - * remaining part of the sequence (<ea> + JNE) into the ITS 150 + * remaining part of the sequence (UDB + JNE) into the ITS 151 151 * thunk. 152 152 */ 153 - bytes[i++] = 0xea; /* invalid instruction */ 153 + bytes[i++] = 0xd6; /* UDB */ 154 154 bytes[i++] = 0x75; /* JNE */ 155 155 bytes[i++] = 0xfd; 156 156 ··· 163 163 reg -= 8; 164 164 } 165 165 bytes[i++] = 0xff; 166 - bytes[i++] = 0xe0 + reg; /* jmp *reg */ 166 + bytes[i++] = 0xe0 + reg; /* JMP *reg */ 167 167 bytes[i++] = 0xcc; 168 168 169 169 return thunk + offset; ··· 970 970 case JMP32_INSN_OPCODE: 971 971 /* Check for cfi_paranoid + ITS */ 972 972 dest = addr + insn.length + insn.immediate.value; 973 - if (dest[-1] == 0xea && (dest[0] & 0xf0) == 0x70) { 973 + if (dest[-1] == 0xd6 && (dest[0] & 0xf0) == 0x70) { 974 974 WARN_ON_ONCE(cfi_mode != CFI_FINEIBT); 975 975 continue; 976 976 } ··· 1303 1303 * 1304 1304 * __cfi_\func: __cfi_\func: 1305 1305 * movl $0x12345678,%eax // 5 endbr64 // 4 1306 - * nop subl $0x12345678,%r10d // 7 1307 - * nop jne __cfi_\func+6 // 2 1308 - * nop nop3 // 3 1306 + * nop subl $0x12345678,%eax // 5 1307 + * nop jne.d32,pn \func+3 // 7 1309 1308 * nop 1310 1309 * nop 1311 1310 * nop ··· 1313 1314 * nop 1314 1315 * nop 1315 1316 * nop 1317 + * nop 1318 + * \func: \func: 1319 + * endbr64 nopl -42(%rax) 1316 1320 * 1317 1321 * 1318 1322 * caller: caller: 1319 - * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 1323 + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%eax // 5 1320 1324 * addl $-15(%r11),%r10d // 4 lea -0x10(%r11),%r11 // 4 1321 - * je 1f // 2 nop4 // 4 1325 + * je 1f // 2 nop5 // 5 1322 1326 * ud2 // 2 1323 1327 * 1: cs call __x86_indirect_thunk_r11 // 6 call *%r11; nop3; // 6 1324 1328 * 1329 + * 1330 + * Notably, the FineIBT sequences are crafted such that branches are presumed 1331 + * non-taken. 
This is based on Agner Fog's optimization manual, which states: 1332 + * 1333 + * "Make conditional jumps most often not taken: The efficiency and throughput 1334 + * for not-taken branches is better than for taken branches on most 1335 + * processors. Therefore, it is good to place the most frequent branch first" 1325 1336 */ 1326 1337 1327 1338 /* 1328 1339 * <fineibt_preamble_start>: 1329 1340 * 0: f3 0f 1e fa endbr64 1330 - * 4: 41 81 <ea> 78 56 34 12 sub $0x12345678, %r10d 1331 - * b: 75 f9 jne 6 <fineibt_preamble_start+0x6> 1332 - * d: 0f 1f 00 nopl (%rax) 1341 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1342 + * 9: 2e 0f 85 03 00 00 00 jne,pn 13 <fineibt_preamble_start+0x13> 1343 + * 10: 0f 1f 40 d6 nopl -0x2a(%rax) 1333 1344 * 1334 - * Note that the JNE target is the 0xEA byte inside the SUB, this decodes as 1335 - * (bad) on x86_64 and raises #UD. 1345 + * Note that the JNE target is the 0xD6 byte inside the NOPL, this decodes as 1346 + * UDB on x86_64 and raises #UD. 1336 1347 */ 1337 1348 asm( ".pushsection .rodata \n" 1338 1349 "fineibt_preamble_start: \n" 1339 1350 " endbr64 \n" 1340 - " subl $0x12345678, %r10d \n" 1351 + " subl $0x12345678, %eax \n" 1341 1352 "fineibt_preamble_bhi: \n" 1342 - " jne fineibt_preamble_start+6 \n" 1343 - ASM_NOP3 1353 + " cs jne.d32 fineibt_preamble_start+0x13 \n" 1354 + "#fineibt_func: \n" 1355 + " nopl -42(%rax) \n" 1344 1356 "fineibt_preamble_end: \n" 1345 1357 ".popsection\n" 1346 1358 ); ··· 1362 1352 1363 1353 #define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) 1364 1354 #define fineibt_preamble_bhi (fineibt_preamble_bhi - fineibt_preamble_start) 1365 - #define fineibt_preamble_ud 6 1366 - #define fineibt_preamble_hash 7 1355 + #define fineibt_preamble_ud 0x13 1356 + #define fineibt_preamble_hash 5 1367 1357 1368 1358 /* 1369 1359 * <fineibt_caller_start>: 1370 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 1371 - * 6: 4d 8d 5b f0 lea -0x10(%r11), %r11 1372 - * a: 0f 1f 40 00 nopl 
0x0(%rax) 1360 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1361 + * 5: 4d 8d 5b f0 lea -0x10(%r11), %r11 1362 + * 9: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 1373 1363 */ 1374 1364 asm( ".pushsection .rodata \n" 1375 1365 "fineibt_caller_start: \n" 1376 - " movl $0x12345678, %r10d \n" 1366 + " movl $0x12345678, %eax \n" 1377 1367 " lea -0x10(%r11), %r11 \n" 1378 - ASM_NOP4 1368 + ASM_NOP5 1379 1369 "fineibt_caller_end: \n" 1380 1370 ".popsection \n" 1381 1371 ); ··· 1384 1374 extern u8 fineibt_caller_end[]; 1385 1375 1386 1376 #define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) 1387 - #define fineibt_caller_hash 2 1377 + #define fineibt_caller_hash 1 1388 1378 1389 1379 #define fineibt_caller_jmp (fineibt_caller_size - 2) 1390 1380 ··· 1401 1391 * of adding a load. 1402 1392 * 1403 1393 * <fineibt_paranoid_start>: 1404 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 1405 - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d 1406 - * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 1394 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1395 + * 5: 41 3b 43 f5 cmp -0x11(%r11), %eax 1396 + * 9: 2e 4d 8d 5b <f0> cs lea -0x10(%r11), %r11 1407 1397 * e: 75 fd jne d <fineibt_paranoid_start+0xd> 1408 1398 * 10: 41 ff d3 call *%r11 1409 1399 * 13: 90 nop ··· 1415 1405 */ 1416 1406 asm( ".pushsection .rodata \n" 1417 1407 "fineibt_paranoid_start: \n" 1418 - " movl $0x12345678, %r10d \n" 1419 - " cmpl -9(%r11), %r10d \n" 1420 - " lea -0x10(%r11), %r11 \n" 1408 + " mov $0x12345678, %eax \n" 1409 + " cmpl -11(%r11), %eax \n" 1410 + " cs lea -0x10(%r11), %r11 \n" 1411 + "#fineibt_caller_size: \n" 1421 1412 " jne fineibt_paranoid_start+0xd \n" 1422 1413 "fineibt_paranoid_ind: \n" 1423 1414 " call *%r11 \n" ··· 1534 1523 return 0; 1535 1524 } 1536 1525 1526 + /* 1527 + * Inline the bhi-arity 1 case: 1528 + * 1529 + * __cfi_foo: 1530 + * 0: f3 0f 1e fa endbr64 1531 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1532 + * 9: 49 0f 45 fa cmovne %rax, %rdi 1533 + * d: 2e 75 03 jne,pn foo+0x3 1534 + * 
1535 + * foo: 1536 + * 10: 0f 1f 40 <d6> nopl -42(%rax) 1537 + * 1538 + * Notably, this scheme is incompatible with permissive CFI 1539 + * because the CMOVcc is unconditional and RDI will have been 1540 + * clobbered. 1541 + */ 1542 + asm( ".pushsection .rodata \n" 1543 + "fineibt_bhi1_start: \n" 1544 + " cmovne %rax, %rdi \n" 1545 + " cs jne fineibt_bhi1_func + 0x3 \n" 1546 + "fineibt_bhi1_func: \n" 1547 + " nopl -42(%rax) \n" 1548 + "fineibt_bhi1_end: \n" 1549 + ".popsection \n" 1550 + ); 1551 + 1552 + extern u8 fineibt_bhi1_start[]; 1553 + extern u8 fineibt_bhi1_end[]; 1554 + 1555 + #define fineibt_bhi1_size (fineibt_bhi1_end - fineibt_bhi1_start) 1556 + 1537 1557 static void cfi_fineibt_bhi_preamble(void *addr, int arity) 1538 1558 { 1559 + u8 bytes[MAX_INSN_SIZE]; 1560 + 1539 1561 if (!arity) 1540 1562 return; 1541 1563 1542 1564 if (!cfi_warn && arity == 1) { 1543 - /* 1544 - * Crazy scheme to allow arity-1 inline: 1545 - * 1546 - * __cfi_foo: 1547 - * 0: f3 0f 1e fa endbr64 1548 - * 4: 41 81 <ea> 78 56 34 12 sub 0x12345678, %r10d 1549 - * b: 49 0f 45 fa cmovne %r10, %rdi 1550 - * f: 75 f5 jne __cfi_foo+6 1551 - * 11: 0f 1f 00 nopl (%rax) 1552 - * 1553 - * Code that direct calls to foo()+0, decodes the tail end as: 1554 - * 1555 - * foo: 1556 - * 0: f5 cmc 1557 - * 1: 0f 1f 00 nopl (%rax) 1558 - * 1559 - * which clobbers CF, but does not affect anything ABI 1560 - * wise. 1561 - * 1562 - * Notably, this scheme is incompatible with permissive CFI 1563 - * because the CMOVcc is unconditional and RDI will have been 1564 - * clobbered. 
1565 - */ 1566 - const u8 magic[9] = { 1567 - 0x49, 0x0f, 0x45, 0xfa, 1568 - 0x75, 0xf5, 1569 - BYTES_NOP3, 1570 - }; 1571 - 1572 - text_poke_early(addr + fineibt_preamble_bhi, magic, 9); 1573 - 1565 + text_poke_early(addr + fineibt_preamble_bhi, 1566 + fineibt_bhi1_start, fineibt_bhi1_size); 1574 1567 return; 1575 1568 } 1576 1569 1577 - text_poke_early(addr + fineibt_preamble_bhi, 1578 - text_gen_insn(CALL_INSN_OPCODE, 1579 - addr + fineibt_preamble_bhi, 1580 - __bhi_args[arity]), 1581 - CALL_INSN_SIZE); 1570 + /* 1571 + * Replace the bytes at fineibt_preamble_bhi with a CALL instruction 1572 + * that lines up exactly with the end of the preamble, such that the 1573 + * return address will be foo+0. 1574 + * 1575 + * __cfi_foo: 1576 + * 0: f3 0f 1e fa endbr64 1577 + * 4: 2d 78 56 34 12 sub $0x12345678, %eax 1578 + * 9: 2e 2e e8 DD DD DD DD cs cs call __bhi_args[arity] 1579 + */ 1580 + bytes[0] = 0x2e; 1581 + bytes[1] = 0x2e; 1582 + __text_gen_insn(bytes + 2, CALL_INSN_OPCODE, 1583 + addr + fineibt_preamble_bhi + 2, 1584 + __bhi_args[arity], CALL_INSN_SIZE); 1585 + 1586 + text_poke_early(addr + fineibt_preamble_bhi, bytes, 7); 1582 1587 } 1583 1588 1584 1589 static int cfi_rewrite_preamble(s32 *start, s32 *end) ··· 1685 1658 { 1686 1659 s32 *s; 1687 1660 1688 - BUG_ON(fineibt_paranoid_size != 20); 1689 - 1690 1661 for (s = start; s < end; s++) { 1691 1662 void *addr = (void *)s + *s; 1692 1663 struct insn insn; ··· 1737 1712 1738 1713 #define pr_cfi_debug(X...) 
if (cfi_debug) pr_info(X) 1739 1714 1715 + #define FINEIBT_WARN(_f, _v) \ 1716 + WARN_ONCE((_f) != (_v), "FineIBT: " #_f " %ld != %d\n", _f, _v) 1717 + 1740 1718 static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, 1741 1719 s32 *start_cfi, s32 *end_cfi, bool builtin) 1742 1720 { 1743 1721 int ret; 1744 1722 1745 - if (WARN_ONCE(fineibt_preamble_size != 16, 1746 - "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) 1723 + if (FINEIBT_WARN(fineibt_preamble_size, 20) || 1724 + FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) || 1725 + FINEIBT_WARN(fineibt_caller_size, 14) || 1726 + FINEIBT_WARN(fineibt_paranoid_size, 20)) 1747 1727 return; 1748 1728 1749 1729 if (cfi_mode == CFI_AUTO) { ··· 1869 1839 1870 1840 /* 1871 1841 * __cfi_\func: 1872 - * osp nopl (%rax) 1873 - * subl $0, %r10d 1874 - * jz 1f 1875 - * ud2 1876 - * 1: nop 1842 + * nopl -42(%rax) 1843 + * sub $0, %eax 1844 + * jne \func+3 1845 + * \func: 1846 + * nopl -42(%rax) 1877 1847 */ 1878 1848 poison_endbr(addr); 1879 1849 poison_hash(addr + fineibt_preamble_hash); ··· 1899 1869 } 1900 1870 } 1901 1871 1872 + #define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE) 1873 + 1902 1874 /* 1903 - * When regs->ip points to a 0xEA byte in the FineIBT preamble, 1875 + * When regs->ip points to a 0xD6 byte in the FineIBT preamble, 1904 1876 * return true and fill out target and type. 1905 1877 * 1906 1878 * We check the preamble by checking for the ENDBR instruction relative to the 1907 - * 0xEA instruction. 1879 + * UDB instruction. 
1908 1880 */ 1909 1881 static bool decode_fineibt_preamble(struct pt_regs *regs, unsigned long *target, u32 *type) 1910 1882 { ··· 1916 1884 if (!exact_endbr((void *)addr)) 1917 1885 return false; 1918 1886 1919 - *target = addr + fineibt_preamble_size; 1887 + *target = addr + fineibt_prefix_size; 1920 1888 1921 1889 __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); 1922 - *type = (u32)regs->r10 + hash; 1890 + *type = (u32)regs->ax + hash; 1923 1891 1924 1892 /* 1925 1893 * Since regs->ip points to the middle of an instruction; it cannot ··· 1957 1925 __get_kernel_nofault(&addr, regs->sp, unsigned long, Efault); 1958 1926 *target = addr; 1959 1927 1960 - addr -= fineibt_preamble_size; 1928 + addr -= fineibt_prefix_size; 1961 1929 if (!exact_endbr((void *)addr)) 1962 1930 return false; 1963 1931 1964 1932 __get_kernel_nofault(&hash, addr + fineibt_preamble_hash, u32, Efault); 1965 - *type = (u32)regs->r10 + hash; 1933 + *type = (u32)regs->ax + hash; 1966 1934 1967 1935 /* 1968 1936 * The UD2 sites are constructed with a RET immediately following, ··· 1979 1947 u32 thunk; 1980 1948 1981 1949 __get_kernel_nofault(&thunk, (u32 *)addr, u32, Efault); 1982 - return (thunk & 0x00FFFFFF) == 0xfd75ea; 1950 + return (thunk & 0x00FFFFFF) == 0xfd75d6; 1983 1951 1984 1952 Efault: 1985 1953 return false; ··· 1987 1955 1988 1956 /* 1989 1957 * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] 1990 - * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS 1991 - * thunk. 1958 + * sequence, or to UDB + Jcc.d8 for cfi_paranoid + ITS thunk. 
1992 1959 */ 1993 1960 static bool decode_fineibt_paranoid(struct pt_regs *regs, unsigned long *target, u32 *type) 1994 1961 { ··· 1997 1966 return false; 1998 1967 1999 1968 if (is_cfi_trap(addr + fineibt_caller_size - LEN_UD2)) { 2000 - *target = regs->r11 + fineibt_preamble_size; 2001 - *type = regs->r10; 1969 + *target = regs->r11 + fineibt_prefix_size; 1970 + *type = regs->ax; 2002 1971 2003 1972 /* 2004 1973 * Since the trapping instruction is the exact, but LOCK prefixed, ··· 2010 1979 /* 2011 1980 * The cfi_paranoid + ITS thunk combination results in: 2012 1981 * 2013 - * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d 2014 - * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d 2015 - * a: 4d 8d 5b f0 lea -0x10(%r11), %r11 1982 + * 0: b8 78 56 34 12 mov $0x12345678, %eax 1983 + * 5: 41 3b 43 f7 cmp -11(%r11), %eax 1984 + * a: 2e 3d 8d 5b f0 cs lea -0x10(%r11), %r11 2016 1985 * e: 2e e8 XX XX XX XX cs call __x86_indirect_paranoid_thunk_r11 2017 1986 * 2018 1987 * Where the paranoid_thunk looks like: 2019 1988 * 2020 - * 1d: <ea> (bad) 1989 + * 1d: <d6> udb 2021 1990 * __x86_indirect_paranoid_thunk_r11: 2022 1991 * 1e: 75 fd jne 1d 2023 1992 * __x86_indirect_its_thunk_r11: ··· 2026 1995 * 2027 1996 */ 2028 1997 if (is_paranoid_thunk(regs->ip)) { 2029 - *target = regs->r11 + fineibt_preamble_size; 2030 - *type = regs->r10; 1998 + *target = regs->r11 + fineibt_prefix_size; 1999 + *type = regs->ax; 2031 2000 2032 2001 regs->ip = *target; 2033 2002 return true;
+4 -4
arch/x86/kernel/traps.c
··· 97 97 * Check for UD1 or UD2, accounting for Address Size Override Prefixes. 98 98 * If it's a UD1, further decode to determine its use: 99 99 * 100 - * FineIBT: ea (bad) 100 + * FineIBT: d6 udb 101 101 * FineIBT: f0 75 f9 lock jne . - 6 102 102 * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax 103 103 * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax ··· 130 130 WARN_ON_ONCE(!lock); 131 131 return BUG_LOCK; 132 132 133 - case 0xea: 133 + case 0xd6: 134 134 *len = addr - start; 135 - return BUG_EA; 135 + return BUG_UDB; 136 136 137 137 case OPCODE_ESCAPE: 138 138 break; ··· 341 341 } 342 342 fallthrough; 343 343 344 - case BUG_EA: 344 + case BUG_UDB: 345 345 case BUG_LOCK: 346 346 if (handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) { 347 347 handled = true;
+29 -29
arch/x86/lib/bhi.S
··· 5 5 #include <asm/nospec-branch.h> 6 6 7 7 /* 8 - * Notably, the FineIBT preamble calling these will have ZF set and r10 zero. 8 + * Notably, the FineIBT preamble calling these will have ZF set and eax zero. 9 9 * 10 10 * The very last element is in fact larger than 32 bytes, but since its the 11 11 * last element, this does not matter, ··· 36 36 ANNOTATE_NOENDBR 37 37 UNWIND_HINT_FUNC 38 38 jne .Lud_1 39 - cmovne %r10, %rdi 39 + cmovne %rax, %rdi 40 40 ANNOTATE_UNRET_SAFE 41 41 ret 42 42 int3 ··· 53 53 ANNOTATE_NOENDBR 54 54 UNWIND_HINT_FUNC 55 55 jne .Lud_1 56 - cmovne %r10, %rdi 57 - cmovne %r10, %rsi 56 + cmovne %rax, %rdi 57 + cmovne %rax, %rsi 58 58 ANNOTATE_UNRET_SAFE 59 59 ret 60 60 int3 ··· 64 64 ANNOTATE_NOENDBR 65 65 UNWIND_HINT_FUNC 66 66 jne .Lud_1 67 - cmovne %r10, %rdi 68 - cmovne %r10, %rsi 69 - cmovne %r10, %rdx 67 + cmovne %rax, %rdi 68 + cmovne %rax, %rsi 69 + cmovne %rax, %rdx 70 70 ANNOTATE_UNRET_SAFE 71 71 ret 72 72 int3 ··· 76 76 ANNOTATE_NOENDBR 77 77 UNWIND_HINT_FUNC 78 78 jne .Lud_2 79 - cmovne %r10, %rdi 80 - cmovne %r10, %rsi 81 - cmovne %r10, %rdx 82 - cmovne %r10, %rcx 79 + cmovne %rax, %rdi 80 + cmovne %rax, %rsi 81 + cmovne %rax, %rdx 82 + cmovne %rax, %rcx 83 83 ANNOTATE_UNRET_SAFE 84 84 ret 85 85 int3 ··· 89 89 ANNOTATE_NOENDBR 90 90 UNWIND_HINT_FUNC 91 91 jne .Lud_2 92 - cmovne %r10, %rdi 93 - cmovne %r10, %rsi 94 - cmovne %r10, %rdx 95 - cmovne %r10, %rcx 96 - cmovne %r10, %r8 92 + cmovne %rax, %rdi 93 + cmovne %rax, %rsi 94 + cmovne %rax, %rdx 95 + cmovne %rax, %rcx 96 + cmovne %rax, %r8 97 97 ANNOTATE_UNRET_SAFE 98 98 ret 99 99 int3 ··· 110 110 ANNOTATE_NOENDBR 111 111 UNWIND_HINT_FUNC 112 112 jne .Lud_2 113 - cmovne %r10, %rdi 114 - cmovne %r10, %rsi 115 - cmovne %r10, %rdx 116 - cmovne %r10, %rcx 117 - cmovne %r10, %r8 118 - cmovne %r10, %r9 113 + cmovne %rax, %rdi 114 + cmovne %rax, %rsi 115 + cmovne %rax, %rdx 116 + cmovne %rax, %rcx 117 + cmovne %rax, %r8 118 + cmovne %rax, %r9 119 119 ANNOTATE_UNRET_SAFE 120 120 ret 
121 121 int3 ··· 125 125 ANNOTATE_NOENDBR 126 126 UNWIND_HINT_FUNC 127 127 jne .Lud_2 128 - cmovne %r10, %rdi 129 - cmovne %r10, %rsi 130 - cmovne %r10, %rdx 131 - cmovne %r10, %rcx 132 - cmovne %r10, %r8 133 - cmovne %r10, %r9 134 - cmovne %r10, %rsp 128 + cmovne %rax, %rdi 129 + cmovne %rax, %rsi 130 + cmovne %rax, %rdx 131 + cmovne %rax, %rcx 132 + cmovne %rax, %r8 133 + cmovne %rax, %r9 134 + cmovne %rax, %rsp 135 135 ANNOTATE_UNRET_SAFE 136 136 ret 137 137 int3
+2 -2
arch/x86/lib/retpoline.S
··· 373 373 .macro ITS_THUNK reg 374 374 375 375 /* 376 - * If CFI paranoid is used then the ITS thunk starts with opcodes (0xea; jne 1b) 376 + * If CFI paranoid is used then the ITS thunk starts with opcodes (1: udb; jne 1b) 377 377 * that complete the fineibt_paranoid caller sequence. 378 378 */ 379 - 1: .byte 0xea 379 + 1: ASM_UDB 380 380 SYM_INNER_LABEL(__x86_indirect_paranoid_thunk_\reg, SYM_L_GLOBAL) 381 381 UNWIND_HINT_UNDEFINED 382 382 ANNOTATE_NOENDBR
+3 -3
arch/x86/net/bpf_jit_comp.c
··· 419 419 u8 *prog = *pprog; 420 420 421 421 EMIT_ENDBR(); 422 - EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ 422 + EMIT1_off32(0x2d, hash); /* subl $hash, %eax */ 423 423 if (cfi_bhi) { 424 + EMIT2(0x2e, 0x2e); /* cs cs */ 424 425 emit_call(&prog, __bhi_args[arity], ip + 11); 425 426 } else { 426 - EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ 427 - EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ 427 + EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */ 428 428 } 429 429 EMIT_ENDBR_POISON(); 430 430