Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/insn,uprobes,alternative: Unify insn_is_nop()

Both uprobes and alternatives have insn_is_nop() variants, unify them
and make sure insn_is_nop() works for both x86_64 and i386.

Specifically, uprobe must not compare userspace instructions to kernel
nops as that does not work right in the compat case.

For the uprobe case we therefore must recognise common 32bit and 64bit
nops. Because uprobe will consume the instruction as a nop, it must
not mistakenly claim a non-nop instruction to be a nop. E.g. 'REX.b3
NOP' is 'xchg %r8,%rax' - not a nop.

For the kernel case similar constraints apply, as it is used to optimize
NOPs by replacing strings of short(er) nops with longer nops. Must not
claim an instruction is a nop if it really isn't. Not recognising a
nop is non-fatal.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

+151 -48
+2
arch/x86/include/asm/insn-eval.h
··· 44 44 45 45 enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes); 46 46 47 + bool insn_is_nop(struct insn *insn); 48 + 47 49 #endif /* _ASM_X86_INSN_EVAL_H */
+1 -19
arch/x86/kernel/alternative.c
··· 9 9 10 10 #include <asm/text-patching.h> 11 11 #include <asm/insn.h> 12 + #include <asm/insn-eval.h> 12 13 #include <asm/ibt.h> 13 14 #include <asm/set_memory.h> 14 15 #include <asm/nmi.h> ··· 344 343 345 344 for (;buf < target; buf++) 346 345 *buf = INT3_INSN_OPCODE; 347 - } 348 - 349 - /* 350 - * Matches NOP and NOPL, not any of the other possible NOPs. 351 - */ 352 - static bool insn_is_nop(struct insn *insn) 353 - { 354 - /* Anything NOP, but no REP NOP */ 355 - if (insn->opcode.bytes[0] == 0x90 && 356 - (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3)) 357 - return true; 358 - 359 - /* NOPL */ 360 - if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F) 361 - return true; 362 - 363 - /* TODO: more nops */ 364 - 365 - return false; 366 346 } 367 347 368 348 /*
+3 -29
arch/x86/kernel/uprobes.c
··· 17 17 #include <linux/kdebug.h> 18 18 #include <asm/processor.h> 19 19 #include <asm/insn.h> 20 + #include <asm/insn-eval.h> 20 21 #include <asm/mmu_context.h> 21 22 #include <asm/nops.h> 22 23 ··· 1159 1158 mmap_write_unlock(mm); 1160 1159 } 1161 1160 1162 - static bool insn_is_nop(struct insn *insn) 1163 - { 1164 - return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90; 1165 - } 1166 - 1167 - static bool insn_is_nopl(struct insn *insn) 1168 - { 1169 - if (insn->opcode.nbytes != 2) 1170 - return false; 1171 - 1172 - if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f) 1173 - return false; 1174 - 1175 - if (!insn->modrm.nbytes) 1176 - return false; 1177 - 1178 - if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0) 1179 - return false; 1180 - 1181 - /* 0f 1f /0 - NOPL */ 1182 - return true; 1183 - } 1184 - 1185 1161 static bool can_optimize(struct insn *insn, unsigned long vaddr) 1186 1162 { 1187 1163 if (!insn->x86_64 || insn->length != 5) 1188 1164 return false; 1189 1165 1190 - if (!insn_is_nop(insn) && !insn_is_nopl(insn)) 1166 + if (!insn_is_nop(insn)) 1191 1167 return false; 1192 1168 1193 1169 /* We can't do cross page atomic writes yet. */ ··· 1406 1428 insn_byte_t p; 1407 1429 int i; 1408 1430 1409 - /* x86_nops[insn->length]; same as jmp with .offs = 0 */ 1410 - if (insn->length <= ASM_NOP_MAX && 1411 - !memcmp(insn->kaddr, x86_nops[insn->length], insn->length)) 1431 + if (insn_is_nop(insn)) 1412 1432 goto setup; 1413 1433 1414 1434 switch (opc1) { 1415 1435 case 0xeb: /* jmp 8 */ 1416 1436 case 0xe9: /* jmp 32 */ 1417 1437 break; 1418 - case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ 1419 - goto setup; 1420 1438 1421 1439 case 0xe8: /* call relative */ 1422 1440 branch_clear_offset(auprobe, insn);
+145
arch/x86/lib/insn-eval.c
··· 1676 1676 1677 1677 return type; 1678 1678 } 1679 + 1680 + /* 1681 + * Recognise typical NOP patterns for both 32bit and 64bit. 1682 + * 1683 + * Notably: 1684 + * - NOP, but not: REP NOP aka PAUSE 1685 + * - NOPL 1686 + * - MOV %reg, %reg 1687 + * - LEA 0(%reg),%reg 1688 + * - JMP +0 1689 + * 1690 + * Must not have false-positives; instructions identified as a NOP might be 1691 + * emulated as a NOP (uprobe) or Run Length Encoded in a larger NOP 1692 + * (alternatives). 1693 + * 1694 + * False-negatives are fine; need not be exhaustive. 1695 + */ 1696 + bool insn_is_nop(struct insn *insn) 1697 + { 1698 + u8 b3 = 0, x3 = 0, r3 = 0; 1699 + u8 b4 = 0, x4 = 0, r4 = 0, m = 0; 1700 + u8 modrm, modrm_mod, modrm_reg, modrm_rm; 1701 + u8 sib = 0, sib_scale, sib_index, sib_base; 1702 + u8 nrex, rex; 1703 + u8 p, rep = 0; 1704 + int i; 1705 + 1706 + if ((nrex = insn->rex_prefix.nbytes)) { 1707 + rex = insn->rex_prefix.bytes[nrex-1]; 1708 + 1709 + r3 = !!X86_REX_R(rex); 1710 + x3 = !!X86_REX_X(rex); 1711 + b3 = !!X86_REX_B(rex); 1712 + if (nrex > 1) { 1713 + r4 = !!X86_REX2_R(rex); 1714 + x4 = !!X86_REX2_X(rex); 1715 + b4 = !!X86_REX2_B(rex); 1716 + m = !!X86_REX2_M(rex); 1717 + } 1718 + 1719 + } else if (insn->vex_prefix.nbytes) { 1720 + /* 1721 + * Ignore VEX encoded NOPs 1722 + */ 1723 + return false; 1724 + } 1725 + 1726 + if (insn->modrm.nbytes) { 1727 + modrm = insn->modrm.bytes[0]; 1728 + modrm_mod = X86_MODRM_MOD(modrm); 1729 + modrm_reg = X86_MODRM_REG(modrm) + 8*r3 + 16*r4; 1730 + modrm_rm = X86_MODRM_RM(modrm) + 8*b3 + 16*b4; 1731 + modrm = 1; 1732 + } 1733 + 1734 + if (insn->sib.nbytes) { 1735 + sib = insn->sib.bytes[0]; 1736 + sib_scale = X86_SIB_SCALE(sib); 1737 + sib_index = X86_SIB_INDEX(sib) + 8*x3 + 16*x4; 1738 + sib_base = X86_SIB_BASE(sib) + 8*b3 + 16*b4; 1739 + sib = 1; 1740 + 1741 + modrm_rm = sib_base; 1742 + } 1743 + 1744 + for_each_insn_prefix(insn, i, p) { 1745 + if (p == 0xf3) /* REPE */ 1746 + rep = 1; 1747 + } 1748 + 1749 + /* 1750 + * Opcode 
map munging: 1751 + * 1752 + * REX2: 0 - single byte opcode 1753 + * 1 - 0f second byte opcode 1754 + */ 1755 + switch (m) { 1756 + case 0: break; 1757 + case 1: insn->opcode.value <<= 8; 1758 + insn->opcode.value |= 0x0f; 1759 + break; 1760 + default: 1761 + return false; 1762 + } 1763 + 1764 + switch (insn->opcode.bytes[0]) { 1765 + case 0x0f: /* 2nd byte */ 1766 + break; 1767 + 1768 + case 0x89: /* MOV */ 1769 + if (modrm_mod != 3) /* register-direct */ 1770 + return false; 1771 + 1772 + /* native size */ 1773 + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) 1774 + return false; 1775 + 1776 + return modrm_reg == modrm_rm; /* MOV %reg, %reg */ 1777 + 1778 + case 0x8d: /* LEA */ 1779 + if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */ 1780 + return false; 1781 + 1782 + /* native size */ 1783 + if (insn->opnd_bytes != 4 * (1 + insn->x86_64)) 1784 + return false; 1785 + 1786 + if (insn->displacement.value != 0) 1787 + return false; 1788 + 1789 + if (sib && (sib_scale != 0 || sib_index != 4)) /* (%reg, %eiz, 1) */ 1790 + return false; 1791 + 1792 + for_each_insn_prefix(insn, i, p) { 1793 + if (p != 0x3e) /* DS */ 1794 + return false; 1795 + } 1796 + 1797 + return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */ 1798 + 1799 + case 0x90: /* NOP */ 1800 + if (b3 || b4) /* XCHG %r{8,16,24},%rax */ 1801 + return false; 1802 + 1803 + if (rep) /* REP NOP := PAUSE */ 1804 + return false; 1805 + 1806 + return true; 1807 + 1808 + case 0xe9: /* JMP.d32 */ 1809 + case 0xeb: /* JMP.d8 */ 1810 + return insn->immediate.value == 0; /* JMP +0 */ 1811 + 1812 + default: 1813 + return false; 1814 + } 1815 + 1816 + switch (insn->opcode.bytes[1]) { 1817 + case 0x1f: 1818 + return modrm_reg == 0; /* 0f 1f /0 -- NOPL */ 1819 + 1820 + default: 1821 + return false; 1822 + } 1823 + }