Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc fixes from Thomas Gleixner:
"This update contains:

- a fix for stomp-machine so the nmi_watchdog won't trigger on the cpu
waiting for the others to execute the callback

- various fixes and updates to objtool including a resync of the
instruction decoder to match the kernel's decoder"

* 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
objtool: Un-capitalize "Warning" for out-of-sync instruction decoder
objtool: Resync x86 instruction decoder with the kernel's
objtool: Support new GCC 6 switch jump table pattern
stop_machine: Touch_nmi_watchdog() after MULTI_STOP_PREPARE
objtool: Add 'fixdep' to objtool/.gitignore

+318 -154
+8
kernel/stop_machine.c
··· 21 21 #include <linux/smpboot.h> 22 22 #include <linux/atomic.h> 23 23 #include <linux/lglock.h> 24 + #include <linux/nmi.h> 24 25 25 26 /* 26 27 * Structure to determine completion condition and record errors. May ··· 210 209 break; 211 210 } 212 211 ack_state(msdata); 212 + } else if (curstate > MULTI_STOP_PREPARE) { 213 + /* 214 + * At this stage all other CPUs we depend on must spin 215 + * in the same loop. Any reason for hard-lockup should 216 + * be detected and reported on their side. 217 + */ 218 + touch_nmi_watchdog(); 213 219 } 214 220 } while (curstate != MULTI_STOP_EXIT); 215 221
+1
tools/objtool/.gitignore
··· 1 1 arch/x86/insn/inat-tables.c 2 2 objtool 3 + fixdep
+1 -1
tools/objtool/Makefile
··· 51 51 diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ 52 52 diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ 53 53 diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ 54 - || echo "Warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true 54 + || echo "warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true 55 55 $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ 56 56 57 57
+8 -3
tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
··· 72 72 lprefix_expr = "\\((66|F2|F3)\\)" 73 73 max_lprefix = 4 74 74 75 - # All opcodes starting with lower-case 'v' or with (v1) superscript 75 + # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript 76 76 # accepts VEX prefix 77 - vexok_opcode_expr = "^v.*" 77 + vexok_opcode_expr = "^[vk].*" 78 78 vexok_expr = "\\(v1\\)" 79 79 # All opcodes with (v) superscript supports *only* VEX prefix 80 80 vexonly_expr = "\\(v\\)" 81 + # All opcodes with (ev) superscript supports *only* EVEX prefix 82 + evexonly_expr = "\\(ev\\)" 81 83 82 84 prefix_expr = "\\(Prefix\\)" 83 85 prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" ··· 97 95 prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" 98 96 prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" 99 97 prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" 98 + prefix_num["EVEX"] = "INAT_PFX_EVEX" 100 99 101 100 clear_vars() 102 101 } ··· 322 319 flags = add_flags(flags, "INAT_MODRM") 323 320 324 321 # check VEX codes 325 - if (match(ext, vexonly_expr)) 322 + if (match(ext, evexonly_expr)) 323 + flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY") 324 + else if (match(ext, vexonly_expr)) 326 325 flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") 327 326 else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) 328 327 flags = add_flags(flags, "INAT_VEXOK")
+15 -2
tools/objtool/arch/x86/insn/inat.h
··· 48 48 /* AVX VEX prefixes */ 49 49 #define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ 50 50 #define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ 51 + #define INAT_PFX_EVEX 15 /* EVEX prefix */ 51 52 52 53 #define INAT_LSTPFX_MAX 3 53 54 #define INAT_LGCPFX_MAX 11 ··· 90 89 #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) 91 90 #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) 92 91 #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) 92 + #define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7)) 93 93 /* Attribute making macros for attribute tables */ 94 94 #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) 95 95 #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) ··· 143 141 static inline int inat_is_vex_prefix(insn_attr_t attr) 144 142 { 145 143 attr &= INAT_PFX_MASK; 146 - return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; 144 + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 || 145 + attr == INAT_PFX_EVEX; 146 + } 147 + 148 + static inline int inat_is_evex_prefix(insn_attr_t attr) 149 + { 150 + return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX; 147 151 } 148 152 149 153 static inline int inat_is_vex3_prefix(insn_attr_t attr) ··· 224 216 225 217 static inline int inat_must_vex(insn_attr_t attr) 226 218 { 227 - return attr & INAT_VEXONLY; 219 + return attr & (INAT_VEXONLY | INAT_EVEXONLY); 220 + } 221 + 222 + static inline int inat_must_evex(insn_attr_t attr) 223 + { 224 + return attr & INAT_EVEXONLY; 228 225 } 229 226 #endif
+15 -3
tools/objtool/arch/x86/insn/insn.c
··· 155 155 /* 156 156 * In 32-bits mode, if the [7:6] bits (mod bits of 157 157 * ModRM) on the second byte are not 11b, it is 158 - * LDS or LES. 158 + * LDS or LES or BOUND. 159 159 */ 160 160 if (X86_MODRM_MOD(b2) != 3) 161 161 goto vex_end; 162 162 } 163 163 insn->vex_prefix.bytes[0] = b; 164 164 insn->vex_prefix.bytes[1] = b2; 165 - if (inat_is_vex3_prefix(attr)) { 165 + if (inat_is_evex_prefix(attr)) { 166 + b2 = peek_nbyte_next(insn_byte_t, insn, 2); 167 + insn->vex_prefix.bytes[2] = b2; 168 + b2 = peek_nbyte_next(insn_byte_t, insn, 3); 169 + insn->vex_prefix.bytes[3] = b2; 170 + insn->vex_prefix.nbytes = 4; 171 + insn->next_byte += 4; 172 + if (insn->x86_64 && X86_VEX_W(b2)) 173 + /* VEX.W overrides opnd_size */ 174 + insn->opnd_bytes = 8; 175 + } else if (inat_is_vex3_prefix(attr)) { 166 176 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 167 177 insn->vex_prefix.bytes[2] = b2; 168 178 insn->vex_prefix.nbytes = 3; ··· 231 221 m = insn_vex_m_bits(insn); 232 222 p = insn_vex_p_bits(insn); 233 223 insn->attr = inat_get_avx_attribute(op, m, p); 234 - if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) 224 + if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || 225 + (!inat_accept_vex(insn->attr) && 226 + !inat_is_group(insn->attr))) 235 227 insn->attr = 0; /* This instruction is bad */ 236 228 goto end; /* VEX has only 1 byte for opcode */ 237 229 }
+11 -1
tools/objtool/arch/x86/insn/insn.h
··· 91 91 #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ 92 92 #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ 93 93 /* VEX bit fields */ 94 + #define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ 94 95 #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ 95 96 #define X86_VEX2_M 1 /* VEX2.M always 1 */ 96 97 #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ ··· 134 133 return (insn->vex_prefix.value != 0); 135 134 } 136 135 136 + static inline int insn_is_evex(struct insn *insn) 137 + { 138 + if (!insn->prefixes.got) 139 + insn_get_prefixes(insn); 140 + return (insn->vex_prefix.nbytes == 4); 141 + } 142 + 137 143 /* Ensure this instruction is decoded completely */ 138 144 static inline int insn_complete(struct insn *insn) 139 145 { ··· 152 144 { 153 145 if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ 154 146 return X86_VEX2_M; 155 - else 147 + else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */ 156 148 return X86_VEX3_M(insn->vex_prefix.bytes[1]); 149 + else /* EVEX */ 150 + return X86_EVEX_M(insn->vex_prefix.bytes[1]); 157 151 } 158 152 159 153 static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+171 -92
tools/objtool/arch/x86/insn/x86-opcode-map.txt
··· 13 13 # opcode: escape # escaped-name 14 14 # EndTable 15 15 # 16 + # mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix 17 + # mnemonics that begin with lowercase 'k' accept a VEX prefix 18 + # 16 19 #<group maps> 17 20 # GrpTable: GrpXXX 18 21 # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] 19 22 # EndTable 20 23 # 21 24 # AVX Superscripts 25 + # (ev): this opcode requires EVEX prefix. 26 + # (evo): this opcode is changed by EVEX prefix (EVEX opcode) 22 27 # (v): this opcode requires VEX prefix. 23 28 # (v1): this opcode only supports 128bit VEX. 24 29 # ··· 142 137 # 0x60 - 0x6f 143 138 60: PUSHA/PUSHAD (i64) 144 139 61: POPA/POPAD (i64) 145 - 62: BOUND Gv,Ma (i64) 140 + 62: BOUND Gv,Ma (i64) | EVEX (Prefix) 146 141 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 147 142 64: SEG=FS (Prefix) 148 143 65: SEG=GS (Prefix) ··· 404 399 3f: 405 400 # 0x0f 0x40-0x4f 406 401 40: CMOVO Gv,Ev 407 - 41: CMOVNO Gv,Ev 408 - 42: CMOVB/C/NAE Gv,Ev 402 + 41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66) 403 + 42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66) 409 404 43: CMOVAE/NB/NC Gv,Ev 410 - 44: CMOVE/Z Gv,Ev 411 - 45: CMOVNE/NZ Gv,Ev 412 - 46: CMOVBE/NA Gv,Ev 413 - 47: CMOVA/NBE Gv,Ev 405 + 44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66) 406 + 45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66) 407 + 46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66) 408 + 47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66) 414 409 48: CMOVS Gv,Ev 415 410 49: CMOVNS Gv,Ev 416 - 4a: CMOVP/PE Gv,Ev 417 - 4b: CMOVNP/PO Gv,Ev 411 + 4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66) 412 + 4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk 418 413 4c: CMOVL/NGE Gv,Ev 419 414 4d: CMOVNL/GE Gv,Ev 420 415 4e: CMOVLE/NG Gv,Ev ··· 431 426 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) 432 427 
59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) 433 428 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) 434 - 5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 429 + 5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) 435 430 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) 436 431 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) 437 432 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) ··· 452 447 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) 453 448 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) 454 449 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) 455 - 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) 450 + 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev) 456 451 # 0x0f 0x70-0x7f 457 452 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) 458 453 71: Grp12 (1A) ··· 463 458 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) 464 459 # Note: Remove (v), because vzeroall and vzeroupper becomes emms without VEX. 
465 460 77: emms | vzeroupper | vzeroall 466 - 78: VMREAD Ey,Gy 467 - 79: VMWRITE Gy,Ey 468 - 7a: 469 - 7b: 461 + 78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev) 462 + 79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev) 463 + 7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev) 464 + 7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev) 470 465 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) 471 466 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) 472 467 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 473 - 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) 468 + 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) 474 469 # 0x0f 0x80-0x8f 475 470 # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 
476 471 80: JO Jz (f64) ··· 490 485 8e: JLE/JNG Jz (f64) 491 486 8f: JNLE/JG Jz (f64) 492 487 # 0x0f 0x90-0x9f 493 - 90: SETO Eb 494 - 91: SETNO Eb 495 - 92: SETB/C/NAE Eb 496 - 93: SETAE/NB/NC Eb 488 + 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 489 + 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) 490 + 92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2) 491 + 93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2) 497 492 94: SETE/Z Eb 498 493 95: SETNE/NZ Eb 499 494 96: SETBE/NA Eb 500 495 97: SETA/NBE Eb 501 - 98: SETS Eb 502 - 99: SETNS Eb 496 + 98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66) 497 + 99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66) 503 498 9a: SETP/PE Eb 504 499 9b: SETNP/PO Eb 505 500 9c: SETL/NGE Eb ··· 569 564 d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) 570 565 d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) 571 566 da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) 572 - db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) 567 + db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo) 573 568 dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) 574 569 dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) 575 570 de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) 576 - df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) 571 + df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo) 577 572 # 0x0f 0xe0-0xef 578 573 e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) 579 574 e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) ··· 581 576 e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) 582 577 e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) 583 578 e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) 584 - e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) 579 + e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2) 585 580 e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) 586 581 e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) 587 582 e9: psubsw Pq,Qq 
| vpsubsw Vx,Hx,Wx (66),(v1) 588 583 ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) 589 - eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) 584 + eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo) 590 585 ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) 591 586 ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) 592 587 ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) 593 - ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) 588 + ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo) 594 589 # 0x0f 0xf0-0xff 595 590 f0: vlddqu Vx,Mx (F2) 596 591 f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) ··· 631 626 0e: vtestps Vx,Wx (66),(v) 632 627 0f: vtestpd Vx,Wx (66),(v) 633 628 # 0x0f 0x38 0x10-0x1f 634 - 10: pblendvb Vdq,Wdq (66) 635 - 11: 636 - 12: 637 - 13: vcvtph2ps Vx,Wx,Ib (66),(v) 638 - 14: blendvps Vdq,Wdq (66) 639 - 15: blendvpd Vdq,Wdq (66) 640 - 16: vpermps Vqq,Hqq,Wqq (66),(v) 629 + 10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev) 630 + 11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev) 631 + 12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev) 632 + 13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev) 633 + 14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo) 634 + 15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo) 635 + 16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo) 641 636 17: vptest Vx,Wx (66) 642 637 18: vbroadcastss Vx,Wd (66),(v) 643 - 19: vbroadcastsd Vqq,Wq (66),(v) 644 - 1a: vbroadcastf128 Vqq,Mdq (66),(v) 645 - 1b: 638 + 19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo) 639 + 1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo) 640 + 1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev) 646 641 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) 647 642 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) 648 643 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) 649 - 1f: 644 + 1f: vpabsq Vx,Wx (66),(ev) 
650 645 # 0x0f 0x38 0x20-0x2f 651 - 20: vpmovsxbw Vx,Ux/Mq (66),(v1) 652 - 21: vpmovsxbd Vx,Ux/Md (66),(v1) 653 - 22: vpmovsxbq Vx,Ux/Mw (66),(v1) 654 - 23: vpmovsxwd Vx,Ux/Mq (66),(v1) 655 - 24: vpmovsxwq Vx,Ux/Md (66),(v1) 656 - 25: vpmovsxdq Vx,Ux/Mq (66),(v1) 657 - 26: 658 - 27: 659 - 28: vpmuldq Vx,Hx,Wx (66),(v1) 660 - 29: vpcmpeqq Vx,Hx,Wx (66),(v1) 661 - 2a: vmovntdqa Vx,Mx (66),(v1) 646 + 20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev) 647 + 21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev) 648 + 22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev) 649 + 23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev) 650 + 24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev) 651 + 25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev) 652 + 26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev) 653 + 27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev) 654 + 28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev) 655 + 29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev) 656 + 2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev) 662 657 2b: vpackusdw Vx,Hx,Wx (66),(v1) 663 - 2c: vmaskmovps Vx,Hx,Mx (66),(v) 664 - 2d: vmaskmovpd Vx,Hx,Mx (66),(v) 658 + 2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo) 659 + 2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo) 665 660 2e: vmaskmovps Mx,Hx,Vx (66),(v) 666 661 2f: vmaskmovpd Mx,Hx,Vx (66),(v) 667 662 # 0x0f 0x38 0x30-0x3f 668 - 30: vpmovzxbw Vx,Ux/Mq (66),(v1) 669 - 31: vpmovzxbd Vx,Ux/Md (66),(v1) 670 - 32: vpmovzxbq Vx,Ux/Mw (66),(v1) 671 - 33: vpmovzxwd Vx,Ux/Mq (66),(v1) 672 - 34: vpmovzxwq Vx,Ux/Md (66),(v1) 673 - 35: vpmovzxdq Vx,Ux/Mq (66),(v1) 674 - 36: vpermd Vqq,Hqq,Wqq (66),(v) 663 + 30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev) 664 + 31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev) 665 + 32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev) 666 + 33: 
vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev) 667 + 34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev) 668 + 35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev) 669 + 36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo) 675 670 37: vpcmpgtq Vx,Hx,Wx (66),(v1) 676 - 38: vpminsb Vx,Hx,Wx (66),(v1) 677 - 39: vpminsd Vx,Hx,Wx (66),(v1) 678 - 3a: vpminuw Vx,Hx,Wx (66),(v1) 679 - 3b: vpminud Vx,Hx,Wx (66),(v1) 671 + 38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev) 672 + 39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev) 673 + 3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev) 674 + 3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo) 680 675 3c: vpmaxsb Vx,Hx,Wx (66),(v1) 681 - 3d: vpmaxsd Vx,Hx,Wx (66),(v1) 676 + 3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo) 682 677 3e: vpmaxuw Vx,Hx,Wx (66),(v1) 683 - 3f: vpmaxud Vx,Hx,Wx (66),(v1) 678 + 3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo) 684 679 # 0x0f 0x38 0x40-0x8f 685 - 40: vpmulld Vx,Hx,Wx (66),(v1) 680 + 40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo) 686 681 41: vphminposuw Vdq,Wdq (66),(v1) 687 - 42: 688 - 43: 689 - 44: 682 + 42: vgetexpps/d Vx,Wx (66),(ev) 683 + 43: vgetexpss/d Vx,Hx,Wx (66),(ev) 684 + 44: vplzcntd/q Vx,Wx (66),(ev) 690 685 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 691 - 46: vpsravd Vx,Hx,Wx (66),(v) 686 + 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 692 687 47: vpsllvd/q Vx,Hx,Wx (66),(v) 693 - # Skip 0x48-0x57 688 + # Skip 0x48-0x4b 689 + 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 690 + 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 691 + 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) 692 + 4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev) 693 + # Skip 0x50-0x57 694 694 58: vpbroadcastd Vx,Wx (66),(v) 695 - 59: vpbroadcastq Vx,Wx (66),(v) 696 - 5a: vbroadcasti128 Vqq,Mdq (66),(v) 697 - # Skip 0x5b-0x77 695 + 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx 
(66),(evo) 696 + 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 697 + 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) 698 + # Skip 0x5c-0x63 699 + 64: vpblendmd/q Vx,Hx,Wx (66),(ev) 700 + 65: vblendmps/d Vx,Hx,Wx (66),(ev) 701 + 66: vpblendmb/w Vx,Hx,Wx (66),(ev) 702 + # Skip 0x67-0x74 703 + 75: vpermi2b/w Vx,Hx,Wx (66),(ev) 704 + 76: vpermi2d/q Vx,Hx,Wx (66),(ev) 705 + 77: vpermi2ps/d Vx,Hx,Wx (66),(ev) 698 706 78: vpbroadcastb Vx,Wx (66),(v) 699 707 79: vpbroadcastw Vx,Wx (66),(v) 700 - # Skip 0x7a-0x7f 708 + 7a: vpbroadcastb Vx,Rv (66),(ev) 709 + 7b: vpbroadcastw Vx,Rv (66),(ev) 710 + 7c: vpbroadcastd/q Vx,Rv (66),(ev) 711 + 7d: vpermt2b/w Vx,Hx,Wx (66),(ev) 712 + 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 713 + 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 701 714 80: INVEPT Gy,Mdq (66) 702 715 81: INVPID Gy,Mdq (66) 703 716 82: INVPCID Gy,Mdq (66) 717 + 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 718 + 88: vexpandps/d Vpd,Wpd (66),(ev) 719 + 89: vpexpandd/q Vx,Wx (66),(ev) 720 + 8a: vcompressps/d Wx,Vx (66),(ev) 721 + 8b: vpcompressd/q Wx,Vx (66),(ev) 704 722 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) 723 + 8d: vpermb/w Vx,Hx,Wx (66),(ev) 705 724 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) 706 725 # 0x0f 0x38 0x90-0xbf (FMA) 707 - 90: vgatherdd/q Vx,Hx,Wx (66),(v) 708 - 91: vgatherqd/q Vx,Hx,Wx (66),(v) 726 + 90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo) 727 + 91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo) 709 728 92: vgatherdps/d Vx,Hx,Wx (66),(v) 710 729 93: vgatherqps/d Vx,Hx,Wx (66),(v) 711 730 94: ··· 744 715 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) 745 716 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) 746 717 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) 718 + a0: vpscatterdd/q Wx,Vx (66),(ev) 719 + a1: vpscatterqd/q Wx,Vx (66),(ev) 720 + a2: vscatterdps/d Wx,Vx (66),(ev) 721 + a3: vscatterqps/d Wx,Vx (66),(ev) 747 722 a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) 748 723 a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) 749 724 a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) 
··· 758 725 ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) 759 726 ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) 760 727 af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) 728 + b4: vpmadd52luq Vx,Hx,Wx (66),(ev) 729 + b5: vpmadd52huq Vx,Hx,Wx (66),(ev) 761 730 b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) 762 731 b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) 763 732 b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) ··· 771 736 be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) 772 737 bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) 773 738 # 0x0f 0x38 0xc0-0xff 774 - c8: sha1nexte Vdq,Wdq 739 + c4: vpconflictd/q Vx,Wx (66),(ev) 740 + c6: Grp18 (1A) 741 + c7: Grp19 (1A) 742 + c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev) 775 743 c9: sha1msg1 Vdq,Wdq 776 - ca: sha1msg2 Vdq,Wdq 777 - cb: sha256rnds2 Vdq,Wdq 778 - cc: sha256msg1 Vdq,Wdq 779 - cd: sha256msg2 Vdq,Wdq 744 + ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev) 745 + cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev) 746 + cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev) 747 + cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev) 780 748 db: VAESIMC Vdq,Wdq (66),(v1) 781 749 dc: VAESENC Vdq,Hdq,Wdq (66),(v1) 782 750 dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) ··· 801 763 00: vpermq Vqq,Wqq,Ib (66),(v) 802 764 01: vpermpd Vqq,Wqq,Ib (66),(v) 803 765 02: vpblendd Vx,Hx,Wx,Ib (66),(v) 804 - 03: 766 + 03: valignd/q Vx,Hx,Wx,Ib (66),(ev) 805 767 04: vpermilps Vx,Wx,Ib (66),(v) 806 768 05: vpermilpd Vx,Wx,Ib (66),(v) 807 769 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 808 770 07: 809 - 08: vroundps Vx,Wx,Ib (66) 810 - 09: vroundpd Vx,Wx,Ib (66) 811 - 0a: vroundss Vss,Wss,Ib (66),(v1) 812 - 0b: vroundsd Vsd,Wsd,Ib (66),(v1) 771 + 08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) 772 + 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) 773 + 0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) 774 + 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 813 775 0c: vblendps Vx,Hx,Wx,Ib (66) 814 776 0d: vblendpd Vx,Hx,Wx,Ib 
(66) 815 777 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) ··· 818 780 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) 819 781 16: vpextrd/q Ey,Vdq,Ib (66),(v1) 820 782 17: vextractps Ed,Vdq,Ib (66),(v1) 821 - 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) 822 - 19: vextractf128 Wdq,Vqq,Ib (66),(v) 783 + 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) 784 + 19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo) 785 + 1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) 786 + 1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev) 823 787 1d: vcvtps2ph Wx,Vx,Ib (66),(v) 788 + 1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev) 789 + 1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev) 824 790 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) 825 791 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) 826 792 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 827 - 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) 828 - 39: vextracti128 Wdq,Vqq,Ib (66),(v) 793 + 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 794 + 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) 795 + 26: vgetmantps/d Vx,Wx,Ib (66),(ev) 796 + 27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) 797 + 30: kshiftrb/w Vk,Uk,Ib (66),(v) 798 + 31: kshiftrd/q Vk,Uk,Ib (66),(v) 799 + 32: kshiftlb/w Vk,Uk,Ib (66),(v) 800 + 33: kshiftld/q Vk,Uk,Ib (66),(v) 801 + 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo) 802 + 39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo) 803 + 3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev) 804 + 3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev) 805 + 3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev) 806 + 3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev) 829 807 40: vdpps Vx,Hx,Wx,Ib (66) 830 808 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) 831 - 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) 809 + 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo) 810 + 43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 832 811 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) 833 812 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) 834 813 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) 835 814 4b: 
vblendvpd Vx,Hx,Wx,Lx (66),(v) 836 815 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) 816 + 50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev) 817 + 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 818 + 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 819 + 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) 820 + 56: vreduceps/d Vx,Wx,Ib (66),(ev) 821 + 57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) 837 822 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 838 823 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 839 824 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 840 825 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) 826 + 66: vfpclassps/d Vk,Wx,Ib (66),(ev) 827 + 67: vfpclassss/d Vk,Wx,Ib (66),(ev) 841 828 cc: sha1rnds4 Vdq,Wdq,Ib 842 829 df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) 843 830 f0: RORX Gy,Ey,Ib (F2),(v) ··· 990 927 EndTable 991 928 992 929 GrpTable: Grp13 930 + 0: vprord/q Hx,Wx,Ib (66),(ev) 931 + 1: vprold/q Hx,Wx,Ib (66),(ev) 993 932 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) 994 - 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) 933 + 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo) 995 934 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) 996 935 EndTable 997 936 ··· 1026 961 1: BLSR By,Ey (v) 1027 962 2: BLSMSK By,Ey (v) 1028 963 3: BLSI By,Ey (v) 964 + EndTable 965 + 966 + GrpTable: Grp18 967 + 1: vgatherpf0dps/d Wx (66),(ev) 968 + 2: vgatherpf1dps/d Wx (66),(ev) 969 + 5: vscatterpf0dps/d Wx (66),(ev) 970 + 6: vscatterpf1dps/d Wx (66),(ev) 971 + EndTable 972 + 973 + GrpTable: Grp19 974 + 1: vgatherpf0qps/d Wx (66),(ev) 975 + 2: vgatherpf1qps/d Wx (66),(ev) 976 + 5: vscatterpf0qps/d Wx (66),(ev) 977 + 6: vscatterpf1qps/d Wx (66),(ev) 1029 978 EndTable 1030 979 1031 980 # AMD's Prefetch Group
+88 -52
tools/objtool/builtin-check.c
··· 107 107 insn->offset < func->offset + func->len; \ 108 108 insn = list_next_entry(insn, list)) 109 109 110 + #define func_for_each_insn_continue_reverse(file, func, insn) \ 111 + for (insn = list_prev_entry(insn, list); \ 112 + &insn->list != &file->insn_list && \ 113 + insn->sec == func->sec && insn->offset >= func->offset; \ 114 + insn = list_prev_entry(insn, list)) 115 + 110 116 #define sec_for_each_insn_from(file, insn) \ 111 117 for (; insn; insn = next_insn_same_sec(file, insn)) 112 118 ··· 670 664 return 0; 671 665 } 672 666 667 + /* 668 + * find_switch_table() - Given a dynamic jump, find the switch jump table in 669 + * .rodata associated with it. 670 + * 671 + * There are 3 basic patterns: 672 + * 673 + * 1. jmpq *[rodata addr](,%reg,8) 674 + * 675 + * This is the most common case by far. It jumps to an address in a simple 676 + * jump table which is stored in .rodata. 677 + * 678 + * 2. jmpq *[rodata addr](%rip) 679 + * 680 + * This is caused by a rare GCC quirk, currently only seen in three driver 681 + * functions in the kernel, only with certain obscure non-distro configs. 682 + * 683 + * As part of an optimization, GCC makes a copy of an existing switch jump 684 + * table, modifies it, and then hard-codes the jump (albeit with an indirect 685 + * jump) to use a single entry in the table. The rest of the jump table and 686 + * some of its jump targets remain as dead code. 687 + * 688 + * In such a case we can just crudely ignore all unreachable instruction 689 + * warnings for the entire object file. Ideally we would just ignore them 690 + * for the function, but that would require redesigning the code quite a 691 + * bit. And honestly that's just not worth doing: unreachable instruction 692 + * warnings are of questionable value anyway, and this is such a rare issue. 693 + * 694 + * 3. mov [rodata addr],%reg1 695 + * ... some instructions ... 696 + * jmpq *(%reg1,%reg2,8) 697 + * 698 + * This is a fairly uncommon pattern which is new for GCC 6. 
As of this 699 + * writing, there are 11 occurrences of it in the allmodconfig kernel. 700 + * 701 + * TODO: Once we have DWARF CFI and smarter instruction decoding logic, 702 + * ensure the same register is used in the mov and jump instructions. 703 + */ 704 + static struct rela *find_switch_table(struct objtool_file *file, 705 + struct symbol *func, 706 + struct instruction *insn) 707 + { 708 + struct rela *text_rela, *rodata_rela; 709 + 710 + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); 711 + if (text_rela && text_rela->sym == file->rodata->sym) { 712 + /* case 1 */ 713 + rodata_rela = find_rela_by_dest(file->rodata, 714 + text_rela->addend); 715 + if (rodata_rela) 716 + return rodata_rela; 717 + 718 + /* case 2 */ 719 + rodata_rela = find_rela_by_dest(file->rodata, 720 + text_rela->addend + 4); 721 + if (!rodata_rela) 722 + return NULL; 723 + file->ignore_unreachables = true; 724 + return rodata_rela; 725 + } 726 + 727 + /* case 3 */ 728 + func_for_each_insn_continue_reverse(file, func, insn) { 729 + if (insn->type == INSN_JUMP_UNCONDITIONAL || 730 + insn->type == INSN_JUMP_DYNAMIC) 731 + break; 732 + 733 + text_rela = find_rela_by_dest_range(insn->sec, insn->offset, 734 + insn->len); 735 + if (text_rela && text_rela->sym == file->rodata->sym) 736 + return find_rela_by_dest(file->rodata, 737 + text_rela->addend); 738 + } 739 + 740 + return NULL; 741 + } 742 + 673 743 static int add_func_switch_tables(struct objtool_file *file, 674 744 struct symbol *func) 675 745 { 676 - struct instruction *insn, *prev_jump; 677 - struct rela *text_rela, *rodata_rela, *prev_rela = NULL; 746 + struct instruction *insn, *prev_jump = NULL; 747 + struct rela *rela, *prev_rela = NULL; 678 748 int ret; 679 - 680 - prev_jump = NULL; 681 749 682 750 func_for_each_insn(file, func, insn) { 683 751 if (insn->type != INSN_JUMP_DYNAMIC) 684 752 continue; 685 753 686 - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, 687 - insn->len); 688 - if 
(!text_rela || text_rela->sym != file->rodata->sym) 689 - continue; 690 - 691 - /* common case: jmpq *[addr](,%rax,8) */ 692 - rodata_rela = find_rela_by_dest(file->rodata, 693 - text_rela->addend); 694 - 695 - /* 696 - * rare case: jmpq *[addr](%rip) 697 - * 698 - * This check is for a rare gcc quirk, currently only seen in 699 - * three driver functions in the kernel, only with certain 700 - * obscure non-distro configs. 701 - * 702 - * As part of an optimization, gcc makes a copy of an existing 703 - * switch jump table, modifies it, and then hard-codes the jump 704 - * (albeit with an indirect jump) to use a single entry in the 705 - * table. The rest of the jump table and some of its jump 706 - * targets remain as dead code. 707 - * 708 - * In such a case we can just crudely ignore all unreachable 709 - * instruction warnings for the entire object file. Ideally we 710 - * would just ignore them for the function, but that would 711 - * require redesigning the code quite a bit. And honestly 712 - * that's just not worth doing: unreachable instruction 713 - * warnings are of questionable value anyway, and this is such 714 - * a rare issue. 
715 - * 716 - * kbuild reports: 717 - * - https://lkml.kernel.org/r/201603231906.LWcVUpxm%25fengguang.wu@intel.com 718 - * - https://lkml.kernel.org/r/201603271114.K9i45biy%25fengguang.wu@intel.com 719 - * - https://lkml.kernel.org/r/201603291058.zuJ6ben1%25fengguang.wu@intel.com 720 - * 721 - * gcc bug: 722 - * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70604 723 - */ 724 - if (!rodata_rela) { 725 - rodata_rela = find_rela_by_dest(file->rodata, 726 - text_rela->addend + 4); 727 - if (rodata_rela) 728 - file->ignore_unreachables = true; 729 - } 730 - 731 - if (!rodata_rela) 754 + rela = find_switch_table(file, func, insn); 755 + if (!rela) 732 756 continue; 733 757 734 758 /* ··· 768 732 */ 769 733 if (prev_jump) { 770 734 ret = add_switch_table(file, func, prev_jump, prev_rela, 771 - rodata_rela); 735 + rela); 772 736 if (ret) 773 737 return ret; 774 738 } 775 739 776 740 prev_jump = insn; 777 - prev_rela = rodata_rela; 741 + prev_rela = rela; 778 742 } 779 743 780 744 if (prev_jump) {