KVM: x86: Add AVX support to the emulator's register fetch and writeback

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Prepare struct operand for hosting AVX registers. Remove the
existing, incomplete code that placed the Avx flag in the operand
alignment field, and repurpose the name for a separate bit that
indicates:

- after decode, whether an instruction supports the VEX prefix;

- before writeback, that the instruction did have the VEX prefix and
therefore 1) it can have op_bytes == 32; 2) it should clear high
bytes of XMM registers.

Right now the bit will never be set and the patch has no intended
functional change. However, this is actually more vexing than the
decoder changes themselves, and therefore worth separating.

Co-developed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://patch.msgid.link/20251114003633.60689-8-pbonzini@redhat.com
[sean: guard ymm[8-15] accesses with #ifdef CONFIG_X86_64]
Signed-off-by: Sean Christopherson <seanjc@google.com>

authored by

Paolo Bonzini and committed by

Sean Christopherson 4 months ago 4cb21be4 f106797f

+114 -17

3 changed files

expand all

arch

x86

kvm

emulate.c

fpu.h

kvm_emulate.h

+44 -14

arch/x86/kvm/emulate.c

··· 141 141 #define No64 (1<<28) /* Instruction generates #UD in 64-bit mode */ 142 142 #define PageTable (1 << 29) /* instruction used to write page table */ 143 143 #define NotImpl (1 << 30) /* instruction is not implemented */ 144 + #define Avx ((u64)1 << 31) /* Instruction uses VEX prefix */ 144 145 #define Src2Shift (32) /* Source 2 operand type at bits 32-36 */ 145 146 #define Src2None (OpNone << Src2Shift) 146 147 #define Src2Mem (OpMem << Src2Shift) ··· 158 157 #define Src2Mask (OpMask << Src2Shift) 159 158 /* free: 37-39 */ 160 159 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */ 161 - #define AlignMask ((u64)7 << 41) /* Memory alignment requirement at bits 41-43 */ 160 + #define AlignMask ((u64)3 << 41) /* Memory alignment requirement at bits 41-42 */ 162 161 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ 163 162 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */ 164 - #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */ 165 - #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */ 166 - /* free: 44 */ 163 + #define Aligned16 ((u64)3 << 41) /* Aligned to 16 byte boundary (e.g. 
FXSAVE) */ 164 + /* free: 43-44 */ 167 165 #define NoWrite ((u64)1 << 45) /* No writeback */ 168 166 #define SrcWrite ((u64)1 << 46) /* Write back src operand */ 169 167 #define NoMod ((u64)1 << 47) /* Mod field is ignored */ ··· 618 618 619 619 switch (alignment) { 620 620 case Unaligned: 621 - case Avx: 622 621 return 1; 623 622 case Aligned16: 624 623 return 16; ··· 1074 1075 static void __decode_register_operand(struct x86_emulate_ctxt *ctxt, 1075 1076 struct operand *op, int reg) 1076 1077 { 1077 - if (ctxt->d & Sse) { 1078 + if ((ctxt->d & Avx) && ctxt->op_bytes == 32) { 1079 + op->type = OP_YMM; 1080 + op->bytes = 32; 1081 + op->addr.xmm = reg; 1082 + kvm_read_avx_reg(reg, &op->vec_val2); 1083 + return; 1084 + } 1085 + if (ctxt->d & (Avx|Sse)) { 1078 1086 op->type = OP_XMM; 1079 1087 op->bytes = 16; 1080 1088 op->addr.xmm = reg; ··· 1773 1767 op->data, 1774 1768 op->bytes * op->count); 1775 1769 case OP_XMM: 1776 - kvm_write_sse_reg(op->addr.xmm, &op->vec_val); 1770 + if (!(ctxt->d & Avx)) { 1771 + kvm_write_sse_reg(op->addr.xmm, &op->vec_val); 1772 + break; 1773 + } 1774 + /* full YMM write but with high bytes cleared */ 1775 + memset(op->valptr + 16, 0, 16); 1776 + fallthrough; 1777 + case OP_YMM: 1778 + kvm_write_avx_reg(op->addr.xmm, &op->vec_val2); 1777 1779 break; 1778 1780 case OP_MM: 1779 1781 kvm_write_mmx_reg(op->addr.mm, &op->mm_val); ··· 4875 4861 ctxt->op_bytes = 8; /* REX.W */ 4876 4862 4877 4863 /* Opcode byte(s). */ 4878 - opcode = opcode_table[ctxt->b]; 4879 - /* Two-byte opcode? */ 4880 4864 if (ctxt->b == 0x0f) { 4865 + /* Two- or three-byte opcode */ 4881 4866 ctxt->opcode_len = 2; 4882 4867 ctxt->b = insn_fetch(u8, ctxt); 4883 4868 opcode = twobyte_table[ctxt->b]; ··· 4887 4874 ctxt->b = insn_fetch(u8, ctxt); 4888 4875 opcode = opcode_map_0f_38[ctxt->b]; 4889 4876 } 4877 + } else { 4878 + /* Opcode byte(s). 
*/ 4879 + opcode = opcode_table[ctxt->b]; 4890 4880 } 4891 4881 ctxt->d = opcode.flags; 4892 4882 ··· 5038 5022 ctxt->op_bytes = 4; 5039 5023 5040 5024 if (ctxt->d & Sse) 5041 - ctxt->op_bytes = 16; 5025 + ctxt->op_bytes = 16, ctxt->d &= ~Avx; 5042 5026 else if (ctxt->d & Mmx) 5043 5027 ctxt->op_bytes = 8; 5044 5028 } ··· 5170 5154 } 5171 5155 5172 5156 if (unlikely(ctxt->d & 5173 - (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { 5157 + (No64|Undefined|Avx|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { 5174 5158 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || 5175 5159 (ctxt->d & Undefined)) { 5176 5160 rc = emulate_ud(ctxt); 5177 5161 goto done; 5178 5162 } 5179 5163 5180 - if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) 5181 - || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { 5164 + if ((ctxt->d & (Avx|Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) { 5182 5165 rc = emulate_ud(ctxt); 5183 5166 goto done; 5184 5167 } 5185 5168 5186 - if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { 5169 + if (ctxt->d & Avx) { 5170 + u64 xcr = 0; 5171 + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE) 5172 + || ops->get_xcr(ctxt, 0, &xcr) 5173 + || !(xcr & XFEATURE_MASK_YMM)) { 5174 + rc = emulate_ud(ctxt); 5175 + goto done; 5176 + } 5177 + } else if (ctxt->d & Sse) { 5178 + if (!(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) { 5179 + rc = emulate_ud(ctxt); 5180 + goto done; 5181 + } 5182 + } 5183 + 5184 + if ((ctxt->d & (Avx|Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { 5187 5185 rc = emulate_nm(ctxt); 5188 5186 goto done; 5189 5187 }

+66

arch/x86/kvm/fpu.h

··· 15 15 #define sse128_l3(x) ({ __sse128_u t; t.vec = x; t.as_u32[3]; }) 16 16 #define sse128(lo, hi) ({ __sse128_u t; t.as_u64[0] = lo; t.as_u64[1] = hi; t.vec; }) 17 17 18 + typedef u32 __attribute__((vector_size(32))) avx256_t; 19 + 20 + static inline void _kvm_read_avx_reg(int reg, avx256_t *data) 21 + { 22 + switch (reg) { 23 + case 0: asm("vmovdqa %%ymm0, %0" : "=m"(*data)); break; 24 + case 1: asm("vmovdqa %%ymm1, %0" : "=m"(*data)); break; 25 + case 2: asm("vmovdqa %%ymm2, %0" : "=m"(*data)); break; 26 + case 3: asm("vmovdqa %%ymm3, %0" : "=m"(*data)); break; 27 + case 4: asm("vmovdqa %%ymm4, %0" : "=m"(*data)); break; 28 + case 5: asm("vmovdqa %%ymm5, %0" : "=m"(*data)); break; 29 + case 6: asm("vmovdqa %%ymm6, %0" : "=m"(*data)); break; 30 + case 7: asm("vmovdqa %%ymm7, %0" : "=m"(*data)); break; 31 + #ifdef CONFIG_X86_64 32 + case 8: asm("vmovdqa %%ymm8, %0" : "=m"(*data)); break; 33 + case 9: asm("vmovdqa %%ymm9, %0" : "=m"(*data)); break; 34 + case 10: asm("vmovdqa %%ymm10, %0" : "=m"(*data)); break; 35 + case 11: asm("vmovdqa %%ymm11, %0" : "=m"(*data)); break; 36 + case 12: asm("vmovdqa %%ymm12, %0" : "=m"(*data)); break; 37 + case 13: asm("vmovdqa %%ymm13, %0" : "=m"(*data)); break; 38 + case 14: asm("vmovdqa %%ymm14, %0" : "=m"(*data)); break; 39 + case 15: asm("vmovdqa %%ymm15, %0" : "=m"(*data)); break; 40 + #endif 41 + default: BUG(); 42 + } 43 + } 44 + 45 + static inline void _kvm_write_avx_reg(int reg, const avx256_t *data) 46 + { 47 + switch (reg) { 48 + case 0: asm("vmovdqa %0, %%ymm0" : : "m"(*data)); break; 49 + case 1: asm("vmovdqa %0, %%ymm1" : : "m"(*data)); break; 50 + case 2: asm("vmovdqa %0, %%ymm2" : : "m"(*data)); break; 51 + case 3: asm("vmovdqa %0, %%ymm3" : : "m"(*data)); break; 52 + case 4: asm("vmovdqa %0, %%ymm4" : : "m"(*data)); break; 53 + case 5: asm("vmovdqa %0, %%ymm5" : : "m"(*data)); break; 54 + case 6: asm("vmovdqa %0, %%ymm6" : : "m"(*data)); break; 55 + case 7: asm("vmovdqa %0, %%ymm7" : : "m"(*data)); break; 56 + 
#ifdef CONFIG_X86_64 57 + case 8: asm("vmovdqa %0, %%ymm8" : : "m"(*data)); break; 58 + case 9: asm("vmovdqa %0, %%ymm9" : : "m"(*data)); break; 59 + case 10: asm("vmovdqa %0, %%ymm10" : : "m"(*data)); break; 60 + case 11: asm("vmovdqa %0, %%ymm11" : : "m"(*data)); break; 61 + case 12: asm("vmovdqa %0, %%ymm12" : : "m"(*data)); break; 62 + case 13: asm("vmovdqa %0, %%ymm13" : : "m"(*data)); break; 63 + case 14: asm("vmovdqa %0, %%ymm14" : : "m"(*data)); break; 64 + case 15: asm("vmovdqa %0, %%ymm15" : : "m"(*data)); break; 65 + #endif 66 + default: BUG(); 67 + } 68 + } 69 + 18 70 static inline void _kvm_read_sse_reg(int reg, sse128_t *data) 19 71 { 20 72 switch (reg) { ··· 159 107 static inline void kvm_fpu_put(void) 160 108 { 161 109 fpregs_unlock(); 110 + } 111 + 112 + static inline void kvm_read_avx_reg(int reg, avx256_t *data) 113 + { 114 + kvm_fpu_get(); 115 + _kvm_read_avx_reg(reg, data); 116 + kvm_fpu_put(); 117 + } 118 + 119 + static inline void kvm_write_avx_reg(int reg, const avx256_t *data) 120 + { 121 + kvm_fpu_get(); 122 + _kvm_write_avx_reg(reg, data); 123 + kvm_fpu_put(); 162 124 } 163 125 164 126 static inline void kvm_read_sse_reg(int reg, sse128_t *data)

+4 -3

arch/x86/kvm/kvm_emulate.h

··· 249 249 250 250 /* Type, address-of, and value of an instruction's operand. */ 251 251 struct operand { 252 - enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; 252 + enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_YMM, OP_MM, OP_NONE } type; 253 253 unsigned int bytes; 254 254 unsigned int count; 255 255 union { ··· 268 268 union { 269 269 unsigned long val; 270 270 u64 val64; 271 - char valptr[sizeof(sse128_t)]; 271 + char valptr[sizeof(avx256_t)]; 272 272 sse128_t vec_val; 273 + avx256_t vec_val2; 273 274 u64 mm_val; 274 275 void *data; 275 - }; 276 + } __aligned(32); 276 277 }; 277 278 278 279 #define X86_MAX_INSTRUCTION_LENGTH 15