Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: x86: Introduce KVM_TDX_GET_CPUID

Implement an IOCTL to allow userspace to read the CPUID bit values for a
configured TD.

The TDX module doesn't provide the ability to set all CPUID bits. Instead
some are configured indirectly, or have fixed values. But it does allow
for the final resulting CPUID bits to be read. This information will be
useful for userspace to understand the configuration of the TD, and set
KVM's copy via KVM_SET_CPUID2.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Co-developed-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
- Fix subleaf mask check (Binbin)
- Search all possible sub-leafs (Francesco Lavra)
- Reduce off-by-one error sensitive code (Francesco, Xiaoyao)
- Handle buffers too small from userspace (Xiaoyao)
- Read max CPUID from TD instead of using fixed values (Xiaoyao)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

authored by

Xiaoyao Li and committed by
Paolo Bonzini
488808e6 a50f673f

+198
+1
arch/x86/include/uapi/asm/kvm.h
··· 932 932 KVM_TDX_CAPABILITIES = 0, 933 933 KVM_TDX_INIT_VM, 934 934 KVM_TDX_INIT_VCPU, 935 + KVM_TDX_GET_CPUID, 935 936 936 937 KVM_TDX_CMD_NR_MAX, 937 938 };
+191
arch/x86/kvm/vmx/tdx.c
··· 3 3 #include <asm/cpufeature.h> 4 4 #include <asm/tdx.h> 5 5 #include "capabilities.h" 6 + #include "mmu.h" 6 7 #include "x86_ops.h" 7 8 #include "lapic.h" 8 9 #include "tdx.h" ··· 850 849 return ret; 851 850 } 852 851 852 + static u64 tdx_td_metadata_field_read(struct kvm_tdx *tdx, u64 field_id, 853 + u64 *data) 854 + { 855 + u64 err; 856 + 857 + err = tdh_mng_rd(&tdx->td, field_id, data); 858 + 859 + return err; 860 + } 861 + 862 + #define TDX_MD_UNREADABLE_LEAF_MASK GENMASK(30, 7) 863 + #define TDX_MD_UNREADABLE_SUBLEAF_MASK GENMASK(31, 7) 864 + 865 + static int tdx_read_cpuid(struct kvm_vcpu *vcpu, u32 leaf, u32 sub_leaf, 866 + bool sub_leaf_set, int *entry_index, 867 + struct kvm_cpuid_entry2 *out) 868 + { 869 + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); 870 + u64 field_id = TD_MD_FIELD_ID_CPUID_VALUES; 871 + u64 ebx_eax, edx_ecx; 872 + u64 err = 0; 873 + 874 + if (sub_leaf > 0b1111111) 875 + return -EINVAL; 876 + 877 + if (*entry_index >= KVM_MAX_CPUID_ENTRIES) 878 + return -EINVAL; 879 + 880 + if (leaf & TDX_MD_UNREADABLE_LEAF_MASK || 881 + sub_leaf & TDX_MD_UNREADABLE_SUBLEAF_MASK) 882 + return -EINVAL; 883 + 884 + /* 885 + * bit 23:17, REVSERVED: reserved, must be 0; 886 + * bit 16, LEAF_31: leaf number bit 31; 887 + * bit 15:9, LEAF_6_0: leaf number bits 6:0, leaf bits 30:7 are 888 + * implicitly 0; 889 + * bit 8, SUBLEAF_NA: sub-leaf not applicable flag; 890 + * bit 7:1, SUBLEAF_6_0: sub-leaf number bits 6:0. If SUBLEAF_NA is 1, 891 + * the SUBLEAF_6_0 is all-1. 892 + * sub-leaf bits 31:7 are implicitly 0; 893 + * bit 0, ELEMENT_I: Element index within field; 894 + */ 895 + field_id |= ((leaf & 0x80000000) ? 
1 : 0) << 16; 896 + field_id |= (leaf & 0x7f) << 9; 897 + if (sub_leaf_set) 898 + field_id |= (sub_leaf & 0x7f) << 1; 899 + else 900 + field_id |= 0x1fe; 901 + 902 + err = tdx_td_metadata_field_read(kvm_tdx, field_id, &ebx_eax); 903 + if (err) //TODO check for specific errors 904 + goto err_out; 905 + 906 + out->eax = (u32) ebx_eax; 907 + out->ebx = (u32) (ebx_eax >> 32); 908 + 909 + field_id++; 910 + err = tdx_td_metadata_field_read(kvm_tdx, field_id, &edx_ecx); 911 + /* 912 + * It's weird that reading edx_ecx fails while reading ebx_eax 913 + * succeeded. 914 + */ 915 + if (WARN_ON_ONCE(err)) 916 + goto err_out; 917 + 918 + out->ecx = (u32) edx_ecx; 919 + out->edx = (u32) (edx_ecx >> 32); 920 + 921 + out->function = leaf; 922 + out->index = sub_leaf; 923 + out->flags |= sub_leaf_set ? KVM_CPUID_FLAG_SIGNIFCANT_INDEX : 0; 924 + 925 + /* 926 + * Work around missing support on old TDX modules, fetch 927 + * guest maxpa from gfn_direct_bits. 928 + */ 929 + if (leaf == 0x80000008) { 930 + gpa_t gpa_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); 931 + unsigned int g_maxpa = __ffs(gpa_bits) + 1; 932 + 933 + out->eax = tdx_set_guest_phys_addr_bits(out->eax, g_maxpa); 934 + } 935 + 936 + (*entry_index)++; 937 + 938 + return 0; 939 + 940 + err_out: 941 + out->eax = 0; 942 + out->ebx = 0; 943 + out->ecx = 0; 944 + out->edx = 0; 945 + 946 + return -EIO; 947 + } 948 + 853 949 static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) 854 950 { 855 951 struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); ··· 1141 1043 return ret; 1142 1044 } 1143 1045 1046 + /* Sometimes reads multipple subleafs. Return how many enties were written. 
*/ 1047 + static int tdx_vcpu_get_cpuid_leaf(struct kvm_vcpu *vcpu, u32 leaf, int *entry_index, 1048 + struct kvm_cpuid_entry2 *output_e) 1049 + { 1050 + int sub_leaf = 0; 1051 + int ret; 1052 + 1053 + /* First try without a subleaf */ 1054 + ret = tdx_read_cpuid(vcpu, leaf, 0, false, entry_index, output_e); 1055 + 1056 + /* If success, or invalid leaf, just give up */ 1057 + if (ret != -EIO) 1058 + return ret; 1059 + 1060 + /* 1061 + * If the try without a subleaf failed, try reading subleafs until 1062 + * failure. The TDX module only supports 6 bits of subleaf index. 1063 + */ 1064 + while (1) { 1065 + /* Keep reading subleafs until there is a failure. */ 1066 + if (tdx_read_cpuid(vcpu, leaf, sub_leaf, true, entry_index, output_e)) 1067 + return !sub_leaf; 1068 + 1069 + sub_leaf++; 1070 + output_e++; 1071 + } 1072 + 1073 + return 0; 1074 + } 1075 + 1076 + static int tdx_vcpu_get_cpuid(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd) 1077 + { 1078 + struct kvm_cpuid2 __user *output, *td_cpuid; 1079 + int r = 0, i = 0, leaf; 1080 + u32 level; 1081 + 1082 + output = u64_to_user_ptr(cmd->data); 1083 + td_cpuid = kzalloc(sizeof(*td_cpuid) + 1084 + sizeof(output->entries[0]) * KVM_MAX_CPUID_ENTRIES, 1085 + GFP_KERNEL); 1086 + if (!td_cpuid) 1087 + return -ENOMEM; 1088 + 1089 + if (copy_from_user(td_cpuid, output, sizeof(*output))) { 1090 + r = -EFAULT; 1091 + goto out; 1092 + } 1093 + 1094 + /* Read max CPUID for normal range */ 1095 + if (tdx_vcpu_get_cpuid_leaf(vcpu, 0, &i, &td_cpuid->entries[i])) { 1096 + r = -EIO; 1097 + goto out; 1098 + } 1099 + level = td_cpuid->entries[0].eax; 1100 + 1101 + for (leaf = 1; leaf <= level; leaf++) 1102 + tdx_vcpu_get_cpuid_leaf(vcpu, leaf, &i, &td_cpuid->entries[i]); 1103 + 1104 + /* Read max CPUID for extended range */ 1105 + if (tdx_vcpu_get_cpuid_leaf(vcpu, 0x80000000, &i, &td_cpuid->entries[i])) { 1106 + r = -EIO; 1107 + goto out; 1108 + } 1109 + level = td_cpuid->entries[i - 1].eax; 1110 + 1111 + for (leaf = 0x80000001; leaf 
<= level; leaf++) 1112 + tdx_vcpu_get_cpuid_leaf(vcpu, leaf, &i, &td_cpuid->entries[i]); 1113 + 1114 + if (td_cpuid->nent < i) 1115 + r = -E2BIG; 1116 + td_cpuid->nent = i; 1117 + 1118 + if (copy_to_user(output, td_cpuid, sizeof(*output))) { 1119 + r = -EFAULT; 1120 + goto out; 1121 + } 1122 + 1123 + if (r == -E2BIG) 1124 + goto out; 1125 + 1126 + if (copy_to_user(output->entries, td_cpuid->entries, 1127 + td_cpuid->nent * sizeof(struct kvm_cpuid_entry2))) 1128 + r = -EFAULT; 1129 + 1130 + out: 1131 + kfree(td_cpuid); 1132 + 1133 + return r; 1134 + } 1135 + 1144 1136 static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd) 1145 1137 { 1146 1138 u64 apic_base; ··· 1279 1091 switch (cmd.id) { 1280 1092 case KVM_TDX_INIT_VCPU: 1281 1093 ret = tdx_vcpu_init(vcpu, &cmd); 1094 + break; 1095 + case KVM_TDX_GET_CPUID: 1096 + ret = tdx_vcpu_get_cpuid(vcpu, &cmd); 1282 1097 break; 1283 1098 default: 1284 1099 ret = -EINVAL;
+5
arch/x86/kvm/vmx/tdx_arch.h
··· 123 123 124 124 #define MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM BIT_ULL(20) 125 125 126 + /* 127 + * TD scope metadata field ID. 128 + */ 129 + #define TD_MD_FIELD_ID_CPUID_VALUES 0x9410000300000000ULL 130 + 126 131 #endif /* __KVM_X86_TDX_ARCH_H */
+1
arch/x86/kvm/vmx/tdx_errno.h
··· 23 23 #define TDX_FLUSHVP_NOT_DONE 0x8000082400000000ULL 24 24 #define TDX_EPT_WALK_FAILED 0xC0000B0000000000ULL 25 25 #define TDX_EPT_ENTRY_STATE_INCORRECT 0xC0000B0D00000000ULL 26 + #define TDX_METADATA_FIELD_NOT_READABLE 0xC0000C0200000000ULL 26 27 27 28 /* 28 29 * TDX module operand ID, appears in 31:0 part of error code as