Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: TDX: Do TDX specific vcpu initialization

TD guest vcpu needs TDX specific initialization before running. Repurpose
KVM_MEMORY_ENCRYPT_OP to vcpu-scope, add a new sub-command
KVM_TDX_INIT_VCPU, and implement the callback for it.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Signed-off-by: Tony Lindgren <tony.lindgren@linux.intel.com>
Co-developed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
- Fix comment: https://lore.kernel.org/kvm/Z36OYfRW9oPjW8be@google.com/
(Sean)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Authored by Isaku Yamahata and committed by Paolo Bonzini
a50f673f 9002f8cf

+205 -2
+1
arch/x86/include/asm/kvm-x86-ops.h
··· 127 127 #endif 128 128 KVM_X86_OP_OPTIONAL(dev_get_attr) 129 129 KVM_X86_OP(mem_enc_ioctl) 130 + KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl) 130 131 KVM_X86_OP_OPTIONAL(mem_enc_register_region) 131 132 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) 132 133 KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
+1
arch/x86/include/asm/kvm_host.h
··· 1849 1849 1850 1850 int (*dev_get_attr)(u32 group, u64 attr, u64 *val); 1851 1851 int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); 1852 + int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); 1852 1853 int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1853 1854 int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1854 1855 int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+1
arch/x86/include/uapi/asm/kvm.h
··· 931 931 enum kvm_tdx_cmd_id { 932 932 KVM_TDX_CAPABILITIES = 0, 933 933 KVM_TDX_INIT_VM, 934 + KVM_TDX_INIT_VCPU, 934 935 935 936 KVM_TDX_CMD_NR_MAX, 936 937 };
+1
arch/x86/kvm/lapic.c
··· 2657 2657 kvm_recalculate_apic_map(vcpu->kvm); 2658 2658 return 0; 2659 2659 } 2660 + EXPORT_SYMBOL_GPL(kvm_apic_set_base); 2660 2661 2661 2662 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) 2662 2663 {
+9
arch/x86/kvm/vmx/main.c
··· 106 106 return tdx_vm_ioctl(kvm, argp); 107 107 } 108 108 109 + static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp) 110 + { 111 + if (!is_td_vcpu(vcpu)) 112 + return -EINVAL; 113 + 114 + return tdx_vcpu_ioctl(vcpu, argp); 115 + } 116 + 109 117 #define VMX_REQUIRED_APICV_INHIBITS \ 110 118 (BIT(APICV_INHIBIT_REASON_DISABLED) | \ 111 119 BIT(APICV_INHIBIT_REASON_ABSENT) | \ ··· 270 262 .get_untagged_addr = vmx_get_untagged_addr, 271 263 272 264 .mem_enc_ioctl = vt_mem_enc_ioctl, 265 + .vcpu_mem_enc_ioctl = vt_vcpu_mem_enc_ioctl, 273 266 }; 274 267 275 268 struct kvm_x86_init_ops vt_init_ops __initdata = {
+171 -1
arch/x86/kvm/vmx/tdx.c
··· 410 410 int tdx_vcpu_create(struct kvm_vcpu *vcpu) 411 411 { 412 412 struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); 413 + struct vcpu_tdx *tdx = to_tdx(vcpu); 413 414 414 415 if (kvm_tdx->state != TD_STATE_INITIALIZED) 415 416 return -EIO; ··· 439 438 if ((kvm_tdx->xfam & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE) 440 439 vcpu->arch.xfd_no_write_intercept = true; 441 440 441 + tdx->state = VCPU_TD_STATE_UNINITIALIZED; 442 + 442 443 return 0; 443 444 } 444 445 445 446 void tdx_vcpu_free(struct kvm_vcpu *vcpu) 446 447 { 447 - /* This is stub for now. More logic will come. */ 448 + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); 449 + struct vcpu_tdx *tdx = to_tdx(vcpu); 450 + int i; 451 + 452 + /* 453 + * It is not possible to reclaim pages while hkid is assigned. It might 454 + * be assigned if: 455 + * 1. the TD VM is being destroyed but freeing hkid failed, in which 456 + * case the pages are leaked 457 + * 2. TD VCPU creation failed and this on the error path, in which case 458 + * there is nothing to do anyway 459 + */ 460 + if (is_hkid_assigned(kvm_tdx)) 461 + return; 462 + 463 + if (tdx->vp.tdcx_pages) { 464 + for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) { 465 + if (tdx->vp.tdcx_pages[i]) 466 + tdx_reclaim_control_page(tdx->vp.tdcx_pages[i]); 467 + } 468 + kfree(tdx->vp.tdcx_pages); 469 + tdx->vp.tdcx_pages = NULL; 470 + } 471 + if (tdx->vp.tdvpr_page) { 472 + tdx_reclaim_control_page(tdx->vp.tdvpr_page); 473 + tdx->vp.tdvpr_page = 0; 474 + } 475 + 476 + tdx->state = VCPU_TD_STATE_UNINITIALIZED; 448 477 } 449 478 450 479 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd) ··· 684 653 goto free_hkid; 685 654 686 655 kvm_tdx->td.tdcs_nr_pages = tdx_sysinfo->td_ctrl.tdcs_base_size / PAGE_SIZE; 656 + /* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */ 657 + kvm_tdx->td.tdcx_nr_pages = tdx_sysinfo->td_ctrl.tdvps_base_size / PAGE_SIZE - 1; 687 658 tdcs_pages = kcalloc(kvm_tdx->td.tdcs_nr_pages, sizeof(*kvm_tdx->td.tdcs_pages), 688 659 GFP_KERNEL | __GFP_ZERO); 689 660 if (!tdcs_pages) ··· 961 928 out: 962 929 mutex_unlock(&kvm->lock); 963 930 return r; 931 + } 932 + 933 + /* VMM can pass one 64bit auxiliary data to vcpu via RCX for guest BIOS. */ 934 + static int tdx_td_vcpu_init(struct kvm_vcpu *vcpu, u64 vcpu_rcx) 935 + { 936 + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); 937 + struct vcpu_tdx *tdx = to_tdx(vcpu); 938 + struct page *page; 939 + int ret, i; 940 + u64 err; 941 + 942 + page = alloc_page(GFP_KERNEL); 943 + if (!page) 944 + return -ENOMEM; 945 + tdx->vp.tdvpr_page = page; 946 + 947 + tdx->vp.tdcx_pages = kcalloc(kvm_tdx->td.tdcx_nr_pages, sizeof(*tdx->vp.tdcx_pages), 948 + GFP_KERNEL); 949 + if (!tdx->vp.tdcx_pages) { 950 + ret = -ENOMEM; 951 + goto free_tdvpr; 952 + } 953 + 954 + for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) { 955 + page = alloc_page(GFP_KERNEL); 956 + if (!page) { 957 + ret = -ENOMEM; 958 + goto free_tdcx; 959 + } 960 + tdx->vp.tdcx_pages[i] = page; 961 + } 962 + 963 + err = tdh_vp_create(&kvm_tdx->td, &tdx->vp); 964 + if (KVM_BUG_ON(err, vcpu->kvm)) { 965 + ret = -EIO; 966 + pr_tdx_error(TDH_VP_CREATE, err); 967 + goto free_tdcx; 968 + } 969 + 970 + for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) { 971 + err = tdh_vp_addcx(&tdx->vp, tdx->vp.tdcx_pages[i]); 972 + if (KVM_BUG_ON(err, vcpu->kvm)) { 973 + pr_tdx_error(TDH_VP_ADDCX, err); 974 + /* 975 + * Pages already added are reclaimed by the vcpu_free 976 + * method, but the rest are freed here. 977 + */ 978 + for (; i < kvm_tdx->td.tdcx_nr_pages; i++) { 979 + __free_page(tdx->vp.tdcx_pages[i]); 980 + tdx->vp.tdcx_pages[i] = NULL; 981 + } 982 + return -EIO; 983 + } 984 + } 985 + 986 + err = tdh_vp_init(&tdx->vp, vcpu_rcx, vcpu->vcpu_id); 987 + if (KVM_BUG_ON(err, vcpu->kvm)) { 988 + pr_tdx_error(TDH_VP_INIT, err); 989 + return -EIO; 990 + } 991 + 992 + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 993 + 994 + return 0; 995 + 996 + free_tdcx: 997 + for (i = 0; i < kvm_tdx->td.tdcx_nr_pages; i++) { 998 + if (tdx->vp.tdcx_pages[i]) 999 + __free_page(tdx->vp.tdcx_pages[i]); 1000 + tdx->vp.tdcx_pages[i] = NULL; 1001 + } 1002 + kfree(tdx->vp.tdcx_pages); 1003 + tdx->vp.tdcx_pages = NULL; 1004 + 1005 + free_tdvpr: 1006 + if (tdx->vp.tdvpr_page) 1007 + __free_page(tdx->vp.tdvpr_page); 1008 + tdx->vp.tdvpr_page = 0; 1009 + 1010 + return ret; 1011 + } 1012 + 1013 + static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd) 1014 + { 1015 + u64 apic_base; 1016 + struct vcpu_tdx *tdx = to_tdx(vcpu); 1017 + int ret; 1018 + 1019 + if (cmd->flags) 1020 + return -EINVAL; 1021 + 1022 + if (tdx->state != VCPU_TD_STATE_UNINITIALIZED) 1023 + return -EINVAL; 1024 + 1025 + /* 1026 + * TDX requires X2APIC, userspace is responsible for configuring guest 1027 + * CPUID accordingly. 1028 + */ 1029 + apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC | 1030 + (kvm_vcpu_is_reset_bsp(vcpu) ? MSR_IA32_APICBASE_BSP : 0); 1031 + if (kvm_apic_set_base(vcpu, apic_base, true)) 1032 + return -EINVAL; 1033 + 1034 + ret = tdx_td_vcpu_init(vcpu, (u64)cmd->data); 1035 + if (ret) 1036 + return ret; 1037 + 1038 + tdx->state = VCPU_TD_STATE_INITIALIZED; 1039 + 1040 + return 0; 1041 + } 1042 + 1043 + int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) 1044 + { 1045 + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); 1046 + struct kvm_tdx_cmd cmd; 1047 + int ret; 1048 + 1049 + if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE) 1050 + return -EINVAL; 1051 + 1052 + if (copy_from_user(&cmd, argp, sizeof(cmd))) 1053 + return -EFAULT; 1054 + 1055 + if (cmd.hw_error) 1056 + return -EINVAL; 1057 + 1058 + switch (cmd.id) { 1059 + case KVM_TDX_INIT_VCPU: 1060 + ret = tdx_vcpu_init(vcpu, &cmd); 1061 + break; 1062 + default: 1063 + ret = -EINVAL; 1064 + break; 1065 + } 1066 + 1067 + return ret; 964 1068 } 965 1069 966 1070 static int tdx_online_cpu(unsigned int cpu)
+10 -1
arch/x86/kvm/vmx/tdx.h
··· 33 33 struct tdx_td td; 34 34 }; 35 35 36 + /* TDX module vCPU states */ 37 + enum vcpu_tdx_state { 38 + VCPU_TD_STATE_UNINITIALIZED = 0, 39 + VCPU_TD_STATE_INITIALIZED, 40 + }; 41 + 36 42 struct vcpu_tdx { 37 43 struct kvm_vcpu vcpu; 38 - /* TDX specific members follow. */ 44 + 45 + struct tdx_vp vp; 46 + 47 + enum vcpu_tdx_state state; 39 48 }; 40 49 41 50 static inline bool is_td(struct kvm *kvm)
+4
arch/x86/kvm/vmx/x86_ops.h
··· 129 129 130 130 int tdx_vcpu_create(struct kvm_vcpu *vcpu); 131 131 void tdx_vcpu_free(struct kvm_vcpu *vcpu); 132 + 133 + int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); 132 134 #else 133 135 static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; } 134 136 static inline void tdx_mmu_release_hkid(struct kvm *kvm) {} ··· 139 137 140 138 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; } 141 139 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {} 140 + 141 + static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; } 142 142 #endif 143 143 144 144 #endif /* __KVM_X86_VMX_X86_OPS_H */
+7
arch/x86/kvm/x86.c
··· 6287 6287 case KVM_SET_DEVICE_ATTR: 6288 6288 r = kvm_vcpu_ioctl_device_attr(vcpu, ioctl, argp); 6289 6289 break; 6290 + case KVM_MEMORY_ENCRYPT_OP: 6291 + r = -ENOTTY; 6292 + if (!kvm_x86_ops.vcpu_mem_enc_ioctl) 6293 + goto out; 6294 + r = kvm_x86_ops.vcpu_mem_enc_ioctl(vcpu, argp); 6295 + break; 6290 6296 default: 6291 6297 r = -EINVAL; 6292 6298 } ··· 12682 12676 { 12683 12677 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; 12684 12678 } 12679 + EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); 12685 12680 12686 12681 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) 12687 12682 {