Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'kvm-sev-move-context' into kvm-master

Add support for AMD SEV and SEV-ES intra-host migration. Intra-host
migration provides a low-cost mechanism for userspace VMM upgrades.

In the common case for intra-host migration, we can rely on the normal
ioctls for passing data from one VMM to the next. SEV, SEV-ES, and other
confidential compute environments make most of this information opaque, and
render KVM ioctls such as "KVM_GET_REGS" irrelevant. As a result, we need
the ability to pass this opaque metadata from one VMM to the next. The
easiest way to do this is to leave this data in the kernel, and transfer
ownership of the metadata from one KVM VM (or vCPU) to the next. In-kernel
handoff makes it possible to move any data that would be
unsafe/impossible for the kernel to hand directly to userspace, and
cannot be reproduced using data that can be handed to userspace.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

+550 -81
+14
Documentation/virt/kvm/api.rst
··· 6911 6911 When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to 6912 6912 perform a bulk copy of tags to/from the guest. 6913 6913 6914 + 7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 6915 + ------------------------------------- 6916 + 6917 + Architectures: x86 SEV enabled 6918 + Type: vm 6919 + Parameters: args[0] is the fd of the source vm 6920 + Returns: 0 on success 6921 + 6922 + This capability enables userspace to migrate the encryption context from the VM 6923 + indicated by the fd to the VM this is called on. 6924 + 6925 + This is intended to support intra-host migration of VMs between userspace VMMs, 6926 + upgrading the VMM process without interrupting the guest. 6927 + 6914 6928 8. Other capabilities. 6915 6929 ====================== 6916 6930
+1
arch/x86/include/asm/kvm_host.h
··· 1476 1476 int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1477 1477 int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); 1478 1478 int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); 1479 + int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); 1479 1480 1480 1481 int (*get_msr_feature)(struct kvm_msr_entry *entry); 1481 1482
+255 -47
arch/x86/kvm/svm/sev.c
··· 120 120 return true; 121 121 } 122 122 123 + static int sev_misc_cg_try_charge(struct kvm_sev_info *sev) 124 + { 125 + enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 126 + return misc_cg_try_charge(type, sev->misc_cg, 1); 127 + } 128 + 129 + static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) 130 + { 131 + enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 132 + misc_cg_uncharge(type, sev->misc_cg, 1); 133 + } 134 + 123 135 static int sev_asid_new(struct kvm_sev_info *sev) 124 136 { 125 137 int asid, min_asid, max_asid, ret; 126 138 bool retry = true; 127 - enum misc_res_type type; 128 139 129 - type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 130 140 WARN_ON(sev->misc_cg); 131 141 sev->misc_cg = get_current_misc_cg(); 132 - ret = misc_cg_try_charge(type, sev->misc_cg, 1); 142 + ret = sev_misc_cg_try_charge(sev); 133 143 if (ret) { 134 144 put_misc_cg(sev->misc_cg); 135 145 sev->misc_cg = NULL; ··· 172 162 173 163 return asid; 174 164 e_uncharge: 175 - misc_cg_uncharge(type, sev->misc_cg, 1); 165 + sev_misc_cg_uncharge(sev); 176 166 put_misc_cg(sev->misc_cg); 177 167 sev->misc_cg = NULL; 178 168 return ret; ··· 189 179 { 190 180 struct svm_cpu_data *sd; 191 181 int cpu; 192 - enum misc_res_type type; 193 182 194 183 mutex_lock(&sev_bitmap_lock); 195 184 ··· 201 192 202 193 mutex_unlock(&sev_bitmap_lock); 203 194 204 - type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV; 205 - misc_cg_uncharge(type, sev->misc_cg, 1); 195 + sev_misc_cg_uncharge(sev); 206 196 put_misc_cg(sev->misc_cg); 207 197 sev->misc_cg = NULL; 208 198 } ··· 598 590 * traditional VMSA as it has been built so far (in prep 599 591 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state. 
600 592 */ 601 - memcpy(svm->vmsa, save, sizeof(*save)); 593 + memcpy(svm->sev_es.vmsa, save, sizeof(*save)); 602 594 603 595 return 0; 604 596 } ··· 620 612 * the VMSA memory content (i.e it will write the same memory region 621 613 * with the guest's key), so invalidate it first. 622 614 */ 623 - clflush_cache_range(svm->vmsa, PAGE_SIZE); 615 + clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE); 624 616 625 617 vmsa.reserved = 0; 626 618 vmsa.handle = to_kvm_svm(kvm)->sev_info.handle; 627 - vmsa.address = __sme_pa(svm->vmsa); 619 + vmsa.address = __sme_pa(svm->sev_es.vmsa); 628 620 vmsa.len = PAGE_SIZE; 629 621 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error); 630 622 if (ret) ··· 1544 1536 return false; 1545 1537 } 1546 1538 1539 + static int sev_lock_for_migration(struct kvm *kvm) 1540 + { 1541 + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1542 + 1543 + /* 1544 + * Bail if this VM is already involved in a migration to avoid deadlock 1545 + * between two VMs trying to migrate to/from each other. 
1546 + */ 1547 + if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1)) 1548 + return -EBUSY; 1549 + 1550 + mutex_lock(&kvm->lock); 1551 + 1552 + return 0; 1553 + } 1554 + 1555 + static void sev_unlock_after_migration(struct kvm *kvm) 1556 + { 1557 + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; 1558 + 1559 + mutex_unlock(&kvm->lock); 1560 + atomic_set_release(&sev->migration_in_progress, 0); 1561 + } 1562 + 1563 + 1564 + static int sev_lock_vcpus_for_migration(struct kvm *kvm) 1565 + { 1566 + struct kvm_vcpu *vcpu; 1567 + int i, j; 1568 + 1569 + kvm_for_each_vcpu(i, vcpu, kvm) { 1570 + if (mutex_lock_killable(&vcpu->mutex)) 1571 + goto out_unlock; 1572 + } 1573 + 1574 + return 0; 1575 + 1576 + out_unlock: 1577 + kvm_for_each_vcpu(j, vcpu, kvm) { 1578 + if (i == j) 1579 + break; 1580 + 1581 + mutex_unlock(&vcpu->mutex); 1582 + } 1583 + return -EINTR; 1584 + } 1585 + 1586 + static void sev_unlock_vcpus_for_migration(struct kvm *kvm) 1587 + { 1588 + struct kvm_vcpu *vcpu; 1589 + int i; 1590 + 1591 + kvm_for_each_vcpu(i, vcpu, kvm) { 1592 + mutex_unlock(&vcpu->mutex); 1593 + } 1594 + } 1595 + 1596 + static void sev_migrate_from(struct kvm_sev_info *dst, 1597 + struct kvm_sev_info *src) 1598 + { 1599 + dst->active = true; 1600 + dst->asid = src->asid; 1601 + dst->handle = src->handle; 1602 + dst->pages_locked = src->pages_locked; 1603 + 1604 + src->asid = 0; 1605 + src->active = false; 1606 + src->handle = 0; 1607 + src->pages_locked = 0; 1608 + 1609 + if (dst->misc_cg != src->misc_cg) 1610 + sev_misc_cg_uncharge(src); 1611 + 1612 + put_misc_cg(src->misc_cg); 1613 + src->misc_cg = NULL; 1614 + 1615 + INIT_LIST_HEAD(&dst->regions_list); 1616 + list_replace_init(&src->regions_list, &dst->regions_list); 1617 + } 1618 + 1619 + static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) 1620 + { 1621 + int i; 1622 + struct kvm_vcpu *dst_vcpu, *src_vcpu; 1623 + struct vcpu_svm *dst_svm, *src_svm; 1624 + 1625 + if (atomic_read(&src->online_vcpus) != 
atomic_read(&dst->online_vcpus)) 1626 + return -EINVAL; 1627 + 1628 + kvm_for_each_vcpu(i, src_vcpu, src) { 1629 + if (!src_vcpu->arch.guest_state_protected) 1630 + return -EINVAL; 1631 + } 1632 + 1633 + kvm_for_each_vcpu(i, src_vcpu, src) { 1634 + src_svm = to_svm(src_vcpu); 1635 + dst_vcpu = kvm_get_vcpu(dst, i); 1636 + dst_svm = to_svm(dst_vcpu); 1637 + 1638 + /* 1639 + * Transfer VMSA and GHCB state to the destination. Nullify and 1640 + * clear source fields as appropriate, the state now belongs to 1641 + * the destination. 1642 + */ 1643 + memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es)); 1644 + dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa; 1645 + dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa; 1646 + dst_vcpu->arch.guest_state_protected = true; 1647 + 1648 + memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es)); 1649 + src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE; 1650 + src_svm->vmcb->control.vmsa_pa = INVALID_PAGE; 1651 + src_vcpu->arch.guest_state_protected = false; 1652 + } 1653 + to_kvm_svm(src)->sev_info.es_active = false; 1654 + to_kvm_svm(dst)->sev_info.es_active = true; 1655 + 1656 + return 0; 1657 + } 1658 + 1659 + int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) 1660 + { 1661 + struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; 1662 + struct kvm_sev_info *src_sev; 1663 + struct file *source_kvm_file; 1664 + struct kvm *source_kvm; 1665 + int ret; 1666 + 1667 + ret = sev_lock_for_migration(kvm); 1668 + if (ret) 1669 + return ret; 1670 + 1671 + if (sev_guest(kvm)) { 1672 + ret = -EINVAL; 1673 + goto out_unlock; 1674 + } 1675 + 1676 + source_kvm_file = fget(source_fd); 1677 + if (!file_is_kvm(source_kvm_file)) { 1678 + ret = -EBADF; 1679 + goto out_fput; 1680 + } 1681 + 1682 + source_kvm = source_kvm_file->private_data; 1683 + ret = sev_lock_for_migration(source_kvm); 1684 + if (ret) 1685 + goto out_fput; 1686 + 1687 + if (!sev_guest(source_kvm)) { 1688 + ret = -EINVAL; 
1689 + goto out_source; 1690 + } 1691 + 1692 + src_sev = &to_kvm_svm(source_kvm)->sev_info; 1693 + dst_sev->misc_cg = get_current_misc_cg(); 1694 + if (dst_sev->misc_cg != src_sev->misc_cg) { 1695 + ret = sev_misc_cg_try_charge(dst_sev); 1696 + if (ret) 1697 + goto out_dst_put_cgroup; 1698 + } 1699 + 1700 + ret = sev_lock_vcpus_for_migration(kvm); 1701 + if (ret) 1702 + goto out_dst_cgroup; 1703 + ret = sev_lock_vcpus_for_migration(source_kvm); 1704 + if (ret) 1705 + goto out_dst_vcpu; 1706 + 1707 + if (sev_es_guest(source_kvm)) { 1708 + ret = sev_es_migrate_from(kvm, source_kvm); 1709 + if (ret) 1710 + goto out_source_vcpu; 1711 + } 1712 + sev_migrate_from(dst_sev, src_sev); 1713 + kvm_vm_dead(source_kvm); 1714 + ret = 0; 1715 + 1716 + out_source_vcpu: 1717 + sev_unlock_vcpus_for_migration(source_kvm); 1718 + out_dst_vcpu: 1719 + sev_unlock_vcpus_for_migration(kvm); 1720 + out_dst_cgroup: 1721 + if (ret < 0) { 1722 + sev_misc_cg_uncharge(dst_sev); 1723 + out_dst_put_cgroup: 1724 + put_misc_cg(dst_sev->misc_cg); 1725 + dst_sev->misc_cg = NULL; 1726 + } 1727 + out_source: 1728 + sev_unlock_after_migration(source_kvm); 1729 + out_fput: 1730 + if (source_kvm_file) 1731 + fput(source_kvm_file); 1732 + out_unlock: 1733 + sev_unlock_after_migration(kvm); 1734 + return ret; 1735 + } 1736 + 1547 1737 int svm_mem_enc_op(struct kvm *kvm, void __user *argp) 1548 1738 { 1549 1739 struct kvm_sev_cmd sev_cmd; ··· 2244 2038 svm = to_svm(vcpu); 2245 2039 2246 2040 if (vcpu->arch.guest_state_protected) 2247 - sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE); 2248 - __free_page(virt_to_page(svm->vmsa)); 2041 + sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE); 2042 + __free_page(virt_to_page(svm->sev_es.vmsa)); 2249 2043 2250 - if (svm->ghcb_sa_free) 2251 - kfree(svm->ghcb_sa); 2044 + if (svm->sev_es.ghcb_sa_free) 2045 + kfree(svm->sev_es.ghcb_sa); 2252 2046 } 2253 2047 2254 2048 static void dump_ghcb(struct vcpu_svm *svm) 2255 2049 { 2256 - struct ghcb *ghcb = svm->ghcb; 
2050 + struct ghcb *ghcb = svm->sev_es.ghcb; 2257 2051 unsigned int nbits; 2258 2052 2259 2053 /* Re-use the dump_invalid_vmcb module parameter */ ··· 2279 2073 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) 2280 2074 { 2281 2075 struct kvm_vcpu *vcpu = &svm->vcpu; 2282 - struct ghcb *ghcb = svm->ghcb; 2076 + struct ghcb *ghcb = svm->sev_es.ghcb; 2283 2077 2284 2078 /* 2285 2079 * The GHCB protocol so far allows for the following data ··· 2299 2093 { 2300 2094 struct vmcb_control_area *control = &svm->vmcb->control; 2301 2095 struct kvm_vcpu *vcpu = &svm->vcpu; 2302 - struct ghcb *ghcb = svm->ghcb; 2096 + struct ghcb *ghcb = svm->sev_es.ghcb; 2303 2097 u64 exit_code; 2304 2098 2305 2099 /* ··· 2346 2140 struct ghcb *ghcb; 2347 2141 u64 exit_code = 0; 2348 2142 2349 - ghcb = svm->ghcb; 2143 + ghcb = svm->sev_es.ghcb; 2350 2144 2351 2145 /* Only GHCB Usage code 0 is supported */ 2352 2146 if (ghcb->ghcb_usage) ··· 2464 2258 2465 2259 void sev_es_unmap_ghcb(struct vcpu_svm *svm) 2466 2260 { 2467 - if (!svm->ghcb) 2261 + if (!svm->sev_es.ghcb) 2468 2262 return; 2469 2263 2470 - if (svm->ghcb_sa_free) { 2264 + if (svm->sev_es.ghcb_sa_free) { 2471 2265 /* 2472 2266 * The scratch area lives outside the GHCB, so there is a 2473 2267 * buffer that, depending on the operation performed, may 2474 2268 * need to be synced, then freed. 
2475 2269 */ 2476 - if (svm->ghcb_sa_sync) { 2270 + if (svm->sev_es.ghcb_sa_sync) { 2477 2271 kvm_write_guest(svm->vcpu.kvm, 2478 - ghcb_get_sw_scratch(svm->ghcb), 2479 - svm->ghcb_sa, svm->ghcb_sa_len); 2480 - svm->ghcb_sa_sync = false; 2272 + ghcb_get_sw_scratch(svm->sev_es.ghcb), 2273 + svm->sev_es.ghcb_sa, 2274 + svm->sev_es.ghcb_sa_len); 2275 + svm->sev_es.ghcb_sa_sync = false; 2481 2276 } 2482 2277 2483 - kfree(svm->ghcb_sa); 2484 - svm->ghcb_sa = NULL; 2485 - svm->ghcb_sa_free = false; 2278 + kfree(svm->sev_es.ghcb_sa); 2279 + svm->sev_es.ghcb_sa = NULL; 2280 + svm->sev_es.ghcb_sa_free = false; 2486 2281 } 2487 2282 2488 - trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb); 2283 + trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb); 2489 2284 2490 2285 sev_es_sync_to_ghcb(svm); 2491 2286 2492 - kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true); 2493 - svm->ghcb = NULL; 2287 + kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true); 2288 + svm->sev_es.ghcb = NULL; 2494 2289 } 2495 2290 2496 2291 void pre_sev_run(struct vcpu_svm *svm, int cpu) ··· 2521 2314 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) 2522 2315 { 2523 2316 struct vmcb_control_area *control = &svm->vmcb->control; 2524 - struct ghcb *ghcb = svm->ghcb; 2317 + struct ghcb *ghcb = svm->sev_es.ghcb; 2525 2318 u64 ghcb_scratch_beg, ghcb_scratch_end; 2526 2319 u64 scratch_gpa_beg, scratch_gpa_end; 2527 2320 void *scratch_va; ··· 2557 2350 return false; 2558 2351 } 2559 2352 2560 - scratch_va = (void *)svm->ghcb; 2353 + scratch_va = (void *)svm->sev_es.ghcb; 2561 2354 scratch_va += (scratch_gpa_beg - control->ghcb_gpa); 2562 2355 } else { 2563 2356 /* ··· 2587 2380 * the vCPU next time (i.e. a read was requested so the data 2588 2381 * must be written back to the guest memory). 
2589 2382 */ 2590 - svm->ghcb_sa_sync = sync; 2591 - svm->ghcb_sa_free = true; 2383 + svm->sev_es.ghcb_sa_sync = sync; 2384 + svm->sev_es.ghcb_sa_free = true; 2592 2385 } 2593 2386 2594 - svm->ghcb_sa = scratch_va; 2595 - svm->ghcb_sa_len = len; 2387 + svm->sev_es.ghcb_sa = scratch_va; 2388 + svm->sev_es.ghcb_sa_len = len; 2596 2389 2597 2390 return true; 2598 2391 } ··· 2711 2504 return -EINVAL; 2712 2505 } 2713 2506 2714 - if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) { 2507 + if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) { 2715 2508 /* Unable to map GHCB from guest */ 2716 2509 vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n", 2717 2510 ghcb_gpa); 2718 2511 return -EINVAL; 2719 2512 } 2720 2513 2721 - svm->ghcb = svm->ghcb_map.hva; 2722 - ghcb = svm->ghcb_map.hva; 2514 + svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; 2515 + ghcb = svm->sev_es.ghcb_map.hva; 2723 2516 2724 2517 trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb); 2725 2518 ··· 2742 2535 ret = kvm_sev_es_mmio_read(vcpu, 2743 2536 control->exit_info_1, 2744 2537 control->exit_info_2, 2745 - svm->ghcb_sa); 2538 + svm->sev_es.ghcb_sa); 2746 2539 break; 2747 2540 case SVM_VMGEXIT_MMIO_WRITE: 2748 2541 if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) ··· 2751 2544 ret = kvm_sev_es_mmio_write(vcpu, 2752 2545 control->exit_info_1, 2753 2546 control->exit_info_2, 2754 - svm->ghcb_sa); 2547 + svm->sev_es.ghcb_sa); 2755 2548 break; 2756 2549 case SVM_VMGEXIT_NMI_COMPLETE: 2757 2550 ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET); ··· 2811 2604 if (!setup_vmgexit_scratch(svm, in, bytes)) 2812 2605 return -EINVAL; 2813 2606 2814 - return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in); 2607 + return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, 2608 + count, in); 2815 2609 } 2816 2610 2817 2611 void sev_es_init_vmcb(struct vcpu_svm *svm) ··· 2827 2619 * VMCB page. 
Do not include the encryption mask on the VMSA physical 2828 2620 * address since hardware will access it using the guest key. 2829 2621 */ 2830 - svm->vmcb->control.vmsa_pa = __pa(svm->vmsa); 2622 + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); 2831 2623 2832 2624 /* Can't intercept CR register access, HV can't modify CR registers */ 2833 2625 svm_clr_intercept(svm, INTERCEPT_CR0_READ); ··· 2899 2691 struct vcpu_svm *svm = to_svm(vcpu); 2900 2692 2901 2693 /* First SIPI: Use the values as initially set by the VMM */ 2902 - if (!svm->received_first_sipi) { 2903 - svm->received_first_sipi = true; 2694 + if (!svm->sev_es.received_first_sipi) { 2695 + svm->sev_es.received_first_sipi = true; 2904 2696 return; 2905 2697 } 2906 2698 ··· 2909 2701 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a 2910 2702 * non-zero value. 2911 2703 */ 2912 - if (!svm->ghcb) 2704 + if (!svm->sev_es.ghcb) 2913 2705 return; 2914 2706 2915 - ghcb_set_sw_exit_info_2(svm->ghcb, 1); 2707 + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); 2916 2708 }
+5 -4
arch/x86/kvm/svm/svm.c
··· 1452 1452 svm_switch_vmcb(svm, &svm->vmcb01); 1453 1453 1454 1454 if (vmsa_page) 1455 - svm->vmsa = page_address(vmsa_page); 1455 + svm->sev_es.vmsa = page_address(vmsa_page); 1456 1456 1457 1457 svm->guest_state_loaded = false; 1458 1458 ··· 2835 2835 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err) 2836 2836 { 2837 2837 struct vcpu_svm *svm = to_svm(vcpu); 2838 - if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb)) 2838 + if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb)) 2839 2839 return kvm_complete_insn_gp(vcpu, err); 2840 2840 2841 - ghcb_set_sw_exit_info_1(svm->ghcb, 1); 2842 - ghcb_set_sw_exit_info_2(svm->ghcb, 2841 + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1); 2842 + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 2843 2843 X86_TRAP_GP | 2844 2844 SVM_EVTINJ_TYPE_EXEPT | 2845 2845 SVM_EVTINJ_VALID); ··· 4701 4701 .mem_enc_unreg_region = svm_unregister_enc_region, 4702 4702 4703 4703 .vm_copy_enc_context_from = svm_vm_copy_asid_from, 4704 + .vm_move_enc_context_from = svm_vm_migrate_from, 4704 4705 4705 4706 .can_emulate_instruction = svm_can_emulate_instruction, 4706 4707
+17 -11
arch/x86/kvm/svm/svm.h
··· 80 80 u64 ap_jump_table; /* SEV-ES AP Jump Table address */ 81 81 struct kvm *enc_context_owner; /* Owner of copied encryption context */ 82 82 struct misc_cg *misc_cg; /* For misc cgroup accounting */ 83 + atomic_t migration_in_progress; 83 84 }; 84 85 85 86 struct kvm_svm { ··· 122 121 struct vmcb_control_area ctl; 123 122 124 123 bool initialized; 124 + }; 125 + 126 + struct vcpu_sev_es_state { 127 + /* SEV-ES support */ 128 + struct vmcb_save_area *vmsa; 129 + struct ghcb *ghcb; 130 + struct kvm_host_map ghcb_map; 131 + bool received_first_sipi; 132 + 133 + /* SEV-ES scratch area support */ 134 + void *ghcb_sa; 135 + u32 ghcb_sa_len; 136 + bool ghcb_sa_sync; 137 + bool ghcb_sa_free; 125 138 }; 126 139 127 140 struct vcpu_svm { ··· 201 186 DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS); 202 187 } shadow_msr_intercept; 203 188 204 - /* SEV-ES support */ 205 - struct vmcb_save_area *vmsa; 206 - struct ghcb *ghcb; 207 - struct kvm_host_map ghcb_map; 208 - bool received_first_sipi; 209 - 210 - /* SEV-ES scratch area support */ 211 - void *ghcb_sa; 212 - u32 ghcb_sa_len; 213 - bool ghcb_sa_sync; 214 - bool ghcb_sa_free; 189 + struct vcpu_sev_es_state sev_es; 215 190 216 191 bool guest_state_loaded; 217 192 }; ··· 563 558 int svm_unregister_enc_region(struct kvm *kvm, 564 559 struct kvm_enc_region *range); 565 560 int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd); 561 + int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd); 566 562 void pre_sev_run(struct vcpu_svm *svm, int cpu); 567 563 void __init sev_set_cpu_caps(void); 568 564 void __init sev_hardware_setup(void);
+7 -1
arch/x86/kvm/x86.c
··· 5728 5728 if (kvm_x86_ops.vm_copy_enc_context_from) 5729 5729 r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]); 5730 5730 return r; 5731 + case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM: 5732 + r = -EINVAL; 5733 + if (kvm_x86_ops.vm_move_enc_context_from) 5734 + r = kvm_x86_ops.vm_move_enc_context_from( 5735 + kvm, cap->args[0]); 5736 + return r; 5731 5737 case KVM_CAP_EXIT_HYPERCALL: 5732 5738 if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) { 5733 5739 r = -EINVAL; ··· 9558 9552 } 9559 9553 9560 9554 if (kvm_request_pending(vcpu)) { 9561 - if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) { 9555 + if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) { 9562 9556 r = -EIO; 9563 9557 goto out; 9564 9558 }
+10 -2
include/linux/kvm_host.h
··· 150 150 #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 151 151 #define KVM_REQ_UNBLOCK 2 152 152 #define KVM_REQ_UNHALT 3 153 - #define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 153 + #define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 154 154 #define KVM_REQUEST_ARCH_BASE 8 155 155 156 156 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ ··· 617 617 unsigned int max_halt_poll_ns; 618 618 u32 dirty_ring_size; 619 619 bool vm_bugged; 620 + bool vm_dead; 620 621 621 622 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER 622 623 struct notifier_block pm_notifier; ··· 651 650 #define vcpu_err(vcpu, fmt, ...) \ 652 651 kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) 653 652 653 + static inline void kvm_vm_dead(struct kvm *kvm) 654 + { 655 + kvm->vm_dead = true; 656 + kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD); 657 + } 658 + 654 659 static inline void kvm_vm_bugged(struct kvm *kvm) 655 660 { 656 661 kvm->vm_bugged = true; 657 - kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED); 662 + kvm_vm_dead(kvm); 658 663 } 664 + 659 665 660 666 #define KVM_BUG(cond, kvm, fmt...) \ 661 667 ({ \
+1
include/uapi/linux/kvm.h
··· 1130 1130 #define KVM_CAP_BINARY_STATS_FD 203 1131 1131 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 1132 1132 #define KVM_CAP_ARM_MTE 205 1133 + #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 1133 1134 1134 1135 #ifdef KVM_CAP_IRQ_ROUTING 1135 1136
+2 -1
tools/testing/selftests/kvm/Makefile
··· 73 73 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test 74 74 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test 75 75 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test 76 - TEST_GEN_PROGS_x86_64 += access_tracking_perf_test 76 + TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test 77 + TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests 77 78 TEST_GEN_PROGS_x86_64 += demand_paging_test 78 79 TEST_GEN_PROGS_x86_64 += dirty_log_test 79 80 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+1
tools/testing/selftests/kvm/include/kvm_util.h
··· 82 82 }; 83 83 extern const struct vm_guest_mode_params vm_guest_mode_params[]; 84 84 85 + int open_path_or_exit(const char *path, int flags); 85 86 int open_kvm_dev_path_or_exit(void); 86 87 int kvm_check_cap(long cap); 87 88 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+2
tools/testing/selftests/kvm/include/x86_64/svm_util.h
··· 46 46 return ecx & CPUID_SVM; 47 47 } 48 48 49 + int open_sev_dev_path_or_exit(void); 50 + 49 51 #endif /* SELFTEST_KVM_SVM_UTILS_H */
+14 -10
tools/testing/selftests/kvm/lib/kvm_util.c
··· 31 31 return (void *) (((size_t) x + mask) & ~mask); 32 32 } 33 33 34 + int open_path_or_exit(const char *path, int flags) 35 + { 36 + int fd; 37 + 38 + fd = open(path, flags); 39 + if (fd < 0) { 40 + print_skip("%s not available (errno: %d)", path, errno); 41 + exit(KSFT_SKIP); 42 + } 43 + 44 + return fd; 45 + } 46 + 34 47 /* 35 48 * Open KVM_DEV_PATH if available, otherwise exit the entire program. 36 49 * ··· 55 42 */ 56 43 static int _open_kvm_dev_path_or_exit(int flags) 57 44 { 58 - int fd; 59 - 60 - fd = open(KVM_DEV_PATH, flags); 61 - if (fd < 0) { 62 - print_skip("%s not available, is KVM loaded? (errno: %d)", 63 - KVM_DEV_PATH, errno); 64 - exit(KSFT_SKIP); 65 - } 66 - 67 - return fd; 45 + return open_path_or_exit(KVM_DEV_PATH, flags); 68 46 } 69 47 70 48 int open_kvm_dev_path_or_exit(void)
+13
tools/testing/selftests/kvm/lib/x86_64/svm.c
··· 13 13 #include "processor.h" 14 14 #include "svm_util.h" 15 15 16 + #define SEV_DEV_PATH "/dev/sev" 17 + 16 18 struct gpr64_regs guest_regs; 17 19 u64 rflags; 18 20 ··· 173 171 print_skip("nested SVM not enabled"); 174 172 exit(KSFT_SKIP); 175 173 } 174 + } 175 + 176 + /* 177 + * Open SEV_DEV_PATH if available, otherwise exit the entire program. 178 + * 179 + * Return: 180 + * The opened file descriptor of /dev/sev. 181 + */ 182 + int open_sev_dev_path_or_exit(void) 183 + { 184 + return open_path_or_exit(SEV_DEV_PATH, 0); 176 185 }
+203
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + #include <linux/kvm.h> 3 + #include <linux/psp-sev.h> 4 + #include <stdio.h> 5 + #include <sys/ioctl.h> 6 + #include <stdlib.h> 7 + #include <errno.h> 8 + #include <pthread.h> 9 + 10 + #include "test_util.h" 11 + #include "kvm_util.h" 12 + #include "processor.h" 13 + #include "svm_util.h" 14 + #include "kselftest.h" 15 + #include "../lib/kvm_util_internal.h" 16 + 17 + #define SEV_POLICY_ES 0b100 18 + 19 + #define NR_MIGRATE_TEST_VCPUS 4 20 + #define NR_MIGRATE_TEST_VMS 3 21 + #define NR_LOCK_TESTING_THREADS 3 22 + #define NR_LOCK_TESTING_ITERATIONS 10000 23 + 24 + static void sev_ioctl(int vm_fd, int cmd_id, void *data) 25 + { 26 + struct kvm_sev_cmd cmd = { 27 + .id = cmd_id, 28 + .data = (uint64_t)data, 29 + .sev_fd = open_sev_dev_path_or_exit(), 30 + }; 31 + int ret; 32 + 33 + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); 34 + TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS), 35 + "%d failed: return code: %d, errno: %d, fw error: %d", 36 + cmd_id, ret, errno, cmd.error); 37 + } 38 + 39 + static struct kvm_vm *sev_vm_create(bool es) 40 + { 41 + struct kvm_vm *vm; 42 + struct kvm_sev_launch_start start = { 0 }; 43 + int i; 44 + 45 + vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); 46 + sev_ioctl(vm->fd, es ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL); 47 + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) 48 + vm_vcpu_add(vm, i); 49 + if (es) 50 + start.policy |= SEV_POLICY_ES; 51 + sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start); 52 + if (es) 53 + sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); 54 + return vm; 55 + } 56 + 57 + static struct kvm_vm *__vm_create(void) 58 + { 59 + struct kvm_vm *vm; 60 + int i; 61 + 62 + vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); 63 + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) 64 + vm_vcpu_add(vm, i); 65 + 66 + return vm; 67 + } 68 + 69 + static int __sev_migrate_from(int dst_fd, int src_fd) 70 + { 71 + struct kvm_enable_cap cap = { 72 + .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, 73 + .args = { src_fd } 74 + }; 75 + 76 + return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); 77 + } 78 + 79 + 80 + static void sev_migrate_from(int dst_fd, int src_fd) 81 + { 82 + int ret; 83 + 84 + ret = __sev_migrate_from(dst_fd, src_fd); 85 + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); 86 + } 87 + 88 + static void test_sev_migrate_from(bool es) 89 + { 90 + struct kvm_vm *src_vm; 91 + struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; 92 + int i; 93 + 94 + src_vm = sev_vm_create(es); 95 + for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) 96 + dst_vms[i] = __vm_create(); 97 + 98 + /* Initial migration from the src to the first dst. */ 99 + sev_migrate_from(dst_vms[0]->fd, src_vm->fd); 100 + 101 + for (i = 1; i < NR_MIGRATE_TEST_VMS; i++) 102 + sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd); 103 + 104 + /* Migrate the guest back to the original VM. 
*/ 105 + sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); 106 + 107 + kvm_vm_free(src_vm); 108 + for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) 109 + kvm_vm_free(dst_vms[i]); 110 + } 111 + 112 + struct locking_thread_input { 113 + struct kvm_vm *vm; 114 + int source_fds[NR_LOCK_TESTING_THREADS]; 115 + }; 116 + 117 + static void *locking_test_thread(void *arg) 118 + { 119 + int i, j; 120 + struct locking_thread_input *input = (struct locking_thread_input *)arg; 121 + 122 + for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) { 123 + j = i % NR_LOCK_TESTING_THREADS; 124 + __sev_migrate_from(input->vm->fd, input->source_fds[j]); 125 + } 126 + 127 + return NULL; 128 + } 129 + 130 + static void test_sev_migrate_locking(void) 131 + { 132 + struct locking_thread_input input[NR_LOCK_TESTING_THREADS]; 133 + pthread_t pt[NR_LOCK_TESTING_THREADS]; 134 + int i; 135 + 136 + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) { 137 + input[i].vm = sev_vm_create(/* es= */ false); 138 + input[0].source_fds[i] = input[i].vm->fd; 139 + } 140 + for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i) 141 + memcpy(input[i].source_fds, input[0].source_fds, 142 + sizeof(input[i].source_fds)); 143 + 144 + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) 145 + pthread_create(&pt[i], NULL, locking_test_thread, &input[i]); 146 + 147 + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) 148 + pthread_join(pt[i], NULL); 149 + } 150 + 151 + static void test_sev_migrate_parameters(void) 152 + { 153 + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev, 154 + *sev_es_vm_no_vmsa; 155 + int ret; 156 + 157 + sev_vm = sev_vm_create(/* es= */ false); 158 + sev_es_vm = sev_vm_create(/* es= */ true); 159 + vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); 160 + vm_no_sev = __vm_create(); 161 + sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); 162 + sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); 163 + vm_vcpu_add(sev_es_vm_no_vmsa, 1); 164 + 165 + 166 + ret = __sev_migrate_from(sev_vm->fd, 
sev_es_vm->fd); 167 + TEST_ASSERT( 168 + ret == -1 && errno == EINVAL, 169 + "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", 170 + ret, errno); 171 + 172 + ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd); 173 + TEST_ASSERT( 174 + ret == -1 && errno == EINVAL, 175 + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", 176 + ret, errno); 177 + 178 + ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd); 179 + TEST_ASSERT( 180 + ret == -1 && errno == EINVAL, 181 + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", 182 + ret, errno); 183 + 184 + ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd); 185 + TEST_ASSERT( 186 + ret == -1 && errno == EINVAL, 187 + "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", 188 + ret, errno); 189 + 190 + ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd); 191 + TEST_ASSERT(ret == -1 && errno == EINVAL, 192 + "Migrations require SEV enabled. ret %d, errno: %d\n", ret, 193 + errno); 194 + } 195 + 196 + int main(int argc, char *argv[]) 197 + { 198 + test_sev_migrate_from(/* es= */ false); 199 + test_sev_migrate_from(/* es= */ true); 200 + test_sev_migrate_locking(); 201 + test_sev_migrate_parameters(); 202 + return 0; 203 + }
+5 -5
virt/kvm/kvm_main.c
··· 3747 3747 struct kvm_fpu *fpu = NULL; 3748 3748 struct kvm_sregs *kvm_sregs = NULL; 3749 3749 3750 - if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged) 3750 + if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) 3751 3751 return -EIO; 3752 3752 3753 3753 if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) ··· 3957 3957 void __user *argp = compat_ptr(arg); 3958 3958 int r; 3959 3959 3960 - if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged) 3960 + if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead) 3961 3961 return -EIO; 3962 3962 3963 3963 switch (ioctl) { ··· 4023 4023 { 4024 4024 struct kvm_device *dev = filp->private_data; 4025 4025 4026 - if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged) 4026 + if (dev->kvm->mm != current->mm || dev->kvm->vm_dead) 4027 4027 return -EIO; 4028 4028 4029 4029 switch (ioctl) { ··· 4345 4345 void __user *argp = (void __user *)arg; 4346 4346 int r; 4347 4347 4348 - if (kvm->mm != current->mm || kvm->vm_bugged) 4348 + if (kvm->mm != current->mm || kvm->vm_dead) 4349 4349 return -EIO; 4350 4350 switch (ioctl) { 4351 4351 case KVM_CREATE_VCPU: ··· 4556 4556 struct kvm *kvm = filp->private_data; 4557 4557 int r; 4558 4558 4559 - if (kvm->mm != current->mm || kvm->vm_bugged) 4559 + if (kvm->mm != current->mm || kvm->vm_dead) 4560 4560 return -EIO; 4561 4561 switch (ioctl) { 4562 4562 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT