Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/hyperv: Add smp support for SEV-SNP guest

In an AMD SEV-SNP guest, an AP needs to be started up via the SEV-ES
save area, and Hyper-V requires calling the HVCALL_START_VP hypercall
to pass the GPA of the SEV-ES save area along with the AP's VP index
and VTL (Virtual Trust Level) parameters. Override the
wakeup_secondary_cpu_64 callback with hv_snp_boot_ap.

Reviewed-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Tianyu Lan <tiala@microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Link: https://lore.kernel.org/r/20230818102919.1318039-8-ltykernel@gmail.com

authored by

Tianyu Lan and committed by
Wei Liu
44676bb9 45f46b1a

+157 -1
+138
arch/x86/hyperv/ivm.c
··· 18 18 #include <asm/mshyperv.h> 19 19 #include <asm/hypervisor.h> 20 20 #include <asm/mtrr.h> 21 + #include <asm/coco.h> 22 + #include <asm/io_apic.h> 23 + #include <asm/sev.h> 24 + #include <asm/realmode.h> 25 + #include <asm/e820/api.h> 26 + #include <asm/desc.h> 21 27 22 28 #ifdef CONFIG_AMD_MEM_ENCRYPT 23 29 24 30 #define GHCB_USAGE_HYPERV_CALL 1 31 + 32 + static u8 ap_start_input_arg[PAGE_SIZE] __bss_decrypted __aligned(PAGE_SIZE); 33 + static u8 ap_start_stack[PAGE_SIZE] __aligned(PAGE_SIZE); 25 34 26 35 union hv_ghcb { 27 36 struct ghcb ghcb; ··· 64 55 u64 reserved2; 65 56 } hypercall; 66 57 } __packed __aligned(HV_HYP_PAGE_SIZE); 58 + 59 + static DEFINE_PER_CPU(struct sev_es_save_area *, hv_sev_vmsa); 67 60 68 61 static u16 hv_ghcb_version __ro_after_init; 69 62 ··· 366 355 return true; 367 356 368 357 return false; 358 + } 359 + 360 + #define hv_populate_vmcb_seg(seg, gdtr_base) \ 361 + do { \ 362 + if (seg.selector) { \ 363 + seg.base = 0; \ 364 + seg.limit = HV_AP_SEGMENT_LIMIT; \ 365 + seg.attrib = *(u16 *)(gdtr_base + seg.selector + 5); \ 366 + seg.attrib = (seg.attrib & 0xFF) | ((seg.attrib >> 4) & 0xF00); \ 367 + } \ 368 + } while (0) \ 369 + 370 + static int snp_set_vmsa(void *va, bool vmsa) 371 + { 372 + u64 attrs; 373 + 374 + /* 375 + * Running at VMPL0 allows the kernel to change the VMSA bit for a page 376 + * using the RMPADJUST instruction. However, for the instruction to 377 + * succeed it must target the permissions of a lesser privileged 378 + * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST 379 + * instruction in the AMD64 APM Volume 3). 
380 + */ 381 + attrs = 1; 382 + if (vmsa) 383 + attrs |= RMPADJUST_VMSA_PAGE_BIT; 384 + 385 + return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); 386 + } 387 + 388 + static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) 389 + { 390 + int err; 391 + 392 + err = snp_set_vmsa(vmsa, false); 393 + if (err) 394 + pr_err("clear VMSA page failed (%u), leaking page\n", err); 395 + else 396 + free_page((unsigned long)vmsa); 397 + } 398 + 399 + int hv_snp_boot_ap(int cpu, unsigned long start_ip) 400 + { 401 + struct sev_es_save_area *vmsa = (struct sev_es_save_area *) 402 + __get_free_page(GFP_KERNEL | __GFP_ZERO); 403 + struct sev_es_save_area *cur_vmsa; 404 + struct desc_ptr gdtr; 405 + u64 ret, retry = 5; 406 + struct hv_enable_vp_vtl *start_vp_input; 407 + unsigned long flags; 408 + 409 + if (!vmsa) 410 + return -ENOMEM; 411 + 412 + native_store_gdt(&gdtr); 413 + 414 + vmsa->gdtr.base = gdtr.address; 415 + vmsa->gdtr.limit = gdtr.size; 416 + 417 + asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); 418 + hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); 419 + 420 + asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); 421 + hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); 422 + 423 + asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); 424 + hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); 425 + 426 + asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); 427 + hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); 428 + 429 + vmsa->efer = native_read_msr(MSR_EFER); 430 + 431 + asm volatile("movq %%cr4, %%rax;" : "=a" (vmsa->cr4)); 432 + asm volatile("movq %%cr3, %%rax;" : "=a" (vmsa->cr3)); 433 + asm volatile("movq %%cr0, %%rax;" : "=a" (vmsa->cr0)); 434 + 435 + vmsa->xcr0 = 1; 436 + vmsa->g_pat = HV_AP_INIT_GPAT_DEFAULT; 437 + vmsa->rip = (u64)secondary_startup_64_no_verify; 438 + vmsa->rsp = (u64)&ap_start_stack[PAGE_SIZE]; 439 + 440 + /* 441 + * Set the SNP-specific fields for this VMSA: 442 + * VMPL level 443 + * SEV_FEATURES 
(matches the SEV STATUS MSR right shifted 2 bits) 444 + */ 445 + vmsa->vmpl = 0; 446 + vmsa->sev_features = sev_status >> 2; 447 + 448 + ret = snp_set_vmsa(vmsa, true); 449 + if (!ret) { 450 + pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); 451 + free_page((u64)vmsa); 452 + return ret; 453 + } 454 + 455 + local_irq_save(flags); 456 + start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; 457 + memset(start_vp_input, 0, sizeof(*start_vp_input)); 458 + start_vp_input->partition_id = -1; 459 + start_vp_input->vp_index = cpu; 460 + start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; 461 + *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; 462 + 463 + do { 464 + ret = hv_do_hypercall(HVCALL_START_VP, 465 + start_vp_input, NULL); 466 + } while (hv_result(ret) == HV_STATUS_TIME_OUT && retry--); 467 + 468 + local_irq_restore(flags); 469 + 470 + if (!hv_result_success(ret)) { 471 + pr_err("HvCallStartVirtualProcessor failed: %llx\n", ret); 472 + snp_cleanup_vmsa(vmsa); 473 + vmsa = NULL; 474 + } 475 + 476 + cur_vmsa = per_cpu(hv_sev_vmsa, cpu); 477 + /* Free up any previous VMSA page */ 478 + if (cur_vmsa) 479 + snp_cleanup_vmsa(cur_vmsa); 480 + 481 + /* Record the current VMSA page */ 482 + per_cpu(hv_sev_vmsa, cpu) = vmsa; 483 + 484 + return ret; 369 485 } 370 486 371 487 void __init hv_vtom_init(void)
+8
arch/x86/include/asm/mshyperv.h
··· 49 49 extern union hv_ghcb * __percpu *hv_ghcb_pg; 50 50 51 51 extern bool hv_isolation_type_en_snp(void); 52 + /* 53 + * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA 54 + * to start AP in enlightened SEV guest. 55 + */ 56 + #define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL 57 + #define HV_AP_SEGMENT_LIMIT 0xffffffff 52 58 53 59 int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); 54 60 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); ··· 262 256 bool hv_ghcb_negotiate_protocol(void); 263 257 void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); 264 258 void hv_vtom_init(void); 259 + int hv_snp_boot_ap(int cpu, unsigned long start_ip); 265 260 #else 266 261 static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} 267 262 static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} 268 263 static inline bool hv_ghcb_negotiate_protocol(void) { return false; } 269 264 static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} 270 265 static inline void hv_vtom_init(void) {} 266 + static int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } 271 267 #endif 272 268 273 269 extern bool hv_isolation_type_snp(void);
+10 -1
arch/x86/kernel/cpu/mshyperv.c
··· 295 295 296 296 native_smp_prepare_cpus(max_cpus); 297 297 298 + /* 299 + * Override wakeup_secondary_cpu_64 callback for SEV-SNP 300 + * enlightened guest. 301 + */ 302 + if (hv_isolation_type_en_snp()) { 303 + apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; 304 + return; 305 + } 306 + 298 307 #ifdef CONFIG_X86_64 299 308 for_each_present_cpu(i) { 300 309 if (i == 0) ··· 511 502 512 503 # ifdef CONFIG_SMP 513 504 smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; 514 - if (hv_root_partition) 505 + if (hv_root_partition || hv_isolation_type_en_snp()) 515 506 smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; 516 507 # endif 517 508
+1
include/asm-generic/hyperv-tlfs.h
··· 223 223 #define HV_STATUS_INVALID_PORT_ID 17 224 224 #define HV_STATUS_INVALID_CONNECTION_ID 18 225 225 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 226 + #define HV_STATUS_TIME_OUT 120 226 227 #define HV_STATUS_VTL_ALREADY_ENABLED 134 227 228 228 229 /*