Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull Hyper-V updates from Wei Liu:

- Debugfs support for MSHV statistics (Nuno Das Neves)

- Support for the integrated scheduler (Stanislav Kinsburskii)

- Various fixes for MSHV memory management and hypervisor status
  handling (Stanislav Kinsburskii)

- Expose more capabilities and flags for MSHV partition management
  (Anatol Belski, Muminul Islam, Magnus Kulke)

- Miscellaneous fixes to improve code quality and stability (Carlos
  López, Ethan Nelson-Moore, Li RongQing, Michael Kelley, Mukesh
  Rathor, Purna Pavan Chandra Aekkaladevi, Stanislav Kinsburskii, Uros
  Bizjak)

- PREEMPT_RT fixes for vmbus interrupts (Jan Kiszka)

* tag 'hyperv-next-signed-20260218' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (34 commits)
mshv: Handle insufficient root memory hypervisor statuses
mshv: Handle insufficient contiguous memory hypervisor status
mshv: Introduce hv_deposit_memory helper functions
mshv: Introduce hv_result_needs_memory() helper function
mshv: Add SMT_ENABLED_GUEST partition creation flag
mshv: Add nested virtualization creation flag
Drivers: hv: vmbus: Simplify allocation of vmbus_evt
mshv: expose the scrub partition hypercall
mshv: Add support for integrated scheduler
mshv: Use try_cmpxchg() instead of cmpxchg()
x86/hyperv: Fix error pointer dereference
x86/hyperv: Reserve 3 interrupt vectors used exclusively by MSHV
Drivers: hv: vmbus: Use kthread for vmbus interrupts on PREEMPT_RT
x86/hyperv: Remove ASM_CALL_CONSTRAINT with VMMCALL insn
x86/hyperv: Use savesegment() instead of inline asm() to save segment registers
mshv: fix SRCU protection in irqfd resampler ack handler
mshv: make field names descriptive in a header struct
x86/hyperv: Update comment in hyperv_cleanup()
mshv: clear eventfd counter on irqfd shutdown
x86/hyperv: Use memremap()/memunmap() instead of ioremap_cache()/iounmap()
...

+1775 -260
+1 -2
arch/x86/hyperv/hv_crash.c
··· 279 279 static noinline __noclone void crash_nmi_callback(struct pt_regs *regs) 280 280 { 281 281 struct hv_input_disable_hyp_ex *input; 282 - u64 status; 283 282 int msecs = 1000, ccpu = smp_processor_id(); 284 283 285 284 if (ccpu == 0) { ··· 312 313 input->rip = trampoline_pa; 313 314 input->arg = devirt_arg; 314 315 315 - status = hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL); 316 + (void)hv_do_hypercall(HVCALL_DISABLE_HYP_EX, input, NULL); 316 317 317 318 hv_panic_timeout_reboot(); 318 319 }
+13 -7
arch/x86/hyperv/hv_init.c
··· 103 103 */ 104 104 rdmsrq(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa); 105 105 106 - /* Mask out vTOM bit. ioremap_cache() maps decrypted */ 106 + /* Mask out vTOM bit and map as decrypted */ 107 107 ghcb_gpa &= ~ms_hyperv.shared_gpa_boundary; 108 - ghcb_va = (void *)ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE); 108 + ghcb_va = memremap(ghcb_gpa, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); 109 109 if (!ghcb_va) 110 110 return -ENOMEM; 111 111 ··· 277 277 if (hv_ghcb_pg) { 278 278 ghcb_va = (void **)this_cpu_ptr(hv_ghcb_pg); 279 279 if (*ghcb_va) 280 - iounmap(*ghcb_va); 280 + memunmap(*ghcb_va); 281 281 *ghcb_va = NULL; 282 282 } 283 283 ··· 558 558 memunmap(src); 559 559 560 560 hv_remap_tsc_clocksource(); 561 - hv_root_crash_init(); 562 561 hv_sleep_notifiers_register(); 563 562 } else { 564 563 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); ··· 565 566 } 566 567 567 568 hv_set_hypercall_pg(hv_hypercall_pg); 569 + 570 + if (hv_root_partition()) /* after set hypercall pg */ 571 + hv_root_crash_init(); 568 572 569 573 skip_hypercall_pg_init: 570 574 /* ··· 635 633 hv_ivm_msr_write(HV_X64_MSR_GUEST_OS_ID, 0); 636 634 637 635 /* 638 - * Reset hypercall page reference before reset the page, 639 - * let hypercall operations fail safely rather than 640 - * panic the kernel for using invalid hypercall page 636 + * Reset hv_hypercall_pg before resetting it in the hypervisor. 637 + * hv_set_hypercall_pg(NULL) is not used because at this point in the 638 + * panic path other CPUs have been stopped, causing static_call_update() 639 + * to hang. So resetting hv_hypercall_pg to cause hypercalls to fail 640 + * cleanly is only operative on 32-bit builds. But this is OK as it is 641 + * just a preventative measure to ease detecting a hypercall being made 642 + * after this point, which shouldn't be happening anyway. 641 643 */ 642 644 hv_hypercall_pg = NULL; 643 645
+5 -3
arch/x86/hyperv/hv_vtl.c
··· 110 110 111 111 static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) 112 112 { 113 - u64 status; 113 + u64 status, rsp, rip; 114 114 int ret = 0; 115 115 struct hv_enable_vp_vtl *input; 116 116 unsigned long irq_flags; ··· 123 123 struct desc_struct *gdt; 124 124 125 125 struct task_struct *idle = idle_thread_get(cpu); 126 - u64 rsp = (unsigned long)idle->thread.sp; 126 + if (IS_ERR(idle)) 127 + return PTR_ERR(idle); 127 128 128 - u64 rip = (u64)&hv_vtl_ap_entry; 129 + rsp = (unsigned long)idle->thread.sp; 130 + rip = (u64)&hv_vtl_ap_entry; 129 131 130 132 native_store_gdt(&gdt_ptr); 131 133 store_idt(&idt_ptr);
+6 -5
arch/x86/hyperv/ivm.c
··· 25 25 #include <asm/e820/api.h> 26 26 #include <asm/desc.h> 27 27 #include <asm/msr.h> 28 + #include <asm/segment.h> 28 29 #include <uapi/asm/vmx.h> 29 30 30 31 #ifdef CONFIG_AMD_MEM_ENCRYPT ··· 316 315 vmsa->gdtr.base = gdtr.address; 317 316 vmsa->gdtr.limit = gdtr.size; 318 317 319 - asm volatile("movl %%es, %%eax;" : "=a" (vmsa->es.selector)); 318 + savesegment(es, vmsa->es.selector); 320 319 hv_populate_vmcb_seg(vmsa->es, vmsa->gdtr.base); 321 320 322 - asm volatile("movl %%cs, %%eax;" : "=a" (vmsa->cs.selector)); 321 + savesegment(cs, vmsa->cs.selector); 323 322 hv_populate_vmcb_seg(vmsa->cs, vmsa->gdtr.base); 324 323 325 - asm volatile("movl %%ss, %%eax;" : "=a" (vmsa->ss.selector)); 324 + savesegment(ss, vmsa->ss.selector); 326 325 hv_populate_vmcb_seg(vmsa->ss, vmsa->gdtr.base); 327 326 328 - asm volatile("movl %%ds, %%eax;" : "=a" (vmsa->ds.selector)); 327 + savesegment(ds, vmsa->ds.selector); 329 328 hv_populate_vmcb_seg(vmsa->ds, vmsa->gdtr.base); 330 329 331 330 vmsa->efer = native_read_msr(MSR_EFER); ··· 392 391 393 392 register u64 __r8 asm("r8") = param2; 394 393 asm volatile("vmmcall" 395 - : "=a" (hv_status), ASM_CALL_CONSTRAINT, 394 + : "=a" (hv_status), 396 395 "+c" (control), "+d" (param1), "+r" (__r8) 397 396 : : "cc", "memory", "r9", "r10", "r11"); 398 397
+25
arch/x86/kernel/cpu/mshyperv.c
··· 478 478 } 479 479 EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); 480 480 481 + /* 482 + * Reserved vectors hard coded in the hypervisor. If used outside, the hypervisor 483 + * will either crash or hang or attempt to break into debugger. 484 + */ 485 + static void hv_reserve_irq_vectors(void) 486 + { 487 + #define HYPERV_DBG_FASTFAIL_VECTOR 0x29 488 + #define HYPERV_DBG_ASSERT_VECTOR 0x2C 489 + #define HYPERV_DBG_SERVICE_VECTOR 0x2D 490 + 491 + if (cpu_feature_enabled(X86_FEATURE_FRED)) 492 + return; 493 + 494 + if (test_and_set_bit(HYPERV_DBG_ASSERT_VECTOR, system_vectors) || 495 + test_and_set_bit(HYPERV_DBG_SERVICE_VECTOR, system_vectors) || 496 + test_and_set_bit(HYPERV_DBG_FASTFAIL_VECTOR, system_vectors)) 497 + BUG(); 498 + 499 + pr_info("Hyper-V: reserve vectors: %d %d %d\n", HYPERV_DBG_ASSERT_VECTOR, 500 + HYPERV_DBG_SERVICE_VECTOR, HYPERV_DBG_FASTFAIL_VECTOR); 501 + } 502 + 481 503 static void __init ms_hyperv_init_platform(void) 482 504 { 483 505 int hv_max_functions_eax, eax; ··· 531 509 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); 532 510 533 511 hv_identify_partition_type(); 512 + 513 + if (hv_root_partition()) 514 + hv_reserve_irq_vectors(); 534 515 535 516 if (cc_platform_has(CC_ATTR_SNP_SECURE_AVIC)) 536 517 ms_hyperv.hints |= HV_DEPRECATING_AEOI_RECOMMENDED;
+1
drivers/hv/Makefile
··· 15 15 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o 16 16 mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ 17 17 mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o 18 + mshv_root-$(CONFIG_DEBUG_FS) += mshv_debugfs.o 18 19 mshv_vtl-y := mshv_vtl_main.o 19 20 20 21 # Code that must be built-in
+6 -6
drivers/hv/hv.c
··· 287 287 simp.simp_enabled = 1; 288 288 289 289 if (ms_hyperv.paravisor_present || hv_root_partition()) { 290 - /* Mask out vTOM bit. ioremap_cache() maps decrypted */ 290 + /* Mask out vTOM bit and map as decrypted */ 291 291 u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & 292 292 ~ms_hyperv.shared_gpa_boundary; 293 293 hv_cpu->hyp_synic_message_page = 294 - (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); 294 + memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); 295 295 if (!hv_cpu->hyp_synic_message_page) 296 296 pr_err("Fail to map synic message page.\n"); 297 297 } else { ··· 306 306 siefp.siefp_enabled = 1; 307 307 308 308 if (ms_hyperv.paravisor_present || hv_root_partition()) { 309 - /* Mask out vTOM bit. ioremap_cache() maps decrypted */ 309 + /* Mask out vTOM bit and map as decrypted */ 310 310 u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & 311 311 ~ms_hyperv.shared_gpa_boundary; 312 312 hv_cpu->hyp_synic_event_page = 313 - (void *)ioremap_cache(base, HV_HYP_PAGE_SIZE); 313 + memremap(base, HV_HYP_PAGE_SIZE, MEMREMAP_WB | MEMREMAP_DEC); 314 314 if (!hv_cpu->hyp_synic_event_page) 315 315 pr_err("Fail to map synic event page.\n"); 316 316 } else { ··· 429 429 simp.simp_enabled = 0; 430 430 if (ms_hyperv.paravisor_present || hv_root_partition()) { 431 431 if (hv_cpu->hyp_synic_message_page) { 432 - iounmap(hv_cpu->hyp_synic_message_page); 432 + memunmap(hv_cpu->hyp_synic_message_page); 433 433 hv_cpu->hyp_synic_message_page = NULL; 434 434 } 435 435 } else { ··· 443 443 444 444 if (ms_hyperv.paravisor_present || hv_root_partition()) { 445 445 if (hv_cpu->hyp_synic_event_page) { 446 - iounmap(hv_cpu->hyp_synic_event_page); 446 + memunmap(hv_cpu->hyp_synic_event_page); 447 447 hv_cpu->hyp_synic_event_page = NULL; 448 448 } 449 449 } else {
+3
drivers/hv/hv_common.c
··· 793 793 _STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY, -EIO), 794 794 _STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, -EIO), 795 795 _STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY, -ENOMEM), 796 + _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY, -ENOMEM), 797 + _STATUS_INFO(HV_STATUS_INSUFFICIENT_ROOT_MEMORY, -ENOMEM), 798 + _STATUS_INFO(HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY, -ENOMEM), 796 799 _STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID, -EINVAL), 797 800 _STATUS_INFO(HV_STATUS_INVALID_VP_INDEX, -EINVAL), 798 801 _STATUS_INFO(HV_STATUS_NOT_FOUND, -EIO),
+49 -4
drivers/hv/hv_proc.c
··· 110 110 } 111 111 EXPORT_SYMBOL_GPL(hv_call_deposit_pages); 112 112 113 + int hv_deposit_memory_node(int node, u64 partition_id, 114 + u64 hv_status) 115 + { 116 + u32 num_pages = 1; 117 + 118 + switch (hv_result(hv_status)) { 119 + case HV_STATUS_INSUFFICIENT_MEMORY: 120 + break; 121 + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY: 122 + num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES; 123 + break; 124 + 125 + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY: 126 + num_pages = HV_MAX_CONTIGUOUS_ALLOCATION_PAGES; 127 + fallthrough; 128 + case HV_STATUS_INSUFFICIENT_ROOT_MEMORY: 129 + if (!hv_root_partition()) { 130 + hv_status_err(hv_status, "Unexpected root memory deposit\n"); 131 + return -ENOMEM; 132 + } 133 + partition_id = HV_PARTITION_ID_SELF; 134 + break; 135 + 136 + default: 137 + hv_status_err(hv_status, "Unexpected!\n"); 138 + return -ENOMEM; 139 + } 140 + return hv_call_deposit_pages(node, partition_id, num_pages); 141 + } 142 + EXPORT_SYMBOL_GPL(hv_deposit_memory_node); 143 + 144 + bool hv_result_needs_memory(u64 status) 145 + { 146 + switch (hv_result(status)) { 147 + case HV_STATUS_INSUFFICIENT_MEMORY: 148 + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY: 149 + case HV_STATUS_INSUFFICIENT_ROOT_MEMORY: 150 + case HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY: 151 + return true; 152 + } 153 + return false; 154 + } 155 + EXPORT_SYMBOL_GPL(hv_result_needs_memory); 156 + 113 157 int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) 114 158 { 115 159 struct hv_input_add_logical_processor *input; ··· 181 137 input, output); 182 138 local_irq_restore(flags); 183 139 184 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 140 + if (!hv_result_needs_memory(status)) { 185 141 if (!hv_result_success(status)) { 186 142 hv_status_err(status, "cpu %u apic ID: %u\n", 187 143 lp_index, apic_id); ··· 189 145 } 190 146 break; 191 147 } 192 - ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); 148 + ret = hv_deposit_memory_node(node, 
hv_current_partition_id, 149 + status); 193 150 } while (!ret); 194 151 195 152 return ret; ··· 224 179 status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); 225 180 local_irq_restore(irq_flags); 226 181 227 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 182 + if (!hv_result_needs_memory(status)) { 228 183 if (!hv_result_success(status)) { 229 184 hv_status_err(status, "vcpu: %u, lp: %u\n", 230 185 vp_index, flags); ··· 232 187 } 233 188 break; 234 189 } 235 - ret = hv_call_deposit_pages(node, partition_id, 1); 190 + ret = hv_deposit_memory_node(node, partition_id, status); 236 191 237 192 } while (!ret); 238 193
+2 -2
drivers/hv/hyperv_vmbus.h
··· 370 370 * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages 371 371 * on crash. 372 372 */ 373 - if (cmpxchg(&msg->header.message_type, old_msg_type, 374 - HVMSG_NONE) != old_msg_type) 373 + if (!try_cmpxchg(&msg->header.message_type, 374 + &old_msg_type, HVMSG_NONE)) 375 375 return; 376 376 377 377 /*
+726
drivers/hv/mshv_debugfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2026, Microsoft Corporation. 4 + * 5 + * The /sys/kernel/debug/mshv directory contents. 6 + * Contains various statistics data, provided by the hypervisor. 7 + * 8 + * Authors: Microsoft Linux virtualization team 9 + */ 10 + 11 + #include <linux/debugfs.h> 12 + #include <linux/stringify.h> 13 + #include <asm/mshyperv.h> 14 + #include <linux/slab.h> 15 + 16 + #include "mshv.h" 17 + #include "mshv_root.h" 18 + 19 + /* Ensure this file is not used elsewhere by accident */ 20 + #define MSHV_DEBUGFS_C 21 + #include "mshv_debugfs_counters.c" 22 + 23 + #define U32_BUF_SZ 11 24 + #define U64_BUF_SZ 21 25 + /* Only support SELF and PARENT areas */ 26 + #define NUM_STATS_AREAS 2 27 + static_assert(HV_STATS_AREA_SELF == 0 && HV_STATS_AREA_PARENT == 1, 28 + "SELF and PARENT areas must be usable as indices into an array of size NUM_STATS_AREAS"); 29 + /* HV_HYPERVISOR_COUNTER */ 30 + #define HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS 1 31 + 32 + static struct dentry *mshv_debugfs; 33 + static struct dentry *mshv_debugfs_partition; 34 + static struct dentry *mshv_debugfs_lp; 35 + static struct dentry **parent_vp_stats; 36 + static struct dentry *parent_partition_stats; 37 + 38 + static u64 mshv_lps_count; 39 + static struct hv_stats_page **mshv_lps_stats; 40 + 41 + static int lp_stats_show(struct seq_file *m, void *v) 42 + { 43 + const struct hv_stats_page *stats = m->private; 44 + int idx; 45 + 46 + for (idx = 0; idx < ARRAY_SIZE(hv_lp_counters); idx++) { 47 + char *name = hv_lp_counters[idx]; 48 + 49 + if (!name) 50 + continue; 51 + seq_printf(m, "%-32s: %llu\n", name, stats->data[idx]); 52 + } 53 + 54 + return 0; 55 + } 56 + DEFINE_SHOW_ATTRIBUTE(lp_stats); 57 + 58 + static void mshv_lp_stats_unmap(u32 lp_index) 59 + { 60 + union hv_stats_object_identity identity = { 61 + .lp.lp_index = lp_index, 62 + .lp.stats_area_type = HV_STATS_AREA_SELF, 63 + }; 64 + int err; 65 + 66 + err = 
hv_unmap_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, 67 + mshv_lps_stats[lp_index], &identity); 68 + if (err) 69 + pr_err("%s: failed to unmap logical processor %u stats, err: %d\n", 70 + __func__, lp_index, err); 71 + 72 + mshv_lps_stats[lp_index] = NULL; 73 + } 74 + 75 + static struct hv_stats_page * __init mshv_lp_stats_map(u32 lp_index) 76 + { 77 + union hv_stats_object_identity identity = { 78 + .lp.lp_index = lp_index, 79 + .lp.stats_area_type = HV_STATS_AREA_SELF, 80 + }; 81 + struct hv_stats_page *stats; 82 + int err; 83 + 84 + err = hv_map_stats_page(HV_STATS_OBJECT_LOGICAL_PROCESSOR, &identity, 85 + &stats); 86 + if (err) { 87 + pr_err("%s: failed to map logical processor %u stats, err: %d\n", 88 + __func__, lp_index, err); 89 + return ERR_PTR(err); 90 + } 91 + mshv_lps_stats[lp_index] = stats; 92 + 93 + return stats; 94 + } 95 + 96 + static struct hv_stats_page * __init lp_debugfs_stats_create(u32 lp_index, 97 + struct dentry *parent) 98 + { 99 + struct dentry *dentry; 100 + struct hv_stats_page *stats; 101 + 102 + stats = mshv_lp_stats_map(lp_index); 103 + if (IS_ERR(stats)) 104 + return stats; 105 + 106 + dentry = debugfs_create_file("stats", 0400, parent, 107 + stats, &lp_stats_fops); 108 + if (IS_ERR(dentry)) { 109 + mshv_lp_stats_unmap(lp_index); 110 + return ERR_CAST(dentry); 111 + } 112 + return stats; 113 + } 114 + 115 + static int __init lp_debugfs_create(u32 lp_index, struct dentry *parent) 116 + { 117 + struct dentry *idx; 118 + char lp_idx_str[U32_BUF_SZ]; 119 + struct hv_stats_page *stats; 120 + int err; 121 + 122 + sprintf(lp_idx_str, "%u", lp_index); 123 + 124 + idx = debugfs_create_dir(lp_idx_str, parent); 125 + if (IS_ERR(idx)) 126 + return PTR_ERR(idx); 127 + 128 + stats = lp_debugfs_stats_create(lp_index, idx); 129 + if (IS_ERR(stats)) { 130 + err = PTR_ERR(stats); 131 + goto remove_debugfs_lp_idx; 132 + } 133 + 134 + return 0; 135 + 136 + remove_debugfs_lp_idx: 137 + debugfs_remove_recursive(idx); 138 + return err; 139 + } 140 + 141 
+ static void mshv_debugfs_lp_remove(void) 142 + { 143 + int lp_index; 144 + 145 + debugfs_remove_recursive(mshv_debugfs_lp); 146 + 147 + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) 148 + mshv_lp_stats_unmap(lp_index); 149 + 150 + kfree(mshv_lps_stats); 151 + mshv_lps_stats = NULL; 152 + } 153 + 154 + static int __init mshv_debugfs_lp_create(struct dentry *parent) 155 + { 156 + struct dentry *lp_dir; 157 + int err, lp_index; 158 + 159 + mshv_lps_stats = kcalloc(mshv_lps_count, 160 + sizeof(*mshv_lps_stats), 161 + GFP_KERNEL_ACCOUNT); 162 + 163 + if (!mshv_lps_stats) 164 + return -ENOMEM; 165 + 166 + lp_dir = debugfs_create_dir("lp", parent); 167 + if (IS_ERR(lp_dir)) { 168 + err = PTR_ERR(lp_dir); 169 + goto free_lp_stats; 170 + } 171 + 172 + for (lp_index = 0; lp_index < mshv_lps_count; lp_index++) { 173 + err = lp_debugfs_create(lp_index, lp_dir); 174 + if (err) 175 + goto remove_debugfs_lps; 176 + } 177 + 178 + mshv_debugfs_lp = lp_dir; 179 + 180 + return 0; 181 + 182 + remove_debugfs_lps: 183 + for (lp_index -= 1; lp_index >= 0; lp_index--) 184 + mshv_lp_stats_unmap(lp_index); 185 + debugfs_remove_recursive(lp_dir); 186 + free_lp_stats: 187 + kfree(mshv_lps_stats); 188 + mshv_lps_stats = NULL; 189 + 190 + return err; 191 + } 192 + 193 + static int vp_stats_show(struct seq_file *m, void *v) 194 + { 195 + const struct hv_stats_page **pstats = m->private; 196 + u64 parent_val, self_val; 197 + int idx; 198 + 199 + /* 200 + * For VP and partition stats, there may be two stats areas mapped, 201 + * SELF and PARENT. These refer to the privilege level of the data in 202 + * each page. Some fields may be 0 in SELF and nonzero in PARENT, or 203 + * vice versa. 204 + * 205 + * Hence, prioritize printing from the PARENT page (more privileged 206 + * data), but use the value from the SELF page if the PARENT value is 207 + * 0. 
208 + */ 209 + 210 + for (idx = 0; idx < ARRAY_SIZE(hv_vp_counters); idx++) { 211 + char *name = hv_vp_counters[idx]; 212 + 213 + if (!name) 214 + continue; 215 + 216 + parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx]; 217 + self_val = pstats[HV_STATS_AREA_SELF]->data[idx]; 218 + seq_printf(m, "%-43s: %llu\n", name, 219 + parent_val ? parent_val : self_val); 220 + } 221 + 222 + return 0; 223 + } 224 + DEFINE_SHOW_ATTRIBUTE(vp_stats); 225 + 226 + static void vp_debugfs_remove(struct dentry *vp_stats) 227 + { 228 + debugfs_remove_recursive(vp_stats->d_parent); 229 + } 230 + 231 + static int vp_debugfs_create(u64 partition_id, u32 vp_index, 232 + struct hv_stats_page **pstats, 233 + struct dentry **vp_stats_ptr, 234 + struct dentry *parent) 235 + { 236 + struct dentry *vp_idx_dir, *d; 237 + char vp_idx_str[U32_BUF_SZ]; 238 + int err; 239 + 240 + sprintf(vp_idx_str, "%u", vp_index); 241 + 242 + vp_idx_dir = debugfs_create_dir(vp_idx_str, parent); 243 + if (IS_ERR(vp_idx_dir)) 244 + return PTR_ERR(vp_idx_dir); 245 + 246 + d = debugfs_create_file("stats", 0400, vp_idx_dir, 247 + pstats, &vp_stats_fops); 248 + if (IS_ERR(d)) { 249 + err = PTR_ERR(d); 250 + goto remove_debugfs_vp_idx; 251 + } 252 + 253 + *vp_stats_ptr = d; 254 + 255 + return 0; 256 + 257 + remove_debugfs_vp_idx: 258 + debugfs_remove_recursive(vp_idx_dir); 259 + return err; 260 + } 261 + 262 + static int partition_stats_show(struct seq_file *m, void *v) 263 + { 264 + const struct hv_stats_page **pstats = m->private; 265 + u64 parent_val, self_val; 266 + int idx; 267 + 268 + for (idx = 0; idx < ARRAY_SIZE(hv_partition_counters); idx++) { 269 + char *name = hv_partition_counters[idx]; 270 + 271 + if (!name) 272 + continue; 273 + 274 + parent_val = pstats[HV_STATS_AREA_PARENT]->data[idx]; 275 + self_val = pstats[HV_STATS_AREA_SELF]->data[idx]; 276 + seq_printf(m, "%-37s: %llu\n", name, 277 + parent_val ? 
parent_val : self_val); 278 + } 279 + 280 + return 0; 281 + } 282 + DEFINE_SHOW_ATTRIBUTE(partition_stats); 283 + 284 + static void mshv_partition_stats_unmap(u64 partition_id, 285 + struct hv_stats_page *stats_page, 286 + enum hv_stats_area_type stats_area_type) 287 + { 288 + union hv_stats_object_identity identity = { 289 + .partition.partition_id = partition_id, 290 + .partition.stats_area_type = stats_area_type, 291 + }; 292 + int err; 293 + 294 + err = hv_unmap_stats_page(HV_STATS_OBJECT_PARTITION, stats_page, 295 + &identity); 296 + if (err) 297 + pr_err("%s: failed to unmap partition %lld %s stats, err: %d\n", 298 + __func__, partition_id, 299 + (stats_area_type == HV_STATS_AREA_SELF) ? "self" : "parent", 300 + err); 301 + } 302 + 303 + static struct hv_stats_page *mshv_partition_stats_map(u64 partition_id, 304 + enum hv_stats_area_type stats_area_type) 305 + { 306 + union hv_stats_object_identity identity = { 307 + .partition.partition_id = partition_id, 308 + .partition.stats_area_type = stats_area_type, 309 + }; 310 + struct hv_stats_page *stats; 311 + int err; 312 + 313 + err = hv_map_stats_page(HV_STATS_OBJECT_PARTITION, &identity, &stats); 314 + if (err) { 315 + pr_err("%s: failed to map partition %lld %s stats, err: %d\n", 316 + __func__, partition_id, 317 + (stats_area_type == HV_STATS_AREA_SELF) ? 
"self" : "parent", 318 + err); 319 + return ERR_PTR(err); 320 + } 321 + return stats; 322 + } 323 + 324 + static int mshv_debugfs_partition_stats_create(u64 partition_id, 325 + struct dentry **partition_stats_ptr, 326 + struct dentry *parent) 327 + { 328 + struct dentry *dentry; 329 + struct hv_stats_page **pstats; 330 + int err; 331 + 332 + pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *), 333 + GFP_KERNEL_ACCOUNT); 334 + if (!pstats) 335 + return -ENOMEM; 336 + 337 + pstats[HV_STATS_AREA_SELF] = mshv_partition_stats_map(partition_id, 338 + HV_STATS_AREA_SELF); 339 + if (IS_ERR(pstats[HV_STATS_AREA_SELF])) { 340 + err = PTR_ERR(pstats[HV_STATS_AREA_SELF]); 341 + goto cleanup; 342 + } 343 + 344 + /* 345 + * L1VH partition cannot access its partition stats in parent area. 346 + */ 347 + if (is_l1vh_parent(partition_id)) { 348 + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF]; 349 + } else { 350 + pstats[HV_STATS_AREA_PARENT] = mshv_partition_stats_map(partition_id, 351 + HV_STATS_AREA_PARENT); 352 + if (IS_ERR(pstats[HV_STATS_AREA_PARENT])) { 353 + err = PTR_ERR(pstats[HV_STATS_AREA_PARENT]); 354 + goto unmap_self; 355 + } 356 + if (!pstats[HV_STATS_AREA_PARENT]) 357 + pstats[HV_STATS_AREA_PARENT] = pstats[HV_STATS_AREA_SELF]; 358 + } 359 + 360 + dentry = debugfs_create_file("stats", 0400, parent, 361 + pstats, &partition_stats_fops); 362 + if (IS_ERR(dentry)) { 363 + err = PTR_ERR(dentry); 364 + goto unmap_partition_stats; 365 + } 366 + 367 + *partition_stats_ptr = dentry; 368 + return 0; 369 + 370 + unmap_partition_stats: 371 + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF]) 372 + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_PARENT], 373 + HV_STATS_AREA_PARENT); 374 + unmap_self: 375 + mshv_partition_stats_unmap(partition_id, pstats[HV_STATS_AREA_SELF], 376 + HV_STATS_AREA_SELF); 377 + cleanup: 378 + kfree(pstats); 379 + return err; 380 + } 381 + 382 + static void partition_debugfs_remove(u64 partition_id, 
struct dentry *dentry) 383 + { 384 + struct hv_stats_page **pstats = NULL; 385 + 386 + pstats = dentry->d_inode->i_private; 387 + 388 + debugfs_remove_recursive(dentry->d_parent); 389 + 390 + if (pstats[HV_STATS_AREA_PARENT] != pstats[HV_STATS_AREA_SELF]) { 391 + mshv_partition_stats_unmap(partition_id, 392 + pstats[HV_STATS_AREA_PARENT], 393 + HV_STATS_AREA_PARENT); 394 + } 395 + 396 + mshv_partition_stats_unmap(partition_id, 397 + pstats[HV_STATS_AREA_SELF], 398 + HV_STATS_AREA_SELF); 399 + 400 + kfree(pstats); 401 + } 402 + 403 + static int partition_debugfs_create(u64 partition_id, 404 + struct dentry **vp_dir_ptr, 405 + struct dentry **partition_stats_ptr, 406 + struct dentry *parent) 407 + { 408 + char part_id_str[U64_BUF_SZ]; 409 + struct dentry *part_id_dir, *vp_dir; 410 + int err; 411 + 412 + if (is_l1vh_parent(partition_id)) 413 + sprintf(part_id_str, "self"); 414 + else 415 + sprintf(part_id_str, "%llu", partition_id); 416 + 417 + part_id_dir = debugfs_create_dir(part_id_str, parent); 418 + if (IS_ERR(part_id_dir)) 419 + return PTR_ERR(part_id_dir); 420 + 421 + vp_dir = debugfs_create_dir("vp", part_id_dir); 422 + if (IS_ERR(vp_dir)) { 423 + err = PTR_ERR(vp_dir); 424 + goto remove_debugfs_partition_id; 425 + } 426 + 427 + err = mshv_debugfs_partition_stats_create(partition_id, 428 + partition_stats_ptr, 429 + part_id_dir); 430 + if (err) 431 + goto remove_debugfs_partition_id; 432 + 433 + *vp_dir_ptr = vp_dir; 434 + 435 + return 0; 436 + 437 + remove_debugfs_partition_id: 438 + debugfs_remove_recursive(part_id_dir); 439 + return err; 440 + } 441 + 442 + static void parent_vp_debugfs_remove(u32 vp_index, 443 + struct dentry *vp_stats_ptr) 444 + { 445 + struct hv_stats_page **pstats; 446 + 447 + pstats = vp_stats_ptr->d_inode->i_private; 448 + vp_debugfs_remove(vp_stats_ptr); 449 + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats); 450 + kfree(pstats); 451 + } 452 + 453 + static void mshv_debugfs_parent_partition_remove(void) 454 + { 455 + 
int idx; 456 + 457 + for_each_online_cpu(idx) 458 + parent_vp_debugfs_remove(hv_vp_index[idx], 459 + parent_vp_stats[idx]); 460 + 461 + partition_debugfs_remove(hv_current_partition_id, 462 + parent_partition_stats); 463 + kfree(parent_vp_stats); 464 + parent_vp_stats = NULL; 465 + parent_partition_stats = NULL; 466 + } 467 + 468 + static int __init parent_vp_debugfs_create(u32 vp_index, 469 + struct dentry **vp_stats_ptr, 470 + struct dentry *parent) 471 + { 472 + struct hv_stats_page **pstats; 473 + int err; 474 + 475 + pstats = kcalloc(NUM_STATS_AREAS, sizeof(struct hv_stats_page *), 476 + GFP_KERNEL_ACCOUNT); 477 + if (!pstats) 478 + return -ENOMEM; 479 + 480 + err = mshv_vp_stats_map(hv_current_partition_id, vp_index, pstats); 481 + if (err) 482 + goto cleanup; 483 + 484 + err = vp_debugfs_create(hv_current_partition_id, vp_index, pstats, 485 + vp_stats_ptr, parent); 486 + if (err) 487 + goto unmap_vp_stats; 488 + 489 + return 0; 490 + 491 + unmap_vp_stats: 492 + mshv_vp_stats_unmap(hv_current_partition_id, vp_index, pstats); 493 + cleanup: 494 + kfree(pstats); 495 + return err; 496 + } 497 + 498 + static int __init mshv_debugfs_parent_partition_create(void) 499 + { 500 + struct dentry *vp_dir; 501 + int err, idx, i; 502 + 503 + mshv_debugfs_partition = debugfs_create_dir("partition", 504 + mshv_debugfs); 505 + if (IS_ERR(mshv_debugfs_partition)) 506 + return PTR_ERR(mshv_debugfs_partition); 507 + 508 + err = partition_debugfs_create(hv_current_partition_id, 509 + &vp_dir, 510 + &parent_partition_stats, 511 + mshv_debugfs_partition); 512 + if (err) 513 + goto remove_debugfs_partition; 514 + 515 + parent_vp_stats = kcalloc(nr_cpu_ids, sizeof(*parent_vp_stats), 516 + GFP_KERNEL); 517 + if (!parent_vp_stats) { 518 + err = -ENOMEM; 519 + goto remove_debugfs_partition; 520 + } 521 + 522 + for_each_online_cpu(idx) { 523 + err = parent_vp_debugfs_create(hv_vp_index[idx], 524 + &parent_vp_stats[idx], 525 + vp_dir); 526 + if (err) 527 + goto 
remove_debugfs_partition_vp; 528 + } 529 + 530 + return 0; 531 + 532 + remove_debugfs_partition_vp: 533 + for_each_online_cpu(i) { 534 + if (i >= idx) 535 + break; 536 + parent_vp_debugfs_remove(i, parent_vp_stats[i]); 537 + } 538 + partition_debugfs_remove(hv_current_partition_id, 539 + parent_partition_stats); 540 + 541 + kfree(parent_vp_stats); 542 + parent_vp_stats = NULL; 543 + parent_partition_stats = NULL; 544 + 545 + remove_debugfs_partition: 546 + debugfs_remove_recursive(mshv_debugfs_partition); 547 + mshv_debugfs_partition = NULL; 548 + return err; 549 + } 550 + 551 + static int hv_stats_show(struct seq_file *m, void *v) 552 + { 553 + const struct hv_stats_page *stats = m->private; 554 + int idx; 555 + 556 + for (idx = 0; idx < ARRAY_SIZE(hv_hypervisor_counters); idx++) { 557 + char *name = hv_hypervisor_counters[idx]; 558 + 559 + if (!name) 560 + continue; 561 + seq_printf(m, "%-27s: %llu\n", name, stats->data[idx]); 562 + } 563 + 564 + return 0; 565 + } 566 + DEFINE_SHOW_ATTRIBUTE(hv_stats); 567 + 568 + static void mshv_hv_stats_unmap(void) 569 + { 570 + union hv_stats_object_identity identity = { 571 + .hv.stats_area_type = HV_STATS_AREA_SELF, 572 + }; 573 + int err; 574 + 575 + err = hv_unmap_stats_page(HV_STATS_OBJECT_HYPERVISOR, NULL, &identity); 576 + if (err) 577 + pr_err("%s: failed to unmap hypervisor stats: %d\n", 578 + __func__, err); 579 + } 580 + 581 + static void * __init mshv_hv_stats_map(void) 582 + { 583 + union hv_stats_object_identity identity = { 584 + .hv.stats_area_type = HV_STATS_AREA_SELF, 585 + }; 586 + struct hv_stats_page *stats; 587 + int err; 588 + 589 + err = hv_map_stats_page(HV_STATS_OBJECT_HYPERVISOR, &identity, &stats); 590 + if (err) { 591 + pr_err("%s: failed to map hypervisor stats: %d\n", 592 + __func__, err); 593 + return ERR_PTR(err); 594 + } 595 + return stats; 596 + } 597 + 598 + static int __init mshv_debugfs_hv_stats_create(struct dentry *parent) 599 + { 600 + struct dentry *dentry; 601 + u64 *stats; 602 + int 
err; 603 + 604 + stats = mshv_hv_stats_map(); 605 + if (IS_ERR(stats)) 606 + return PTR_ERR(stats); 607 + 608 + dentry = debugfs_create_file("stats", 0400, parent, 609 + stats, &hv_stats_fops); 610 + if (IS_ERR(dentry)) { 611 + err = PTR_ERR(dentry); 612 + pr_err("%s: failed to create hypervisor stats dentry: %d\n", 613 + __func__, err); 614 + goto unmap_hv_stats; 615 + } 616 + 617 + mshv_lps_count = stats[HV_HYPERVISOR_COUNTER_LOGICAL_PROCESSORS]; 618 + 619 + return 0; 620 + 621 + unmap_hv_stats: 622 + mshv_hv_stats_unmap(); 623 + return err; 624 + } 625 + 626 + int mshv_debugfs_vp_create(struct mshv_vp *vp) 627 + { 628 + struct mshv_partition *p = vp->vp_partition; 629 + 630 + if (!mshv_debugfs) 631 + return 0; 632 + 633 + return vp_debugfs_create(p->pt_id, vp->vp_index, 634 + vp->vp_stats_pages, 635 + &vp->vp_stats_dentry, 636 + p->pt_vp_dentry); 637 + } 638 + 639 + void mshv_debugfs_vp_remove(struct mshv_vp *vp) 640 + { 641 + if (!mshv_debugfs) 642 + return; 643 + 644 + vp_debugfs_remove(vp->vp_stats_dentry); 645 + } 646 + 647 + int mshv_debugfs_partition_create(struct mshv_partition *partition) 648 + { 649 + int err; 650 + 651 + if (!mshv_debugfs) 652 + return 0; 653 + 654 + err = partition_debugfs_create(partition->pt_id, 655 + &partition->pt_vp_dentry, 656 + &partition->pt_stats_dentry, 657 + mshv_debugfs_partition); 658 + if (err) 659 + return err; 660 + 661 + return 0; 662 + } 663 + 664 + void mshv_debugfs_partition_remove(struct mshv_partition *partition) 665 + { 666 + if (!mshv_debugfs) 667 + return; 668 + 669 + partition_debugfs_remove(partition->pt_id, 670 + partition->pt_stats_dentry); 671 + } 672 + 673 + int __init mshv_debugfs_init(void) 674 + { 675 + int err; 676 + 677 + mshv_debugfs = debugfs_create_dir("mshv", NULL); 678 + if (IS_ERR(mshv_debugfs)) { 679 + pr_err("%s: failed to create debugfs directory\n", __func__); 680 + return PTR_ERR(mshv_debugfs); 681 + } 682 + 683 + if (hv_root_partition()) { 684 + err = 
mshv_debugfs_hv_stats_create(mshv_debugfs); 685 + if (err) 686 + goto remove_mshv_dir; 687 + 688 + err = mshv_debugfs_lp_create(mshv_debugfs); 689 + if (err) 690 + goto unmap_hv_stats; 691 + } 692 + 693 + err = mshv_debugfs_parent_partition_create(); 694 + if (err) 695 + goto unmap_lp_stats; 696 + 697 + return 0; 698 + 699 + unmap_lp_stats: 700 + if (hv_root_partition()) { 701 + mshv_debugfs_lp_remove(); 702 + mshv_debugfs_lp = NULL; 703 + } 704 + unmap_hv_stats: 705 + if (hv_root_partition()) 706 + mshv_hv_stats_unmap(); 707 + remove_mshv_dir: 708 + debugfs_remove_recursive(mshv_debugfs); 709 + mshv_debugfs = NULL; 710 + return err; 711 + } 712 + 713 + void mshv_debugfs_exit(void) 714 + { 715 + mshv_debugfs_parent_partition_remove(); 716 + 717 + if (hv_root_partition()) { 718 + mshv_debugfs_lp_remove(); 719 + mshv_debugfs_lp = NULL; 720 + mshv_hv_stats_unmap(); 721 + } 722 + 723 + debugfs_remove_recursive(mshv_debugfs); 724 + mshv_debugfs = NULL; 725 + mshv_debugfs_partition = NULL; 726 + }
+490
drivers/hv/mshv_debugfs_counters.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2026, Microsoft Corporation. 4 + * 5 + * Data for printing stats page counters via debugfs. 6 + * 7 + * Authors: Microsoft Linux virtualization team 8 + */ 9 + 10 + /* 11 + * For simplicity, this file is included directly in mshv_debugfs.c. 12 + * If these are ever needed elsewhere they should be compiled separately. 13 + * Ensure this file is not used twice by accident. 14 + */ 15 + #ifndef MSHV_DEBUGFS_C 16 + #error "This file should only be included in mshv_debugfs.c" 17 + #endif 18 + 19 + /* HV_HYPERVISOR_COUNTER */ 20 + static char *hv_hypervisor_counters[] = { 21 + [1] = "HvLogicalProcessors", 22 + [2] = "HvPartitions", 23 + [3] = "HvTotalPages", 24 + [4] = "HvVirtualProcessors", 25 + [5] = "HvMonitoredNotifications", 26 + [6] = "HvModernStandbyEntries", 27 + [7] = "HvPlatformIdleTransitions", 28 + [8] = "HvHypervisorStartupCost", 29 + 30 + [10] = "HvIOSpacePages", 31 + [11] = "HvNonEssentialPagesForDump", 32 + [12] = "HvSubsumedPages", 33 + }; 34 + 35 + /* HV_CPU_COUNTER */ 36 + static char *hv_lp_counters[] = { 37 + [1] = "LpGlobalTime", 38 + [2] = "LpTotalRunTime", 39 + [3] = "LpHypervisorRunTime", 40 + [4] = "LpHardwareInterrupts", 41 + [5] = "LpContextSwitches", 42 + [6] = "LpInterProcessorInterrupts", 43 + [7] = "LpSchedulerInterrupts", 44 + [8] = "LpTimerInterrupts", 45 + [9] = "LpInterProcessorInterruptsSent", 46 + [10] = "LpProcessorHalts", 47 + [11] = "LpMonitorTransitionCost", 48 + [12] = "LpContextSwitchTime", 49 + [13] = "LpC1TransitionsCount", 50 + [14] = "LpC1RunTime", 51 + [15] = "LpC2TransitionsCount", 52 + [16] = "LpC2RunTime", 53 + [17] = "LpC3TransitionsCount", 54 + [18] = "LpC3RunTime", 55 + [19] = "LpRootVpIndex", 56 + [20] = "LpIdleSequenceNumber", 57 + [21] = "LpGlobalTscCount", 58 + [22] = "LpActiveTscCount", 59 + [23] = "LpIdleAccumulation", 60 + [24] = "LpReferenceCycleCount0", 61 + [25] = "LpActualCycleCount0", 62 + [26] = "LpReferenceCycleCount1", 63 + 
[27] = "LpActualCycleCount1", 64 + [28] = "LpProximityDomainId", 65 + [29] = "LpPostedInterruptNotifications", 66 + [30] = "LpBranchPredictorFlushes", 67 + #if IS_ENABLED(CONFIG_X86_64) 68 + [31] = "LpL1DataCacheFlushes", 69 + [32] = "LpImmediateL1DataCacheFlushes", 70 + [33] = "LpMbFlushes", 71 + [34] = "LpCounterRefreshSequenceNumber", 72 + [35] = "LpCounterRefreshReferenceTime", 73 + [36] = "LpIdleAccumulationSnapshot", 74 + [37] = "LpActiveTscCountSnapshot", 75 + [38] = "LpHwpRequestContextSwitches", 76 + [39] = "LpPlaceholder1", 77 + [40] = "LpPlaceholder2", 78 + [41] = "LpPlaceholder3", 79 + [42] = "LpPlaceholder4", 80 + [43] = "LpPlaceholder5", 81 + [44] = "LpPlaceholder6", 82 + [45] = "LpPlaceholder7", 83 + [46] = "LpPlaceholder8", 84 + [47] = "LpPlaceholder9", 85 + [48] = "LpSchLocalRunListSize", 86 + [49] = "LpReserveGroupId", 87 + [50] = "LpRunningPriority", 88 + [51] = "LpPerfmonInterruptCount", 89 + #elif IS_ENABLED(CONFIG_ARM64) 90 + [31] = "LpCounterRefreshSequenceNumber", 91 + [32] = "LpCounterRefreshReferenceTime", 92 + [33] = "LpIdleAccumulationSnapshot", 93 + [34] = "LpActiveTscCountSnapshot", 94 + [35] = "LpHwpRequestContextSwitches", 95 + [36] = "LpPlaceholder2", 96 + [37] = "LpPlaceholder3", 97 + [38] = "LpPlaceholder4", 98 + [39] = "LpPlaceholder5", 99 + [40] = "LpPlaceholder6", 100 + [41] = "LpPlaceholder7", 101 + [42] = "LpPlaceholder8", 102 + [43] = "LpPlaceholder9", 103 + [44] = "LpSchLocalRunListSize", 104 + [45] = "LpReserveGroupId", 105 + [46] = "LpRunningPriority", 106 + #endif 107 + }; 108 + 109 + /* HV_PROCESS_COUNTER */ 110 + static char *hv_partition_counters[] = { 111 + [1] = "PtVirtualProcessors", 112 + 113 + [3] = "PtTlbSize", 114 + [4] = "PtAddressSpaces", 115 + [5] = "PtDepositedPages", 116 + [6] = "PtGpaPages", 117 + [7] = "PtGpaSpaceModifications", 118 + [8] = "PtVirtualTlbFlushEntires", 119 + [9] = "PtRecommendedTlbSize", 120 + [10] = "PtGpaPages4K", 121 + [11] = "PtGpaPages2M", 122 + [12] = "PtGpaPages1G", 123 + [13] = 
"PtGpaPages512G", 124 + [14] = "PtDevicePages4K", 125 + [15] = "PtDevicePages2M", 126 + [16] = "PtDevicePages1G", 127 + [17] = "PtDevicePages512G", 128 + [18] = "PtAttachedDevices", 129 + [19] = "PtDeviceInterruptMappings", 130 + [20] = "PtIoTlbFlushes", 131 + [21] = "PtIoTlbFlushCost", 132 + [22] = "PtDeviceInterruptErrors", 133 + [23] = "PtDeviceDmaErrors", 134 + [24] = "PtDeviceInterruptThrottleEvents", 135 + [25] = "PtSkippedTimerTicks", 136 + [26] = "PtPartitionId", 137 + #if IS_ENABLED(CONFIG_X86_64) 138 + [27] = "PtNestedTlbSize", 139 + [28] = "PtRecommendedNestedTlbSize", 140 + [29] = "PtNestedTlbFreeListSize", 141 + [30] = "PtNestedTlbTrimmedPages", 142 + [31] = "PtPagesShattered", 143 + [32] = "PtPagesRecombined", 144 + [33] = "PtHwpRequestValue", 145 + [34] = "PtAutoSuspendEnableTime", 146 + [35] = "PtAutoSuspendTriggerTime", 147 + [36] = "PtAutoSuspendDisableTime", 148 + [37] = "PtPlaceholder1", 149 + [38] = "PtPlaceholder2", 150 + [39] = "PtPlaceholder3", 151 + [40] = "PtPlaceholder4", 152 + [41] = "PtPlaceholder5", 153 + [42] = "PtPlaceholder6", 154 + [43] = "PtPlaceholder7", 155 + [44] = "PtPlaceholder8", 156 + [45] = "PtHypervisorStateTransferGeneration", 157 + [46] = "PtNumberofActiveChildPartitions", 158 + #elif IS_ENABLED(CONFIG_ARM64) 159 + [27] = "PtHwpRequestValue", 160 + [28] = "PtAutoSuspendEnableTime", 161 + [29] = "PtAutoSuspendTriggerTime", 162 + [30] = "PtAutoSuspendDisableTime", 163 + [31] = "PtPlaceholder1", 164 + [32] = "PtPlaceholder2", 165 + [33] = "PtPlaceholder3", 166 + [34] = "PtPlaceholder4", 167 + [35] = "PtPlaceholder5", 168 + [36] = "PtPlaceholder6", 169 + [37] = "PtPlaceholder7", 170 + [38] = "PtPlaceholder8", 171 + [39] = "PtHypervisorStateTransferGeneration", 172 + [40] = "PtNumberofActiveChildPartitions", 173 + #endif 174 + }; 175 + 176 + /* HV_THREAD_COUNTER */ 177 + static char *hv_vp_counters[] = { 178 + [1] = "VpTotalRunTime", 179 + [2] = "VpHypervisorRunTime", 180 + [3] = "VpRemoteNodeRunTime", 181 + [4] = 
"VpNormalizedRunTime", 182 + [5] = "VpIdealCpu", 183 + 184 + [7] = "VpHypercallsCount", 185 + [8] = "VpHypercallsTime", 186 + #if IS_ENABLED(CONFIG_X86_64) 187 + [9] = "VpPageInvalidationsCount", 188 + [10] = "VpPageInvalidationsTime", 189 + [11] = "VpControlRegisterAccessesCount", 190 + [12] = "VpControlRegisterAccessesTime", 191 + [13] = "VpIoInstructionsCount", 192 + [14] = "VpIoInstructionsTime", 193 + [15] = "VpHltInstructionsCount", 194 + [16] = "VpHltInstructionsTime", 195 + [17] = "VpMwaitInstructionsCount", 196 + [18] = "VpMwaitInstructionsTime", 197 + [19] = "VpCpuidInstructionsCount", 198 + [20] = "VpCpuidInstructionsTime", 199 + [21] = "VpMsrAccessesCount", 200 + [22] = "VpMsrAccessesTime", 201 + [23] = "VpOtherInterceptsCount", 202 + [24] = "VpOtherInterceptsTime", 203 + [25] = "VpExternalInterruptsCount", 204 + [26] = "VpExternalInterruptsTime", 205 + [27] = "VpPendingInterruptsCount", 206 + [28] = "VpPendingInterruptsTime", 207 + [29] = "VpEmulatedInstructionsCount", 208 + [30] = "VpEmulatedInstructionsTime", 209 + [31] = "VpDebugRegisterAccessesCount", 210 + [32] = "VpDebugRegisterAccessesTime", 211 + [33] = "VpPageFaultInterceptsCount", 212 + [34] = "VpPageFaultInterceptsTime", 213 + [35] = "VpGuestPageTableMaps", 214 + [36] = "VpLargePageTlbFills", 215 + [37] = "VpSmallPageTlbFills", 216 + [38] = "VpReflectedGuestPageFaults", 217 + [39] = "VpApicMmioAccesses", 218 + [40] = "VpIoInterceptMessages", 219 + [41] = "VpMemoryInterceptMessages", 220 + [42] = "VpApicEoiAccesses", 221 + [43] = "VpOtherMessages", 222 + [44] = "VpPageTableAllocations", 223 + [45] = "VpLogicalProcessorMigrations", 224 + [46] = "VpAddressSpaceEvictions", 225 + [47] = "VpAddressSpaceSwitches", 226 + [48] = "VpAddressDomainFlushes", 227 + [49] = "VpAddressSpaceFlushes", 228 + [50] = "VpGlobalGvaRangeFlushes", 229 + [51] = "VpLocalGvaRangeFlushes", 230 + [52] = "VpPageTableEvictions", 231 + [53] = "VpPageTableReclamations", 232 + [54] = "VpPageTableResets", 233 + [55] = 
"VpPageTableValidations", 234 + [56] = "VpApicTprAccesses", 235 + [57] = "VpPageTableWriteIntercepts", 236 + [58] = "VpSyntheticInterrupts", 237 + [59] = "VpVirtualInterrupts", 238 + [60] = "VpApicIpisSent", 239 + [61] = "VpApicSelfIpisSent", 240 + [62] = "VpGpaSpaceHypercalls", 241 + [63] = "VpLogicalProcessorHypercalls", 242 + [64] = "VpLongSpinWaitHypercalls", 243 + [65] = "VpOtherHypercalls", 244 + [66] = "VpSyntheticInterruptHypercalls", 245 + [67] = "VpVirtualInterruptHypercalls", 246 + [68] = "VpVirtualMmuHypercalls", 247 + [69] = "VpVirtualProcessorHypercalls", 248 + [70] = "VpHardwareInterrupts", 249 + [71] = "VpNestedPageFaultInterceptsCount", 250 + [72] = "VpNestedPageFaultInterceptsTime", 251 + [73] = "VpPageScans", 252 + [74] = "VpLogicalProcessorDispatches", 253 + [75] = "VpWaitingForCpuTime", 254 + [76] = "VpExtendedHypercalls", 255 + [77] = "VpExtendedHypercallInterceptMessages", 256 + [78] = "VpMbecNestedPageTableSwitches", 257 + [79] = "VpOtherReflectedGuestExceptions", 258 + [80] = "VpGlobalIoTlbFlushes", 259 + [81] = "VpGlobalIoTlbFlushCost", 260 + [82] = "VpLocalIoTlbFlushes", 261 + [83] = "VpLocalIoTlbFlushCost", 262 + [84] = "VpHypercallsForwardedCount", 263 + [85] = "VpHypercallsForwardingTime", 264 + [86] = "VpPageInvalidationsForwardedCount", 265 + [87] = "VpPageInvalidationsForwardingTime", 266 + [88] = "VpControlRegisterAccessesForwardedCount", 267 + [89] = "VpControlRegisterAccessesForwardingTime", 268 + [90] = "VpIoInstructionsForwardedCount", 269 + [91] = "VpIoInstructionsForwardingTime", 270 + [92] = "VpHltInstructionsForwardedCount", 271 + [93] = "VpHltInstructionsForwardingTime", 272 + [94] = "VpMwaitInstructionsForwardedCount", 273 + [95] = "VpMwaitInstructionsForwardingTime", 274 + [96] = "VpCpuidInstructionsForwardedCount", 275 + [97] = "VpCpuidInstructionsForwardingTime", 276 + [98] = "VpMsrAccessesForwardedCount", 277 + [99] = "VpMsrAccessesForwardingTime", 278 + [100] = "VpOtherInterceptsForwardedCount", 279 + [101] = 
"VpOtherInterceptsForwardingTime", 280 + [102] = "VpExternalInterruptsForwardedCount", 281 + [103] = "VpExternalInterruptsForwardingTime", 282 + [104] = "VpPendingInterruptsForwardedCount", 283 + [105] = "VpPendingInterruptsForwardingTime", 284 + [106] = "VpEmulatedInstructionsForwardedCount", 285 + [107] = "VpEmulatedInstructionsForwardingTime", 286 + [108] = "VpDebugRegisterAccessesForwardedCount", 287 + [109] = "VpDebugRegisterAccessesForwardingTime", 288 + [110] = "VpPageFaultInterceptsForwardedCount", 289 + [111] = "VpPageFaultInterceptsForwardingTime", 290 + [112] = "VpVmclearEmulationCount", 291 + [113] = "VpVmclearEmulationTime", 292 + [114] = "VpVmptrldEmulationCount", 293 + [115] = "VpVmptrldEmulationTime", 294 + [116] = "VpVmptrstEmulationCount", 295 + [117] = "VpVmptrstEmulationTime", 296 + [118] = "VpVmreadEmulationCount", 297 + [119] = "VpVmreadEmulationTime", 298 + [120] = "VpVmwriteEmulationCount", 299 + [121] = "VpVmwriteEmulationTime", 300 + [122] = "VpVmxoffEmulationCount", 301 + [123] = "VpVmxoffEmulationTime", 302 + [124] = "VpVmxonEmulationCount", 303 + [125] = "VpVmxonEmulationTime", 304 + [126] = "VpNestedVMEntriesCount", 305 + [127] = "VpNestedVMEntriesTime", 306 + [128] = "VpNestedSLATSoftPageFaultsCount", 307 + [129] = "VpNestedSLATSoftPageFaultsTime", 308 + [130] = "VpNestedSLATHardPageFaultsCount", 309 + [131] = "VpNestedSLATHardPageFaultsTime", 310 + [132] = "VpInvEptAllContextEmulationCount", 311 + [133] = "VpInvEptAllContextEmulationTime", 312 + [134] = "VpInvEptSingleContextEmulationCount", 313 + [135] = "VpInvEptSingleContextEmulationTime", 314 + [136] = "VpInvVpidAllContextEmulationCount", 315 + [137] = "VpInvVpidAllContextEmulationTime", 316 + [138] = "VpInvVpidSingleContextEmulationCount", 317 + [139] = "VpInvVpidSingleContextEmulationTime", 318 + [140] = "VpInvVpidSingleAddressEmulationCount", 319 + [141] = "VpInvVpidSingleAddressEmulationTime", 320 + [142] = "VpNestedTlbPageTableReclamations", 321 + [143] = 
"VpNestedTlbPageTableEvictions", 322 + [144] = "VpFlushGuestPhysicalAddressSpaceHypercalls", 323 + [145] = "VpFlushGuestPhysicalAddressListHypercalls", 324 + [146] = "VpPostedInterruptNotifications", 325 + [147] = "VpPostedInterruptScans", 326 + [148] = "VpTotalCoreRunTime", 327 + [149] = "VpMaximumRunTime", 328 + [150] = "VpHwpRequestContextSwitches", 329 + [151] = "VpWaitingForCpuTimeBucket0", 330 + [152] = "VpWaitingForCpuTimeBucket1", 331 + [153] = "VpWaitingForCpuTimeBucket2", 332 + [154] = "VpWaitingForCpuTimeBucket3", 333 + [155] = "VpWaitingForCpuTimeBucket4", 334 + [156] = "VpWaitingForCpuTimeBucket5", 335 + [157] = "VpWaitingForCpuTimeBucket6", 336 + [158] = "VpVmloadEmulationCount", 337 + [159] = "VpVmloadEmulationTime", 338 + [160] = "VpVmsaveEmulationCount", 339 + [161] = "VpVmsaveEmulationTime", 340 + [162] = "VpGifInstructionEmulationCount", 341 + [163] = "VpGifInstructionEmulationTime", 342 + [164] = "VpEmulatedErrataSvmInstructions", 343 + [165] = "VpPlaceholder1", 344 + [166] = "VpPlaceholder2", 345 + [167] = "VpPlaceholder3", 346 + [168] = "VpPlaceholder4", 347 + [169] = "VpPlaceholder5", 348 + [170] = "VpPlaceholder6", 349 + [171] = "VpPlaceholder7", 350 + [172] = "VpPlaceholder8", 351 + [173] = "VpContentionTime", 352 + [174] = "VpWakeUpTime", 353 + [175] = "VpSchedulingPriority", 354 + [176] = "VpRdpmcInstructionsCount", 355 + [177] = "VpRdpmcInstructionsTime", 356 + [178] = "VpPerfmonPmuMsrAccessesCount", 357 + [179] = "VpPerfmonLbrMsrAccessesCount", 358 + [180] = "VpPerfmonIptMsrAccessesCount", 359 + [181] = "VpPerfmonInterruptCount", 360 + [182] = "VpVtl1DispatchCount", 361 + [183] = "VpVtl2DispatchCount", 362 + [184] = "VpVtl2DispatchBucket0", 363 + [185] = "VpVtl2DispatchBucket1", 364 + [186] = "VpVtl2DispatchBucket2", 365 + [187] = "VpVtl2DispatchBucket3", 366 + [188] = "VpVtl2DispatchBucket4", 367 + [189] = "VpVtl2DispatchBucket5", 368 + [190] = "VpVtl2DispatchBucket6", 369 + [191] = "VpVtl1RunTime", 370 + [192] = "VpVtl2RunTime", 371 + 
[193] = "VpIommuHypercalls", 372 + [194] = "VpCpuGroupHypercalls", 373 + [195] = "VpVsmHypercalls", 374 + [196] = "VpEventLogHypercalls", 375 + [197] = "VpDeviceDomainHypercalls", 376 + [198] = "VpDepositHypercalls", 377 + [199] = "VpSvmHypercalls", 378 + [200] = "VpBusLockAcquisitionCount", 379 + [201] = "VpLoadAvg", 380 + [202] = "VpRootDispatchThreadBlocked", 381 + [203] = "VpIdleCpuTime", 382 + [204] = "VpWaitingForCpuTimeBucket7", 383 + [205] = "VpWaitingForCpuTimeBucket8", 384 + [206] = "VpWaitingForCpuTimeBucket9", 385 + [207] = "VpWaitingForCpuTimeBucket10", 386 + [208] = "VpWaitingForCpuTimeBucket11", 387 + [209] = "VpWaitingForCpuTimeBucket12", 388 + [210] = "VpHierarchicalSuspendTime", 389 + [211] = "VpExpressSchedulingAttempts", 390 + [212] = "VpExpressSchedulingCount", 391 + #elif IS_ENABLED(CONFIG_ARM64) 392 + [9] = "VpSysRegAccessesCount", 393 + [10] = "VpSysRegAccessesTime", 394 + [11] = "VpSmcInstructionsCount", 395 + [12] = "VpSmcInstructionsTime", 396 + [13] = "VpOtherInterceptsCount", 397 + [14] = "VpOtherInterceptsTime", 398 + [15] = "VpExternalInterruptsCount", 399 + [16] = "VpExternalInterruptsTime", 400 + [17] = "VpPendingInterruptsCount", 401 + [18] = "VpPendingInterruptsTime", 402 + [19] = "VpGuestPageTableMaps", 403 + [20] = "VpLargePageTlbFills", 404 + [21] = "VpSmallPageTlbFills", 405 + [22] = "VpReflectedGuestPageFaults", 406 + [23] = "VpMemoryInterceptMessages", 407 + [24] = "VpOtherMessages", 408 + [25] = "VpLogicalProcessorMigrations", 409 + [26] = "VpAddressDomainFlushes", 410 + [27] = "VpAddressSpaceFlushes", 411 + [28] = "VpSyntheticInterrupts", 412 + [29] = "VpVirtualInterrupts", 413 + [30] = "VpApicSelfIpisSent", 414 + [31] = "VpGpaSpaceHypercalls", 415 + [32] = "VpLogicalProcessorHypercalls", 416 + [33] = "VpLongSpinWaitHypercalls", 417 + [34] = "VpOtherHypercalls", 418 + [35] = "VpSyntheticInterruptHypercalls", 419 + [36] = "VpVirtualInterruptHypercalls", 420 + [37] = "VpVirtualMmuHypercalls", 421 + [38] = 
"VpVirtualProcessorHypercalls", 422 + [39] = "VpHardwareInterrupts", 423 + [40] = "VpNestedPageFaultInterceptsCount", 424 + [41] = "VpNestedPageFaultInterceptsTime", 425 + [42] = "VpLogicalProcessorDispatches", 426 + [43] = "VpWaitingForCpuTime", 427 + [44] = "VpExtendedHypercalls", 428 + [45] = "VpExtendedHypercallInterceptMessages", 429 + [46] = "VpMbecNestedPageTableSwitches", 430 + [47] = "VpOtherReflectedGuestExceptions", 431 + [48] = "VpGlobalIoTlbFlushes", 432 + [49] = "VpGlobalIoTlbFlushCost", 433 + [50] = "VpLocalIoTlbFlushes", 434 + [51] = "VpLocalIoTlbFlushCost", 435 + [52] = "VpFlushGuestPhysicalAddressSpaceHypercalls", 436 + [53] = "VpFlushGuestPhysicalAddressListHypercalls", 437 + [54] = "VpPostedInterruptNotifications", 438 + [55] = "VpPostedInterruptScans", 439 + [56] = "VpTotalCoreRunTime", 440 + [57] = "VpMaximumRunTime", 441 + [58] = "VpWaitingForCpuTimeBucket0", 442 + [59] = "VpWaitingForCpuTimeBucket1", 443 + [60] = "VpWaitingForCpuTimeBucket2", 444 + [61] = "VpWaitingForCpuTimeBucket3", 445 + [62] = "VpWaitingForCpuTimeBucket4", 446 + [63] = "VpWaitingForCpuTimeBucket5", 447 + [64] = "VpWaitingForCpuTimeBucket6", 448 + [65] = "VpHwpRequestContextSwitches", 449 + [66] = "VpPlaceholder2", 450 + [67] = "VpPlaceholder3", 451 + [68] = "VpPlaceholder4", 452 + [69] = "VpPlaceholder5", 453 + [70] = "VpPlaceholder6", 454 + [71] = "VpPlaceholder7", 455 + [72] = "VpPlaceholder8", 456 + [73] = "VpContentionTime", 457 + [74] = "VpWakeUpTime", 458 + [75] = "VpSchedulingPriority", 459 + [76] = "VpVtl1DispatchCount", 460 + [77] = "VpVtl2DispatchCount", 461 + [78] = "VpVtl2DispatchBucket0", 462 + [79] = "VpVtl2DispatchBucket1", 463 + [80] = "VpVtl2DispatchBucket2", 464 + [81] = "VpVtl2DispatchBucket3", 465 + [82] = "VpVtl2DispatchBucket4", 466 + [83] = "VpVtl2DispatchBucket5", 467 + [84] = "VpVtl2DispatchBucket6", 468 + [85] = "VpVtl1RunTime", 469 + [86] = "VpVtl2RunTime", 470 + [87] = "VpIommuHypercalls", 471 + [88] = "VpCpuGroupHypercalls", 472 + [89] = 
"VpVsmHypercalls", 473 + [90] = "VpEventLogHypercalls", 474 + [91] = "VpDeviceDomainHypercalls", 475 + [92] = "VpDepositHypercalls", 476 + [93] = "VpSvmHypercalls", 477 + [94] = "VpLoadAvg", 478 + [95] = "VpRootDispatchThreadBlocked", 479 + [96] = "VpIdleCpuTime", 480 + [97] = "VpWaitingForCpuTimeBucket7", 481 + [98] = "VpWaitingForCpuTimeBucket8", 482 + [99] = "VpWaitingForCpuTimeBucket9", 483 + [100] = "VpWaitingForCpuTimeBucket10", 484 + [101] = "VpWaitingForCpuTimeBucket11", 485 + [102] = "VpWaitingForCpuTimeBucket12", 486 + [103] = "VpHierarchicalSuspendTime", 487 + [104] = "VpExpressSchedulingAttempts", 488 + [105] = "VpExpressSchedulingCount", 489 + #endif 490 + };
+11 -11
drivers/hv/mshv_eventfd.c
··· 87 87 88 88 idx = srcu_read_lock(&partition->pt_irq_srcu); 89 89 90 - hlist_for_each_entry_rcu(irqfd, &resampler->rsmplr_irqfd_list, 91 - irqfd_resampler_hnode) { 90 + hlist_for_each_entry_srcu(irqfd, &resampler->rsmplr_irqfd_list, 91 + irqfd_resampler_hnode, 92 + srcu_read_lock_held(&partition->pt_irq_srcu)) { 92 93 if (hv_should_clear_interrupt(irqfd->irqfd_lapic_irq.lapic_control.interrupt_type)) 93 94 hv_call_clear_virtual_interrupt(partition->pt_id); 94 95 ··· 129 128 130 129 new_iv.vector[new_iv.vector_count++] = vector; 131 130 132 - if (cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64, 133 - iv.as_uint64, new_iv.as_uint64) != iv.as_uint64) 131 + if (!try_cmpxchg(&vp->vp_register_page->interrupt_vectors.as_uint64, 132 + &iv.as_uint64, new_iv.as_uint64)) 134 133 return -EAGAIN; 135 134 136 135 return 0; ··· 248 247 { 249 248 struct mshv_irqfd *irqfd = 250 249 container_of(work, struct mshv_irqfd, irqfd_shutdown); 250 + u64 cnt; 251 251 252 252 /* 253 253 * Synchronize with the wait-queue and unhook ourselves to prevent 254 254 * further events. 
255 255 */ 256 - remove_wait_queue(irqfd->irqfd_wqh, &irqfd->irqfd_wait); 256 + eventfd_ctx_remove_wait_queue(irqfd->irqfd_eventfd_ctx, &irqfd->irqfd_wait, &cnt); 257 257 258 258 if (irqfd->irqfd_resampler) { 259 259 mshv_irqfd_resampler_shutdown(irqfd); ··· 297 295 { 298 296 struct mshv_irqfd *irqfd = container_of(wait, struct mshv_irqfd, 299 297 irqfd_wait); 300 - unsigned long flags = (unsigned long)key; 298 + __poll_t flags = key_to_poll(key); 301 299 int idx; 302 300 unsigned int seq; 303 301 struct mshv_partition *pt = irqfd->irqfd_partn; 304 302 int ret = 0; 305 303 306 - if (flags & POLLIN) { 304 + if (flags & EPOLLIN) { 307 305 u64 cnt; 308 306 309 307 eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt); ··· 322 320 ret = 1; 323 321 } 324 322 325 - if (flags & POLLHUP) { 323 + if (flags & EPOLLHUP) { 326 324 /* The eventfd is closing, detach from the partition */ 327 325 unsigned long flags; 328 326 ··· 372 370 { 373 371 struct mshv_irqfd *irqfd = 374 372 container_of(polltbl, struct mshv_irqfd, irqfd_polltbl); 375 - 376 - irqfd->irqfd_wqh = wqh; 377 373 378 374 /* 379 375 * TODO: Ensure there isn't already an exclusive, priority waiter, e.g. ··· 506 506 */ 507 507 events = vfs_poll(fd_file(f), &irqfd->irqfd_polltbl); 508 508 509 - if (events & POLLIN) 509 + if (events & EPOLLIN) 510 510 mshv_assert_irq_slow(irqfd); 511 511 512 512 srcu_read_unlock(&pt->pt_irq_srcu, idx);
-1
drivers/hv/mshv_eventfd.h
··· 32 32 struct mshv_lapic_irq irqfd_lapic_irq; 33 33 struct hlist_node irqfd_hnode; 34 34 poll_table irqfd_polltbl; 35 - wait_queue_head_t *irqfd_wqh; 36 35 wait_queue_entry_t irqfd_wait; 37 36 struct work_struct irqfd_shutdown; 38 37 struct mshv_irqfd_resampler *irqfd_resampler;
+30 -30
drivers/hv/mshv_regions.c
··· 88 88 struct page *page; 89 89 int stride, ret; 90 90 91 - page = region->pages[page_offset]; 91 + page = region->mreg_pages[page_offset]; 92 92 if (!page) 93 93 return -EINVAL; 94 94 ··· 98 98 99 99 /* Start at stride since the first stride is validated */ 100 100 for (count = stride; count < page_count; count += stride) { 101 - page = region->pages[page_offset + count]; 101 + page = region->mreg_pages[page_offset + count]; 102 102 103 103 /* Break if current page is not present */ 104 104 if (!page) ··· 152 152 153 153 while (page_count) { 154 154 /* Skip non-present pages */ 155 - if (!region->pages[page_offset]) { 155 + if (!region->mreg_pages[page_offset]) { 156 156 page_offset++; 157 157 page_count--; 158 158 continue; ··· 190 190 if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE)) 191 191 region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE; 192 192 193 - kref_init(&region->refcount); 193 + kref_init(&region->mreg_refcount); 194 194 195 195 return region; 196 196 } ··· 204 204 flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 205 205 206 206 return hv_call_modify_spa_host_access(region->partition->pt_id, 207 - region->pages + page_offset, 207 + region->mreg_pages + page_offset, 208 208 page_count, 209 209 HV_MAP_GPA_READABLE | 210 210 HV_MAP_GPA_WRITABLE, ··· 229 229 flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 230 230 231 231 return hv_call_modify_spa_host_access(region->partition->pt_id, 232 - region->pages + page_offset, 232 + region->mreg_pages + page_offset, 233 233 page_count, 0, 234 234 flags, false); 235 235 } ··· 254 254 return hv_call_map_gpa_pages(region->partition->pt_id, 255 255 region->start_gfn + page_offset, 256 256 page_count, flags, 257 - region->pages + page_offset); 257 + region->mreg_pages + page_offset); 258 258 } 259 259 260 260 static int mshv_region_remap_pages(struct mshv_mem_region *region, ··· 277 277 static void mshv_region_invalidate_pages(struct mshv_mem_region *region, 278 278 u64 page_offset, u64 page_count) 279 279 { 280 - 
if (region->type == MSHV_REGION_TYPE_MEM_PINNED) 281 - unpin_user_pages(region->pages + page_offset, page_count); 280 + if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED) 281 + unpin_user_pages(region->mreg_pages + page_offset, page_count); 282 282 283 - memset(region->pages + page_offset, 0, 283 + memset(region->mreg_pages + page_offset, 0, 284 284 page_count * sizeof(struct page *)); 285 285 } 286 286 ··· 297 297 int ret; 298 298 299 299 for (done_count = 0; done_count < region->nr_pages; done_count += ret) { 300 - pages = region->pages + done_count; 300 + pages = region->mreg_pages + done_count; 301 301 userspace_addr = region->start_uaddr + 302 302 done_count * HV_HYP_PAGE_SIZE; 303 303 nr_pages = min(region->nr_pages - done_count, ··· 348 348 static void mshv_region_destroy(struct kref *ref) 349 349 { 350 350 struct mshv_mem_region *region = 351 - container_of(ref, struct mshv_mem_region, refcount); 351 + container_of(ref, struct mshv_mem_region, mreg_refcount); 352 352 struct mshv_partition *partition = region->partition; 353 353 int ret; 354 354 355 - if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE) 355 + if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) 356 356 mshv_region_movable_fini(region); 357 357 358 358 if (mshv_partition_encrypted(partition)) { ··· 374 374 375 375 void mshv_region_put(struct mshv_mem_region *region) 376 376 { 377 - kref_put(&region->refcount, mshv_region_destroy); 377 + kref_put(&region->mreg_refcount, mshv_region_destroy); 378 378 } 379 379 380 380 int mshv_region_get(struct mshv_mem_region *region) 381 381 { 382 - return kref_get_unless_zero(&region->refcount); 382 + return kref_get_unless_zero(&region->mreg_refcount); 383 383 } 384 384 385 385 /** ··· 405 405 int ret; 406 406 407 407 range->notifier_seq = mmu_interval_read_begin(range->notifier); 408 - mmap_read_lock(region->mni.mm); 408 + mmap_read_lock(region->mreg_mni.mm); 409 409 ret = hmm_range_fault(range); 410 - mmap_read_unlock(region->mni.mm); 410 + 
mmap_read_unlock(region->mreg_mni.mm); 411 411 if (ret) 412 412 return ret; 413 413 414 - mutex_lock(&region->mutex); 414 + mutex_lock(&region->mreg_mutex); 415 415 416 416 if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) { 417 - mutex_unlock(&region->mutex); 417 + mutex_unlock(&region->mreg_mutex); 418 418 cond_resched(); 419 419 return -EBUSY; 420 420 } ··· 438 438 u64 page_offset, u64 page_count) 439 439 { 440 440 struct hmm_range range = { 441 - .notifier = &region->mni, 441 + .notifier = &region->mreg_mni, 442 442 .default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE, 443 443 }; 444 444 unsigned long *pfns; ··· 461 461 goto out; 462 462 463 463 for (i = 0; i < page_count; i++) 464 - region->pages[page_offset + i] = hmm_pfn_to_page(pfns[i]); 464 + region->mreg_pages[page_offset + i] = hmm_pfn_to_page(pfns[i]); 465 465 466 466 ret = mshv_region_remap_pages(region, region->hv_map_flags, 467 467 page_offset, page_count); 468 468 469 - mutex_unlock(&region->mutex); 469 + mutex_unlock(&region->mreg_mutex); 470 470 out: 471 471 kfree(pfns); 472 472 return ret; ··· 520 520 { 521 521 struct mshv_mem_region *region = container_of(mni, 522 522 struct mshv_mem_region, 523 - mni); 523 + mreg_mni); 524 524 u64 page_offset, page_count; 525 525 unsigned long mstart, mend; 526 526 int ret = -EPERM; ··· 533 533 page_count = HVPFN_DOWN(mend - mstart); 534 534 535 535 if (mmu_notifier_range_blockable(range)) 536 - mutex_lock(&region->mutex); 537 - else if (!mutex_trylock(&region->mutex)) 536 + mutex_lock(&region->mreg_mutex); 537 + else if (!mutex_trylock(&region->mreg_mutex)) 538 538 goto out_fail; 539 539 540 540 mmu_interval_set_seq(mni, cur_seq); ··· 546 546 547 547 mshv_region_invalidate_pages(region, page_offset, page_count); 548 548 549 - mutex_unlock(&region->mutex); 549 + mutex_unlock(&region->mreg_mutex); 550 550 551 551 return true; 552 552 553 553 out_unlock: 554 - mutex_unlock(&region->mutex); 554 + mutex_unlock(&region->mreg_mutex); 555 555 
out_fail: 556 556 WARN_ONCE(ret, 557 557 "Failed to invalidate region %#llx-%#llx (range %#lx-%#lx, event: %u, pages %#llx-%#llx, mm: %#llx): %d\n", ··· 568 568 569 569 void mshv_region_movable_fini(struct mshv_mem_region *region) 570 570 { 571 - mmu_interval_notifier_remove(&region->mni); 571 + mmu_interval_notifier_remove(&region->mreg_mni); 572 572 } 573 573 574 574 bool mshv_region_movable_init(struct mshv_mem_region *region) 575 575 { 576 576 int ret; 577 577 578 - ret = mmu_interval_notifier_insert(&region->mni, current->mm, 578 + ret = mmu_interval_notifier_insert(&region->mreg_mni, current->mm, 579 579 region->start_uaddr, 580 580 region->nr_pages << HV_HYP_PAGE_SHIFT, 581 581 &mshv_region_mni_ops); 582 582 if (ret) 583 583 return false; 584 584 585 - mutex_init(&region->mutex); 585 + mutex_init(&region->mreg_mutex); 586 586 587 587 return true; 588 588 }
+52 -7
drivers/hv/mshv_root.h
··· 52 52 unsigned int kicked_by_hv; 53 53 wait_queue_head_t vp_suspend_queue; 54 54 } run; 55 + #if IS_ENABLED(CONFIG_DEBUG_FS) 56 + struct dentry *vp_stats_dentry; 57 + #endif 55 58 }; 56 59 57 60 #define vp_fmt(fmt) "p%lluvp%u: " fmt ··· 82 79 83 80 struct mshv_mem_region { 84 81 struct hlist_node hnode; 85 - struct kref refcount; 82 + struct kref mreg_refcount; 86 83 u64 nr_pages; 87 84 u64 start_gfn; 88 85 u64 start_uaddr; 89 86 u32 hv_map_flags; 90 87 struct mshv_partition *partition; 91 - enum mshv_region_type type; 92 - struct mmu_interval_notifier mni; 93 - struct mutex mutex; /* protects region pages remapping */ 94 - struct page *pages[]; 88 + enum mshv_region_type mreg_type; 89 + struct mmu_interval_notifier mreg_mni; 90 + struct mutex mreg_mutex; /* protects region pages remapping */ 91 + struct page *mreg_pages[]; 95 92 }; 96 93 97 94 struct mshv_irq_ack_notifier { ··· 139 136 u64 isolation_type; 140 137 bool import_completed; 141 138 bool pt_initialized; 139 + #if IS_ENABLED(CONFIG_DEBUG_FS) 140 + struct dentry *pt_stats_dentry; 141 + struct dentry *pt_vp_dentry; 142 + #endif 142 143 }; 143 144 144 145 #define pt_fmt(fmt) "p%llu: " fmt ··· 261 254 void mshv_partition_put(struct mshv_partition *partition); 262 255 struct mshv_partition *mshv_partition_find(u64 partition_id) __must_hold(RCU); 263 256 257 + static inline bool is_l1vh_parent(u64 partition_id) 258 + { 259 + return hv_l1vh_partition() && (partition_id == HV_PARTITION_ID_SELF); 260 + } 261 + 262 + int mshv_vp_stats_map(u64 partition_id, u32 vp_index, 263 + struct hv_stats_page **stats_pages); 264 + void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, 265 + struct hv_stats_page **stats_pages); 266 + 264 267 /* hypercalls */ 265 268 266 269 int hv_call_withdraw_memory(u64 count, int node, u64 partition_id); ··· 324 307 int hv_call_notify_port_ring_empty(u32 sint_index); 325 308 int hv_map_stats_page(enum hv_stats_object_type type, 326 309 const union hv_stats_object_identity *identity, 
327 - void **addr); 328 - int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr, 310 + struct hv_stats_page **addr); 311 + int hv_unmap_stats_page(enum hv_stats_object_type type, 312 + struct hv_stats_page *page_addr, 329 313 const union hv_stats_object_identity *identity); 330 314 int hv_call_modify_spa_host_access(u64 partition_id, struct page **pages, 331 315 u64 page_struct_count, u32 host_access, 332 316 u32 flags, u8 acquire); 333 317 int hv_call_get_partition_property_ex(u64 partition_id, u64 property_code, u64 arg, 334 318 void *property_value, size_t property_value_sz); 319 + 320 + #if IS_ENABLED(CONFIG_DEBUG_FS) 321 + int __init mshv_debugfs_init(void); 322 + void mshv_debugfs_exit(void); 323 + 324 + int mshv_debugfs_partition_create(struct mshv_partition *partition); 325 + void mshv_debugfs_partition_remove(struct mshv_partition *partition); 326 + int mshv_debugfs_vp_create(struct mshv_vp *vp); 327 + void mshv_debugfs_vp_remove(struct mshv_vp *vp); 328 + #else 329 + static inline int __init mshv_debugfs_init(void) 330 + { 331 + return 0; 332 + } 333 + static inline void mshv_debugfs_exit(void) { } 334 + 335 + static inline int mshv_debugfs_partition_create(struct mshv_partition *partition) 336 + { 337 + return 0; 338 + } 339 + static inline void mshv_debugfs_partition_remove(struct mshv_partition *partition) { } 340 + static inline int mshv_debugfs_vp_create(struct mshv_vp *vp) 341 + { 342 + return 0; 343 + } 344 + static inline void mshv_debugfs_vp_remove(struct mshv_vp *vp) { } 345 + #endif 335 346 336 347 extern struct mshv_root mshv_root; 337 348 extern enum hv_scheduler_type hv_scheduler_type;
+71 -33
drivers/hv/mshv_root_hv_call.c
··· 115 115 status = hv_do_hypercall(HVCALL_CREATE_PARTITION, 116 116 input, output); 117 117 118 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 118 + if (!hv_result_needs_memory(status)) { 119 119 if (hv_result_success(status)) 120 120 *partition_id = output->partition_id; 121 121 local_irq_restore(irq_flags); ··· 123 123 break; 124 124 } 125 125 local_irq_restore(irq_flags); 126 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 127 - hv_current_partition_id, 1); 126 + ret = hv_deposit_memory(hv_current_partition_id, status); 128 127 } while (!ret); 129 128 130 129 return ret; ··· 146 147 status = hv_do_fast_hypercall8(HVCALL_INITIALIZE_PARTITION, 147 148 *(u64 *)&input); 148 149 149 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 150 + if (!hv_result_needs_memory(status)) { 150 151 ret = hv_result_to_errno(status); 151 152 break; 152 153 } 153 - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); 154 + ret = hv_deposit_memory(partition_id, status); 154 155 } while (!ret); 155 156 156 157 return ret; ··· 238 239 239 240 completed = hv_repcomp(status); 240 241 241 - if (hv_result(status) == HV_STATUS_INSUFFICIENT_MEMORY) { 242 + if (hv_result_needs_memory(status)) { 242 243 ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 243 244 HV_MAP_GPA_DEPOSIT_PAGES); 244 245 if (ret) ··· 454 455 455 456 status = hv_do_hypercall(control, input, output); 456 457 457 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 458 + if (!hv_result_needs_memory(status)) { 458 459 if (hv_result_success(status) && ret_output) 459 460 memcpy(ret_output, output, sizeof(*output)); 460 461 ··· 464 465 } 465 466 local_irq_restore(flags); 466 467 467 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 468 - partition_id, 1); 468 + ret = hv_deposit_memory(partition_id, status); 469 469 } while (!ret); 470 470 471 471 return ret; ··· 516 518 517 519 status = hv_do_hypercall(control, input, NULL); 518 520 519 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) 
{ 521 + if (!hv_result_needs_memory(status)) { 520 522 local_irq_restore(flags); 521 523 ret = hv_result_to_errno(status); 522 524 break; 523 525 } 524 526 local_irq_restore(flags); 525 527 526 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 527 - partition_id, 1); 528 + ret = hv_deposit_memory(partition_id, status); 528 529 } while (!ret); 529 530 530 531 return ret; ··· 560 563 status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, input, 561 564 output); 562 565 563 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 566 + if (!hv_result_needs_memory(status)) { 564 567 if (hv_result_success(status)) 565 568 *state_page = pfn_to_page(output->map_location); 566 569 local_irq_restore(flags); ··· 570 573 571 574 local_irq_restore(flags); 572 575 573 - ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); 576 + ret = hv_deposit_memory(partition_id, status); 574 577 } while (!ret); 575 578 576 579 return ret; ··· 715 718 if (hv_result_success(status)) 716 719 break; 717 720 718 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 721 + if (!hv_result_needs_memory(status)) { 719 722 ret = hv_result_to_errno(status); 720 723 break; 721 724 } 722 - ret = hv_call_deposit_pages(NUMA_NO_NODE, port_partition_id, 1); 723 - 725 + ret = hv_deposit_memory(port_partition_id, status); 724 726 } while (!ret); 725 727 726 728 return ret; ··· 768 772 if (hv_result_success(status)) 769 773 break; 770 774 771 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 775 + if (!hv_result_needs_memory(status)) { 772 776 ret = hv_result_to_errno(status); 773 777 break; 774 778 } 775 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 776 - connection_partition_id, 1); 779 + ret = hv_deposit_memory(connection_partition_id, status); 777 780 } while (!ret); 778 781 779 782 return ret; ··· 808 813 return hv_result_to_errno(status); 809 814 } 810 815 816 + /* 817 + * Equivalent of hv_call_map_stats_page() for cases when the caller provides 818 + * the map location. 
819 + * 820 + * NOTE: This is a newer hypercall that always supports SELF and PARENT stats 821 + * areas, unlike hv_call_map_stats_page(). 822 + */ 811 823 static int hv_call_map_stats_page2(enum hv_stats_object_type type, 812 824 const union hv_stats_object_identity *identity, 813 825 u64 map_location) ··· 845 843 if (!ret) 846 844 break; 847 845 848 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 846 + if (!hv_result_needs_memory(status)) { 849 847 hv_status_debug(status, "\n"); 850 848 break; 851 849 } 852 850 853 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 854 - hv_current_partition_id, 1); 851 + ret = hv_deposit_memory(hv_current_partition_id, status); 855 852 } while (!ret); 856 853 857 854 return ret; 858 855 } 859 856 860 - static int hv_call_map_stats_page(enum hv_stats_object_type type, 861 - const union hv_stats_object_identity *identity, 862 - void **addr) 857 + static int 858 + hv_stats_get_area_type(enum hv_stats_object_type type, 859 + const union hv_stats_object_identity *identity) 860 + { 861 + switch (type) { 862 + case HV_STATS_OBJECT_HYPERVISOR: 863 + return identity->hv.stats_area_type; 864 + case HV_STATS_OBJECT_LOGICAL_PROCESSOR: 865 + return identity->lp.stats_area_type; 866 + case HV_STATS_OBJECT_PARTITION: 867 + return identity->partition.stats_area_type; 868 + case HV_STATS_OBJECT_VP: 869 + return identity->vp.stats_area_type; 870 + } 871 + 872 + return -EINVAL; 873 + } 874 + 875 + /* 876 + * Map a stats page, where the page location is provided by the hypervisor. 877 + * 878 + * NOTE: The concept of separate SELF and PARENT stats areas does not exist on 879 + * older hypervisor versions. All the available stats information can be found 880 + * on the SELF page. When attempting to map the PARENT area on a hypervisor 881 + * that doesn't support it, return "success" but with a NULL address. The 882 + * caller should check for this case and instead fallback to the SELF area 883 + * alone. 
884 + */ 885 + static int 886 + hv_call_map_stats_page(enum hv_stats_object_type type, 887 + const union hv_stats_object_identity *identity, 888 + struct hv_stats_page **addr) 863 889 { 864 890 unsigned long flags; 865 891 struct hv_input_map_stats_page *input; ··· 908 878 pfn = output->map_location; 909 879 910 880 local_irq_restore(flags); 911 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { 912 - ret = hv_result_to_errno(status); 881 + 882 + if (!hv_result_needs_memory(status)) { 913 883 if (hv_result_success(status)) 914 884 break; 915 - return ret; 885 + 886 + if (hv_stats_get_area_type(type, identity) == HV_STATS_AREA_PARENT && 887 + hv_result(status) == HV_STATUS_INVALID_PARAMETER) { 888 + *addr = NULL; 889 + return 0; 890 + } 891 + 892 + hv_status_debug(status, "\n"); 893 + return hv_result_to_errno(status); 916 894 } 917 895 918 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 919 - hv_current_partition_id, 1); 896 + ret = hv_deposit_memory(hv_current_partition_id, status); 920 897 if (ret) 921 898 return ret; 922 899 } while (!ret); ··· 935 898 936 899 int hv_map_stats_page(enum hv_stats_object_type type, 937 900 const union hv_stats_object_identity *identity, 938 - void **addr) 901 + struct hv_stats_page **addr) 939 902 { 940 903 int ret; 941 904 struct page *allocated_page = NULL; ··· 983 946 return hv_result_to_errno(status); 984 947 } 985 948 986 - int hv_unmap_stats_page(enum hv_stats_object_type type, void *page_addr, 949 + int hv_unmap_stats_page(enum hv_stats_object_type type, 950 + struct hv_stats_page *page_addr, 987 951 const union hv_stats_object_identity *identity) 988 952 { 989 953 int ret;
+146 -92
drivers/hv/mshv_root_main.c
··· 39 39 MODULE_LICENSE("GPL"); 40 40 MODULE_DESCRIPTION("Microsoft Hyper-V root partition VMM interface /dev/mshv"); 41 41 42 - /* TODO move this to another file when debugfs code is added */ 43 - enum hv_stats_vp_counters { /* HV_THREAD_COUNTER */ 44 - #if defined(CONFIG_X86) 45 - VpRootDispatchThreadBlocked = 202, 42 + /* HV_THREAD_COUNTER */ 43 + #if defined(CONFIG_X86_64) 44 + #define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 202 46 45 #elif defined(CONFIG_ARM64) 47 - VpRootDispatchThreadBlocked = 94, 46 + #define HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED 95 48 47 #endif 49 - VpStatsMaxCounter 50 - }; 51 - 52 - struct hv_stats_page { 53 - union { 54 - u64 vp_cntrs[VpStatsMaxCounter]; /* VP counters */ 55 - u8 data[HV_HYP_PAGE_SIZE]; 56 - }; 57 - } __packed; 58 48 59 49 struct mshv_root mshv_root; 60 50 ··· 120 130 HVCALL_SET_VP_REGISTERS, 121 131 HVCALL_TRANSLATE_VIRTUAL_ADDRESS, 122 132 HVCALL_CLEAR_VIRTUAL_INTERRUPT, 133 + HVCALL_SCRUB_PARTITION, 123 134 HVCALL_REGISTER_INTERCEPT_RESULT, 124 135 HVCALL_ASSERT_VIRTUAL_INTERRUPT, 125 136 HVCALL_GET_GPA_PAGES_ACCESS_STATES, ··· 252 261 if (hv_result_success(status)) 253 262 break; 254 263 255 - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) 264 + if (!hv_result_needs_memory(status)) 256 265 ret = hv_result_to_errno(status); 257 266 else 258 - ret = hv_call_deposit_pages(NUMA_NO_NODE, 259 - pt_id, 1); 267 + ret = hv_deposit_memory(pt_id, status); 260 268 } while (!ret); 261 269 262 270 args.status = hv_result(status); ··· 475 485 static bool mshv_vp_dispatch_thread_blocked(struct mshv_vp *vp) 476 486 { 477 487 struct hv_stats_page **stats = vp->vp_stats_pages; 478 - u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->vp_cntrs; 479 - u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->vp_cntrs; 488 + u64 *self_vp_cntrs = stats[HV_STATS_AREA_SELF]->data; 489 + u64 *parent_vp_cntrs = stats[HV_STATS_AREA_PARENT]->data; 480 490 481 - if (self_vp_cntrs[VpRootDispatchThreadBlocked]) 482 - return 
self_vp_cntrs[VpRootDispatchThreadBlocked]; 483 - return parent_vp_cntrs[VpRootDispatchThreadBlocked]; 491 + return parent_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED] || 492 + self_vp_cntrs[HV_VP_COUNTER_ROOT_DISPATCH_THREAD_BLOCKED]; 484 493 } 485 494 486 495 static int ··· 650 661 return false; 651 662 652 663 /* Only movable memory ranges are supported for GPA intercepts */ 653 - if (region->type == MSHV_REGION_TYPE_MEM_MOVABLE) 664 + if (region->mreg_type == MSHV_REGION_TYPE_MEM_MOVABLE) 654 665 ret = mshv_region_handle_gfn_fault(region, gfn); 655 666 else 656 667 ret = false; ··· 946 957 return 0; 947 958 } 948 959 949 - static void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, 950 - void *stats_pages[]) 960 + void mshv_vp_stats_unmap(u64 partition_id, u32 vp_index, 961 + struct hv_stats_page *stats_pages[]) 951 962 { 952 963 union hv_stats_object_identity identity = { 953 964 .vp.partition_id = partition_id, 954 965 .vp.vp_index = vp_index, 955 966 }; 967 + int err; 956 968 957 969 identity.vp.stats_area_type = HV_STATS_AREA_SELF; 958 - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); 970 + err = hv_unmap_stats_page(HV_STATS_OBJECT_VP, 971 + stats_pages[HV_STATS_AREA_SELF], 972 + &identity); 973 + if (err) 974 + pr_err("%s: failed to unmap partition %llu vp %u self stats, err: %d\n", 975 + __func__, partition_id, vp_index, err); 959 976 960 - identity.vp.stats_area_type = HV_STATS_AREA_PARENT; 961 - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); 977 + if (stats_pages[HV_STATS_AREA_PARENT] != stats_pages[HV_STATS_AREA_SELF]) { 978 + identity.vp.stats_area_type = HV_STATS_AREA_PARENT; 979 + err = hv_unmap_stats_page(HV_STATS_OBJECT_VP, 980 + stats_pages[HV_STATS_AREA_PARENT], 981 + &identity); 982 + if (err) 983 + pr_err("%s: failed to unmap partition %llu vp %u parent stats, err: %d\n", 984 + __func__, partition_id, vp_index, err); 985 + } 962 986 } 963 987 964 - static int mshv_vp_stats_map(u64 partition_id, u32 vp_index, 
965 - void *stats_pages[]) 988 + int mshv_vp_stats_map(u64 partition_id, u32 vp_index, 989 + struct hv_stats_page *stats_pages[]) 966 990 { 967 991 union hv_stats_object_identity identity = { 968 992 .vp.partition_id = partition_id, ··· 986 984 identity.vp.stats_area_type = HV_STATS_AREA_SELF; 987 985 err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, 988 986 &stats_pages[HV_STATS_AREA_SELF]); 989 - if (err) 987 + if (err) { 988 + pr_err("%s: failed to map partition %llu vp %u self stats, err: %d\n", 989 + __func__, partition_id, vp_index, err); 990 990 return err; 991 + } 991 992 992 - identity.vp.stats_area_type = HV_STATS_AREA_PARENT; 993 - err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, 994 - &stats_pages[HV_STATS_AREA_PARENT]); 995 - if (err) 996 - goto unmap_self; 993 + /* 994 + * L1VH partition cannot access its vp stats in parent area. 995 + */ 996 + if (is_l1vh_parent(partition_id)) { 997 + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; 998 + } else { 999 + identity.vp.stats_area_type = HV_STATS_AREA_PARENT; 1000 + err = hv_map_stats_page(HV_STATS_OBJECT_VP, &identity, 1001 + &stats_pages[HV_STATS_AREA_PARENT]); 1002 + if (err) { 1003 + pr_err("%s: failed to map partition %llu vp %u parent stats, err: %d\n", 1004 + __func__, partition_id, vp_index, err); 1005 + goto unmap_self; 1006 + } 1007 + if (!stats_pages[HV_STATS_AREA_PARENT]) 1008 + stats_pages[HV_STATS_AREA_PARENT] = stats_pages[HV_STATS_AREA_SELF]; 1009 + } 997 1010 998 1011 return 0; 999 1012 1000 1013 unmap_self: 1001 1014 identity.vp.stats_area_type = HV_STATS_AREA_SELF; 1002 - hv_unmap_stats_page(HV_STATS_OBJECT_VP, NULL, &identity); 1015 + hv_unmap_stats_page(HV_STATS_OBJECT_VP, 1016 + stats_pages[HV_STATS_AREA_SELF], 1017 + &identity); 1003 1018 return err; 1004 1019 } 1005 1020 ··· 1027 1008 struct mshv_create_vp args; 1028 1009 struct mshv_vp *vp; 1029 1010 struct page *intercept_msg_page, *register_page, *ghcb_page; 1030 - void *stats_pages[2]; 1011 + 
struct hv_stats_page *stats_pages[2]; 1031 1012 long ret; 1032 1013 1033 1014 if (copy_from_user(&args, arg, sizeof(args))) ··· 1067 1048 goto unmap_register_page; 1068 1049 } 1069 1050 1070 - /* 1071 - * This mapping of the stats page is for detecting if dispatch thread 1072 - * is blocked - only relevant for root scheduler 1073 - */ 1074 - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) { 1075 - ret = mshv_vp_stats_map(partition->pt_id, args.vp_index, 1076 - stats_pages); 1077 - if (ret) 1078 - goto unmap_ghcb_page; 1079 - } 1051 + ret = mshv_vp_stats_map(partition->pt_id, args.vp_index, 1052 + stats_pages); 1053 + if (ret) 1054 + goto unmap_ghcb_page; 1080 1055 1081 1056 vp = kzalloc(sizeof(*vp), GFP_KERNEL); 1082 1057 if (!vp) ··· 1094 1081 if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available()) 1095 1082 vp->vp_ghcb_page = page_to_virt(ghcb_page); 1096 1083 1097 - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) 1098 - memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages)); 1084 + memcpy(vp->vp_stats_pages, stats_pages, sizeof(stats_pages)); 1085 + 1086 + ret = mshv_debugfs_vp_create(vp); 1087 + if (ret) 1088 + goto put_partition; 1099 1089 1100 1090 /* 1101 1091 * Keep anon_inode_getfd last: it installs fd in the file struct and ··· 1107 1091 ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp, 1108 1092 O_RDWR | O_CLOEXEC); 1109 1093 if (ret < 0) 1110 - goto put_partition; 1094 + goto remove_debugfs_vp; 1111 1095 1112 1096 /* already exclusive with the partition mutex for all ioctls */ 1113 1097 partition->pt_vp_count++; ··· 1115 1099 1116 1100 return ret; 1117 1101 1102 + remove_debugfs_vp: 1103 + mshv_debugfs_vp_remove(vp); 1118 1104 put_partition: 1119 1105 mshv_partition_put(partition); 1120 1106 free_vp: 1121 1107 kfree(vp); 1122 1108 unmap_stats_pages: 1123 - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) 1124 - mshv_vp_stats_unmap(partition->pt_id, args.vp_index, stats_pages); 1109 + mshv_vp_stats_unmap(partition->pt_id, 
args.vp_index, stats_pages); 1125 1110 unmap_ghcb_page: 1126 1111 if (mshv_partition_encrypted(partition) && is_ghcb_mapping_available()) 1127 1112 hv_unmap_vp_state_page(partition->pt_id, args.vp_index, ··· 1193 1176 return PTR_ERR(rg); 1194 1177 1195 1178 if (is_mmio) 1196 - rg->type = MSHV_REGION_TYPE_MMIO; 1179 + rg->mreg_type = MSHV_REGION_TYPE_MMIO; 1197 1180 else if (mshv_partition_encrypted(partition) || 1198 1181 !mshv_region_movable_init(rg)) 1199 - rg->type = MSHV_REGION_TYPE_MEM_PINNED; 1182 + rg->mreg_type = MSHV_REGION_TYPE_MEM_PINNED; 1200 1183 else 1201 - rg->type = MSHV_REGION_TYPE_MEM_MOVABLE; 1184 + rg->mreg_type = MSHV_REGION_TYPE_MEM_MOVABLE; 1202 1185 1203 1186 rg->partition = partition; 1204 1187 ··· 1315 1298 if (ret) 1316 1299 return ret; 1317 1300 1318 - switch (region->type) { 1301 + switch (region->mreg_type) { 1319 1302 case MSHV_REGION_TYPE_MEM_PINNED: 1320 1303 ret = mshv_prepare_pinned_region(region); 1321 1304 break; ··· 1559 1542 if (ret) 1560 1543 goto withdraw_mem; 1561 1544 1545 + ret = mshv_debugfs_partition_create(partition); 1546 + if (ret) 1547 + goto finalize_partition; 1548 + 1562 1549 partition->pt_initialized = true; 1563 1550 1564 1551 return 0; 1565 1552 1553 + finalize_partition: 1554 + hv_call_finalize_partition(partition->pt_id); 1566 1555 withdraw_mem: 1567 1556 hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id); 1568 1557 ··· 1748 1725 if (!vp) 1749 1726 continue; 1750 1727 1751 - if (hv_scheduler_type == HV_SCHEDULER_TYPE_ROOT) 1752 - mshv_vp_stats_unmap(partition->pt_id, vp->vp_index, 1753 - (void **)vp->vp_stats_pages); 1728 + mshv_debugfs_vp_remove(vp); 1729 + mshv_vp_stats_unmap(partition->pt_id, vp->vp_index, 1730 + vp->vp_stats_pages); 1754 1731 1755 1732 if (vp->vp_register_page) { 1756 1733 (void)hv_unmap_vp_state_page(partition->pt_id, ··· 1781 1758 1782 1759 partition->pt_vp_array[i] = NULL; 1783 1760 } 1761 + 1762 + mshv_debugfs_partition_remove(partition); 1784 1763 1785 1764 /* 
Deallocates and unmaps everything including vcpus, GPA mappings etc */ 1786 1765 hv_call_finalize_partition(partition->pt_id); ··· 1946 1921 *pt_flags |= HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE; 1947 1922 if (args.pt_flags & BIT_ULL(MSHV_PT_BIT_GPA_SUPER_PAGES)) 1948 1923 *pt_flags |= HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED; 1924 + if (args.pt_flags & BIT(MSHV_PT_BIT_NESTED_VIRTUALIZATION)) 1925 + *pt_flags |= HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE; 1926 + if (args.pt_flags & BIT(MSHV_PT_BIT_SMT_ENABLED_GUEST)) 1927 + *pt_flags |= HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST; 1949 1928 1950 1929 isol_props->as_uint64 = 0; 1951 1930 ··· 2083 2054 }; 2084 2055 } 2085 2056 2057 + static int __init l1vh_retrieve_scheduler_type(enum hv_scheduler_type *out) 2058 + { 2059 + u64 integrated_sched_enabled; 2060 + int ret; 2061 + 2062 + *out = HV_SCHEDULER_TYPE_CORE_SMT; 2063 + 2064 + if (!mshv_root.vmm_caps.vmm_enable_integrated_scheduler) 2065 + return 0; 2066 + 2067 + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2068 + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED, 2069 + 0, &integrated_sched_enabled, 2070 + sizeof(integrated_sched_enabled)); 2071 + if (ret) 2072 + return ret; 2073 + 2074 + if (integrated_sched_enabled) 2075 + *out = HV_SCHEDULER_TYPE_ROOT; 2076 + 2077 + return 0; 2078 + } 2079 + 2086 2080 /* TODO move this to hv_common.c when needed outside */ 2087 2081 static int __init hv_retrieve_scheduler_type(enum hv_scheduler_type *out) 2088 2082 { ··· 2138 2086 /* Retrieve and stash the supported scheduler type */ 2139 2087 static int __init mshv_retrieve_scheduler_type(struct device *dev) 2140 2088 { 2141 - int ret = 0; 2089 + int ret; 2142 2090 2143 2091 if (hv_l1vh_partition()) 2144 - hv_scheduler_type = HV_SCHEDULER_TYPE_CORE_SMT; 2092 + ret = l1vh_retrieve_scheduler_type(&hv_scheduler_type); 2145 2093 else 2146 2094 ret = hv_retrieve_scheduler_type(&hv_scheduler_type); 2147 - 2148 2095 if (ret) 2149 2096 
return ret; 2150 2097 ··· 2263 2212 static void mshv_root_partition_exit(void) 2264 2213 { 2265 2214 unregister_reboot_notifier(&mshv_reboot_nb); 2266 - root_scheduler_deinit(); 2267 2215 } 2268 2216 2269 2217 static int __init mshv_root_partition_init(struct device *dev) 2270 2218 { 2271 - int err; 2272 - 2273 - err = root_scheduler_init(dev); 2274 - if (err) 2275 - return err; 2276 - 2277 - err = register_reboot_notifier(&mshv_reboot_nb); 2278 - if (err) 2279 - goto root_sched_deinit; 2280 - 2281 - return 0; 2282 - 2283 - root_sched_deinit: 2284 - root_scheduler_deinit(); 2285 - return err; 2219 + return register_reboot_notifier(&mshv_reboot_nb); 2286 2220 } 2287 2221 2288 - static void mshv_init_vmm_caps(struct device *dev) 2222 + static int __init mshv_init_vmm_caps(struct device *dev) 2289 2223 { 2290 - /* 2291 - * This can only fail here if HVCALL_GET_PARTITION_PROPERTY_EX or 2292 - * HV_PARTITION_PROPERTY_VMM_CAPABILITIES are not supported. In that 2293 - * case it's valid to proceed as if all vmm_caps are disabled (zero). 
2294 - */ 2295 - if (hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2296 - HV_PARTITION_PROPERTY_VMM_CAPABILITIES, 2297 - 0, &mshv_root.vmm_caps, 2298 - sizeof(mshv_root.vmm_caps))) 2299 - dev_warn(dev, "Unable to get VMM capabilities\n"); 2224 + int ret; 2225 + 2226 + ret = hv_call_get_partition_property_ex(HV_PARTITION_ID_SELF, 2227 + HV_PARTITION_PROPERTY_VMM_CAPABILITIES, 2228 + 0, &mshv_root.vmm_caps, 2229 + sizeof(mshv_root.vmm_caps)); 2230 + if (ret && hv_l1vh_partition()) { 2231 + dev_err(dev, "Failed to get VMM capabilities: %d\n", ret); 2232 + return ret; 2233 + } 2300 2234 2301 2235 dev_dbg(dev, "vmm_caps = %#llx\n", mshv_root.vmm_caps.as_uint64[0]); 2236 + 2237 + return 0; 2302 2238 } 2303 2239 2304 2240 static int __init mshv_parent_partition_init(void) ··· 2331 2293 2332 2294 mshv_cpuhp_online = ret; 2333 2295 2296 + ret = mshv_init_vmm_caps(dev); 2297 + if (ret) 2298 + goto remove_cpu_state; 2299 + 2334 2300 ret = mshv_retrieve_scheduler_type(dev); 2335 2301 if (ret) 2336 2302 goto remove_cpu_state; ··· 2344 2302 if (ret) 2345 2303 goto remove_cpu_state; 2346 2304 2347 - mshv_init_vmm_caps(dev); 2305 + ret = root_scheduler_init(dev); 2306 + if (ret) 2307 + goto exit_partition; 2308 + 2309 + ret = mshv_debugfs_init(); 2310 + if (ret) 2311 + goto deinit_root_scheduler; 2348 2312 2349 2313 ret = mshv_irqfd_wq_init(); 2350 2314 if (ret) 2351 - goto exit_partition; 2315 + goto exit_debugfs; 2352 2316 2353 2317 spin_lock_init(&mshv_root.pt_ht_lock); 2354 2318 hash_init(mshv_root.pt_htable); ··· 2363 2315 2364 2316 return 0; 2365 2317 2318 + exit_debugfs: 2319 + mshv_debugfs_exit(); 2320 + deinit_root_scheduler: 2321 + root_scheduler_deinit(); 2366 2322 exit_partition: 2367 2323 if (hv_root_partition()) 2368 2324 mshv_root_partition_exit(); ··· 2383 2331 { 2384 2332 hv_setup_mshv_handler(NULL); 2385 2333 mshv_port_table_fini(); 2334 + mshv_debugfs_exit(); 2386 2335 misc_deregister(&mshv_dev); 2387 2336 mshv_irqfd_wq_cleanup(); 2337 + 
root_scheduler_deinit(); 2388 2338 if (hv_root_partition()) 2389 2339 mshv_root_partition_exit(); 2390 2340 cpuhp_remove_state(mshv_cpuhp_online);
+3 -2
drivers/hv/mshv_vtl_main.c
··· 845 845 .mmap = mshv_vtl_mmap, 846 846 }; 847 847 848 - static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask) 848 + static void mshv_vtl_synic_mask_vmbus_sint(void *info) 849 849 { 850 850 union hv_synic_sint sint; 851 + const u8 *mask = info; 851 852 852 853 sint.as_uint64 = 0; 853 854 sint.vector = HYPERVISOR_CALLBACK_VECTOR; ··· 1000 999 if (copy_from_user(&mask, arg, sizeof(mask))) 1001 1000 return -EFAULT; 1002 1001 guard(mutex)(&vtl2_vmbus_sint_mask_mutex); 1003 - on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1); 1002 + on_each_cpu(mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1); 1004 1003 WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0); 1005 1004 if (mask.mask) 1006 1005 wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
+72 -14
drivers/hv/vmbus_drv.c
··· 25 25 #include <linux/cpu.h> 26 26 #include <linux/sched/isolation.h> 27 27 #include <linux/sched/task_stack.h> 28 + #include <linux/smpboot.h> 28 29 29 30 #include <linux/delay.h> 30 31 #include <linux/panic_notifier.h> ··· 52 51 53 52 static int hyperv_cpuhp_online; 54 53 55 - static long __percpu *vmbus_evt; 54 + static DEFINE_PER_CPU(long, vmbus_evt); 56 55 57 56 /* Values parsed from ACPI DSDT */ 58 57 int vmbus_irq; ··· 1351 1350 } 1352 1351 } 1353 1352 1354 - void vmbus_isr(void) 1353 + static void __vmbus_isr(void) 1355 1354 { 1356 1355 struct hv_per_cpu_context *hv_cpu 1357 1356 = this_cpu_ptr(hv_context.cpu_context); ··· 1363 1362 vmbus_message_sched(hv_cpu, hv_cpu->para_synic_message_page); 1364 1363 1365 1364 add_interrupt_randomness(vmbus_interrupt); 1365 + } 1366 + 1367 + static DEFINE_PER_CPU(bool, vmbus_irq_pending); 1368 + static DEFINE_PER_CPU(struct task_struct *, vmbus_irqd); 1369 + 1370 + static void vmbus_irqd_wake(void) 1371 + { 1372 + struct task_struct *tsk = __this_cpu_read(vmbus_irqd); 1373 + 1374 + __this_cpu_write(vmbus_irq_pending, true); 1375 + wake_up_process(tsk); 1376 + } 1377 + 1378 + static void vmbus_irqd_setup(unsigned int cpu) 1379 + { 1380 + sched_set_fifo(current); 1381 + } 1382 + 1383 + static int vmbus_irqd_should_run(unsigned int cpu) 1384 + { 1385 + return __this_cpu_read(vmbus_irq_pending); 1386 + } 1387 + 1388 + static void run_vmbus_irqd(unsigned int cpu) 1389 + { 1390 + __this_cpu_write(vmbus_irq_pending, false); 1391 + __vmbus_isr(); 1392 + } 1393 + 1394 + static bool vmbus_irq_initialized; 1395 + 1396 + static struct smp_hotplug_thread vmbus_irq_threads = { 1397 + .store = &vmbus_irqd, 1398 + .setup = vmbus_irqd_setup, 1399 + .thread_should_run = vmbus_irqd_should_run, 1400 + .thread_fn = run_vmbus_irqd, 1401 + .thread_comm = "vmbus_irq/%u", 1402 + }; 1403 + 1404 + void vmbus_isr(void) 1405 + { 1406 + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 1407 + vmbus_irqd_wake(); 1408 + } else { 1409 + 
lockdep_hardirq_threaded(); 1410 + __vmbus_isr(); 1411 + } 1366 1412 } 1367 1413 EXPORT_SYMBOL_FOR_MODULES(vmbus_isr, "mshv_vtl"); 1368 1414 ··· 1510 1462 * the VMbus interrupt handler. 1511 1463 */ 1512 1464 1465 + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !vmbus_irq_initialized) { 1466 + ret = smpboot_register_percpu_thread(&vmbus_irq_threads); 1467 + if (ret) 1468 + goto err_kthread; 1469 + vmbus_irq_initialized = true; 1470 + } 1471 + 1513 1472 if (vmbus_irq == -1) { 1514 1473 hv_setup_vmbus_handler(vmbus_isr); 1515 1474 } else { 1516 - vmbus_evt = alloc_percpu(long); 1517 1475 ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr, 1518 - "Hyper-V VMbus", vmbus_evt); 1476 + "Hyper-V VMbus", &vmbus_evt); 1519 1477 if (ret) { 1520 1478 pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d", 1521 1479 vmbus_irq, ret); 1522 - free_percpu(vmbus_evt); 1523 1480 goto err_setup; 1524 1481 } 1525 1482 } ··· 1553 1500 return 0; 1554 1501 1555 1502 err_connect: 1556 - if (vmbus_irq == -1) { 1503 + if (vmbus_irq == -1) 1557 1504 hv_remove_vmbus_handler(); 1558 - } else { 1559 - free_percpu_irq(vmbus_irq, vmbus_evt); 1560 - free_percpu(vmbus_evt); 1561 - } 1505 + else 1506 + free_percpu_irq(vmbus_irq, &vmbus_evt); 1562 1507 err_setup: 1508 + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) { 1509 + smpboot_unregister_percpu_thread(&vmbus_irq_threads); 1510 + vmbus_irq_initialized = false; 1511 + } 1512 + err_kthread: 1563 1513 bus_unregister(&hv_bus); 1564 1514 return ret; 1565 1515 } ··· 3026 2970 vmbus_connection.conn_state = DISCONNECTED; 3027 2971 hv_stimer_global_cleanup(); 3028 2972 vmbus_disconnect(); 3029 - if (vmbus_irq == -1) { 2973 + if (vmbus_irq == -1) 3030 2974 hv_remove_vmbus_handler(); 3031 - } else { 3032 - free_percpu_irq(vmbus_irq, vmbus_evt); 3033 - free_percpu(vmbus_evt); 2975 + else 2976 + free_percpu_irq(vmbus_irq, &vmbus_evt); 2977 + if (IS_ENABLED(CONFIG_PREEMPT_RT) && vmbus_irq_initialized) { 2978 + 
smpboot_unregister_percpu_thread(&vmbus_irq_threads); 2979 + vmbus_irq_initialized = false; 3034 2980 } 3035 2981 for_each_online_cpu(cpu) { 3036 2982 struct hv_per_cpu_context *hv_cpu
-12
drivers/pci/controller/pci-hyperv-intf.c
··· 52 52 } 53 53 EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate); 54 54 55 - static void __exit exit_hv_pci_intf(void) 56 - { 57 - } 58 - 59 - static int __init init_hv_pci_intf(void) 60 - { 61 - return 0; 62 - } 63 - 64 - module_init(init_hv_pci_intf); 65 - module_exit(exit_hv_pci_intf); 66 - 67 55 MODULE_DESCRIPTION("Hyper-V PCI Interface"); 68 56 MODULE_LICENSE("GPL v2");
-1
drivers/pci/controller/pci-hyperv.c
··· 501 501 struct resource *low_mmio_res; 502 502 struct resource *high_mmio_res; 503 503 struct completion *survey_event; 504 - struct pci_bus *pci_bus; 505 504 spinlock_t config_lock; /* Avoid two threads writing index page */ 506 505 spinlock_t device_list_lock; /* Protect lists below */ 507 506 void __iomem *cfg_addr;
+13
include/asm-generic/mshyperv.h
··· 342 342 { 343 343 return hv_root_partition() || hv_l1vh_partition(); 344 344 } 345 + 346 + bool hv_result_needs_memory(u64 status); 347 + int hv_deposit_memory_node(int node, u64 partition_id, u64 status); 345 348 int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); 346 349 int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); 347 350 int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); ··· 353 350 static inline bool hv_root_partition(void) { return false; } 354 351 static inline bool hv_l1vh_partition(void) { return false; } 355 352 static inline bool hv_parent_partition(void) { return false; } 353 + static inline bool hv_result_needs_memory(u64 status) { return false; } 354 + static inline int hv_deposit_memory_node(int node, u64 partition_id, u64 status) 355 + { 356 + return -EOPNOTSUPP; 357 + } 356 358 static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) 357 359 { 358 360 return -EOPNOTSUPP; ··· 371 363 return -EOPNOTSUPP; 372 364 } 373 365 #endif /* CONFIG_MSHV_ROOT */ 366 + 367 + static inline int hv_deposit_memory(u64 partition_id, u64 status) 368 + { 369 + return hv_deposit_memory_node(NUMA_NO_NODE, partition_id, status); 370 + } 374 371 375 372 #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) 376 373 u8 __init get_vtl(void);
+31 -27
include/hyperv/hvgdk_mini.h
··· 14 14 } __packed; 15 15 16 16 /* NOTE: when adding below, update hv_result_to_string() */ 17 - #define HV_STATUS_SUCCESS 0x0 18 - #define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 19 - #define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 20 - #define HV_STATUS_INVALID_ALIGNMENT 0x4 21 - #define HV_STATUS_INVALID_PARAMETER 0x5 22 - #define HV_STATUS_ACCESS_DENIED 0x6 23 - #define HV_STATUS_INVALID_PARTITION_STATE 0x7 24 - #define HV_STATUS_OPERATION_DENIED 0x8 25 - #define HV_STATUS_UNKNOWN_PROPERTY 0x9 26 - #define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA 27 - #define HV_STATUS_INSUFFICIENT_MEMORY 0xB 28 - #define HV_STATUS_INVALID_PARTITION_ID 0xD 29 - #define HV_STATUS_INVALID_VP_INDEX 0xE 30 - #define HV_STATUS_NOT_FOUND 0x10 31 - #define HV_STATUS_INVALID_PORT_ID 0x11 32 - #define HV_STATUS_INVALID_CONNECTION_ID 0x12 33 - #define HV_STATUS_INSUFFICIENT_BUFFERS 0x13 34 - #define HV_STATUS_NOT_ACKNOWLEDGED 0x14 35 - #define HV_STATUS_INVALID_VP_STATE 0x15 36 - #define HV_STATUS_NO_RESOURCES 0x1D 37 - #define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20 38 - #define HV_STATUS_INVALID_LP_INDEX 0x41 39 - #define HV_STATUS_INVALID_REGISTER_VALUE 0x50 40 - #define HV_STATUS_OPERATION_FAILED 0x71 41 - #define HV_STATUS_TIME_OUT 0x78 42 - #define HV_STATUS_CALL_PENDING 0x79 43 - #define HV_STATUS_VTL_ALREADY_ENABLED 0x86 17 + #define HV_STATUS_SUCCESS 0x0 18 + #define HV_STATUS_INVALID_HYPERCALL_CODE 0x2 19 + #define HV_STATUS_INVALID_HYPERCALL_INPUT 0x3 20 + #define HV_STATUS_INVALID_ALIGNMENT 0x4 21 + #define HV_STATUS_INVALID_PARAMETER 0x5 22 + #define HV_STATUS_ACCESS_DENIED 0x6 23 + #define HV_STATUS_INVALID_PARTITION_STATE 0x7 24 + #define HV_STATUS_OPERATION_DENIED 0x8 25 + #define HV_STATUS_UNKNOWN_PROPERTY 0x9 26 + #define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0xA 27 + #define HV_STATUS_INSUFFICIENT_MEMORY 0xB 28 + #define HV_STATUS_INVALID_PARTITION_ID 0xD 29 + #define HV_STATUS_INVALID_VP_INDEX 0xE 30 + #define HV_STATUS_NOT_FOUND 0x10 31 + #define 
HV_STATUS_INVALID_PORT_ID 0x11 32 + #define HV_STATUS_INVALID_CONNECTION_ID 0x12 33 + #define HV_STATUS_INSUFFICIENT_BUFFERS 0x13 34 + #define HV_STATUS_NOT_ACKNOWLEDGED 0x14 35 + #define HV_STATUS_INVALID_VP_STATE 0x15 36 + #define HV_STATUS_NO_RESOURCES 0x1D 37 + #define HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED 0x20 38 + #define HV_STATUS_INVALID_LP_INDEX 0x41 39 + #define HV_STATUS_INVALID_REGISTER_VALUE 0x50 40 + #define HV_STATUS_OPERATION_FAILED 0x71 41 + #define HV_STATUS_INSUFFICIENT_ROOT_MEMORY 0x73 42 + #define HV_STATUS_INSUFFICIENT_CONTIGUOUS_MEMORY 0x75 43 + #define HV_STATUS_TIME_OUT 0x78 44 + #define HV_STATUS_CALL_PENDING 0x79 45 + #define HV_STATUS_INSUFFICIENT_CONTIGUOUS_ROOT_MEMORY 0x83 46 + #define HV_STATUS_VTL_ALREADY_ENABLED 0x86 44 47 45 48 /* 46 49 * The Hyper-V TimeRefCount register and the TSC ··· 477 474 #define HVCALL_NOTIFY_PARTITION_EVENT 0x0087 478 475 #define HVCALL_ENTER_SLEEP_STATE 0x0084 479 476 #define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b 477 + #define HVCALL_SCRUB_PARTITION 0x008d 480 478 #define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091 481 479 #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094 482 480 #define HVCALL_CREATE_PORT 0x0095
+9
include/hyperv/hvhdk.h
··· 10 10 #include "hvhdk_mini.h" 11 11 #include "hvgdk.h" 12 12 13 + /* 14 + * Hypervisor statistics page format 15 + */ 16 + struct hv_stats_page { 17 + u64 data[HV_HYP_PAGE_SIZE / sizeof(u64)]; 18 + } __packed; 19 + 13 20 /* Bits for dirty mask of hv_vp_register_page */ 14 21 #define HV_X64_REGISTER_CLASS_GENERAL 0 15 22 #define HV_X64_REGISTER_CLASS_IP 1 ··· 335 328 #define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2 336 329 337 330 /* Note: Exo partition is enabled by default */ 331 + #define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0) 332 + #define HV_PARTITION_CREATION_FLAG_NESTED_VIRTUALIZATION_CAPABLE BIT(1) 338 333 #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED BIT(4) 339 334 #define HV_PARTITION_CREATION_FLAG_EXO_PARTITION BIT(8) 340 335 #define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED BIT(13)
+8 -1
include/hyperv/hvhdk_mini.h
··· 7 7 8 8 #include "hvgdk_mini.h" 9 9 10 + #define HV_MAX_CONTIGUOUS_ALLOCATION_PAGES 8 11 + 10 12 /* 11 13 * Doorbell connection_info flags. 12 14 */ ··· 89 87 HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000, 90 88 HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001, 91 89 90 + /* Integrated scheduling properties */ 91 + HV_PARTITION_PROPERTY_INTEGRATED_SCHEDULER_ENABLED = 0x00020005, 92 + 92 93 /* Resource properties */ 93 94 HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005, 94 95 HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017, ··· 107 102 }; 108 103 109 104 #define HV_PARTITION_VMM_CAPABILITIES_BANK_COUNT 1 110 - #define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 59 105 + #define HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT 57 111 106 112 107 struct hv_partition_property_vmm_capabilities { 113 108 u16 bank_count; ··· 124 119 u64 reservedbit3: 1; 125 120 #endif 126 121 u64 assignable_synthetic_proc_features: 1; 122 + u64 reservedbit5: 1; 123 + u64 vmm_enable_integrated_scheduler : 1; 127 124 u64 reserved0: HV_PARTITION_VMM_CAPABILITIES_RESERVED_BITFIELD_COUNT; 128 125 } __packed; 129 126 };
+2
include/uapi/linux/mshv.h
··· 27 27 MSHV_PT_BIT_X2APIC, 28 28 MSHV_PT_BIT_GPA_SUPER_PAGES, 29 29 MSHV_PT_BIT_CPU_AND_XSAVE_FEATURES, 30 + MSHV_PT_BIT_NESTED_VIRTUALIZATION, 31 + MSHV_PT_BIT_SMT_ENABLED_GUEST, 30 32 MSHV_PT_BIT_COUNT, 31 33 }; 32 34