Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'hyperv-next-signed-20210216' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull Hyper-V updates from Wei Liu:

- VMBus hardening patches from Andrea Parri and Andres Beltran.

- Patches to make Linux boot as the root partition on Microsoft
Hypervisor from Wei Liu.

- One patch to add a new sysfs interface to support hibernation on
Hyper-V from Dexuan Cui.

- Two miscellaneous clean-up patches from Colin and Gustavo.

* tag 'hyperv-next-signed-20210216' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux: (31 commits)
Revert "Drivers: hv: vmbus: Copy packets sent by Hyper-V out of the ring buffer"
iommu/hyperv: setup an IO-APIC IRQ remapping domain for root partition
x86/hyperv: implement an MSI domain for root partition
asm-generic/hyperv: import data structures for mapping device interrupts
asm-generic/hyperv: introduce hv_device_id and auxiliary structures
asm-generic/hyperv: update hv_interrupt_entry
asm-generic/hyperv: update hv_msi_entry
x86/hyperv: implement and use hv_smp_prepare_cpus
x86/hyperv: provide a bunch of helper functions
ACPI / NUMA: add a stub function for node_to_pxm()
x86/hyperv: handling hypercall page setup for root
x86/hyperv: extract partition ID from Microsoft Hypervisor if necessary
x86/hyperv: allocate output arg pages if required
clocksource/hyperv: use MSR-based access if running as root
Drivers: hv: vmbus: skip VMBus initialization if Linux is root
x86/hyperv: detect if Linux is the root partition
asm-generic/hyperv: change HV_CPU_POWER_MANAGEMENT to HV_CPU_MANAGEMENT
hv: hyperv.h: Replace one-element array with flexible-array in struct icmsg_negotiate
hv_netvsc: Restrict configurations on isolated guests
Drivers: hv: vmbus: Enforce 'VMBus version >= 5.2' on isolated guests
...

+1730 -246
+7
Documentation/ABI/stable/sysfs-bus-vmbus
··· 1 + What: /sys/bus/vmbus/hibernation 2 + Date: Jan 2021 3 + KernelVersion: 5.12 4 + Contact: Dexuan Cui <decui@microsoft.com> 5 + Description: Whether the host supports hibernation for the VM. 6 + Users: Daemon that sets up swap partition/file for hibernation. 7 + 1 8 What: /sys/bus/vmbus/devices/<UUID>/id 2 9 Date: Jul 2009 3 10 KernelVersion: 2.6.31
+2 -2
arch/x86/hyperv/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - obj-y := hv_init.o mmu.o nested.o 3 - obj-$(CONFIG_X86_64) += hv_apic.o 2 + obj-y := hv_init.o mmu.o nested.o irqdomain.o 3 + obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o 4 4 5 5 ifdef CONFIG_X86_64 6 6 obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o
+114 -8
arch/x86/hyperv/hv_init.c
··· 10 10 #include <linux/acpi.h> 11 11 #include <linux/efi.h> 12 12 #include <linux/types.h> 13 + #include <linux/bitfield.h> 13 14 #include <asm/apic.h> 14 15 #include <asm/desc.h> 15 16 #include <asm/hypervisor.h> ··· 27 26 #include <linux/cpuhotplug.h> 28 27 #include <linux/syscore_ops.h> 29 28 #include <clocksource/hyperv_timer.h> 29 + #include <linux/highmem.h> 30 30 31 31 int hyperv_init_cpuhp; 32 + u64 hv_current_partition_id = ~0ull; 33 + EXPORT_SYMBOL_GPL(hv_current_partition_id); 32 34 33 35 void *hv_hypercall_pg; 34 36 EXPORT_SYMBOL_GPL(hv_hypercall_pg); ··· 47 43 48 44 void __percpu **hyperv_pcpu_input_arg; 49 45 EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); 46 + 47 + void __percpu **hyperv_pcpu_output_arg; 48 + EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); 50 49 51 50 u32 hv_max_vp_index; 52 51 EXPORT_SYMBOL_GPL(hv_max_vp_index); ··· 83 76 void **input_arg; 84 77 struct page *pg; 85 78 86 - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 87 79 /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ 88 - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL); 80 + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 
1 : 0); 89 81 if (unlikely(!pg)) 90 82 return -ENOMEM; 83 + 84 + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 91 85 *input_arg = page_address(pg); 86 + if (hv_root_partition) { 87 + void **output_arg; 88 + 89 + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); 90 + *output_arg = page_address(pg + 1); 91 + } 92 92 93 93 hv_get_vp_index(msr_vp_index); 94 94 ··· 222 208 unsigned int new_cpu; 223 209 unsigned long flags; 224 210 void **input_arg; 225 - void *input_pg = NULL; 211 + void *pg; 226 212 227 213 local_irq_save(flags); 228 214 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); 229 - input_pg = *input_arg; 215 + pg = *input_arg; 230 216 *input_arg = NULL; 217 + 218 + if (hv_root_partition) { 219 + void **output_arg; 220 + 221 + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); 222 + *output_arg = NULL; 223 + } 224 + 231 225 local_irq_restore(flags); 232 - free_page((unsigned long)input_pg); 226 + 227 + free_pages((unsigned long)pg, hv_root_partition ? 
1 : 0); 233 228 234 229 if (hv_vp_assist_page && hv_vp_assist_page[cpu]) 235 230 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); ··· 286 263 { 287 264 union hv_x64_msr_hypercall_contents hypercall_msr; 288 265 int ret; 266 + 267 + if (hv_root_partition) 268 + return -EPERM; 289 269 290 270 /* 291 271 * Reset the hypercall page as it is going to be invalidated ··· 360 334 old_setup_percpu_clockev(); 361 335 } 362 336 337 + static void __init hv_get_partition_id(void) 338 + { 339 + struct hv_get_partition_id *output_page; 340 + u64 status; 341 + unsigned long flags; 342 + 343 + local_irq_save(flags); 344 + output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); 345 + status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); 346 + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { 347 + /* No point in proceeding if this failed */ 348 + pr_err("Failed to get partition ID: %lld\n", status); 349 + BUG(); 350 + } 351 + hv_current_partition_id = output_page->partition_id; 352 + local_irq_restore(flags); 353 + } 354 + 363 355 /* 364 356 * This function is to be invoked early in the boot sequence after the 365 357 * hypervisor has been detected. 
··· 411 367 hyperv_pcpu_input_arg = alloc_percpu(void *); 412 368 413 369 BUG_ON(hyperv_pcpu_input_arg == NULL); 370 + 371 + /* Allocate the per-CPU state for output arg for root */ 372 + if (hv_root_partition) { 373 + hyperv_pcpu_output_arg = alloc_percpu(void *); 374 + BUG_ON(hyperv_pcpu_output_arg == NULL); 375 + } 414 376 415 377 /* Allocate percpu VP index */ 416 378 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), ··· 458 408 459 409 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 460 410 hypercall_msr.enable = 1; 461 - hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); 462 - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 411 + 412 + if (hv_root_partition) { 413 + struct page *pg; 414 + void *src, *dst; 415 + 416 + /* 417 + * For the root partition, the hypervisor will set up its 418 + * hypercall page. The hypervisor guarantees it will not show 419 + * up in the root's address space. The root can't change the 420 + * location of the hypercall page. 421 + * 422 + * Order is important here. We must enable the hypercall page 423 + * so it is populated with code, then copy the code to an 424 + * executable page. 
425 + */ 426 + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 427 + 428 + pg = vmalloc_to_page(hv_hypercall_pg); 429 + dst = kmap(pg); 430 + src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE, 431 + MEMREMAP_WB); 432 + BUG_ON(!(src && dst)); 433 + memcpy(dst, src, HV_HYP_PAGE_SIZE); 434 + memunmap(src); 435 + kunmap(pg); 436 + } else { 437 + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); 438 + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); 439 + } 463 440 464 441 /* 465 442 * hyperv_init() is called before LAPIC is initialized: see ··· 505 428 register_syscore_ops(&hv_syscore_ops); 506 429 507 430 hyperv_init_cpuhp = cpuhp; 431 + 432 + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) 433 + hv_get_partition_id(); 434 + 435 + BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); 436 + 437 + #ifdef CONFIG_PCI_MSI 438 + /* 439 + * If we're running as root, we want to create our own PCI MSI domain. 440 + * We can't set this in hv_pci_init because that would be too late. 441 + */ 442 + if (hv_root_partition) 443 + x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; 444 + #endif 445 + 508 446 return; 509 447 510 448 remove_cpuhp_state: ··· 644 552 645 553 bool hv_is_hibernation_supported(void) 646 554 { 647 - return acpi_sleep_state_supported(ACPI_STATE_S4); 555 + return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); 648 556 } 649 557 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); 558 + 559 + enum hv_isolation_type hv_get_isolation_type(void) 560 + { 561 + if (!(ms_hyperv.features_b & HV_ISOLATION)) 562 + return HV_ISOLATION_TYPE_NONE; 563 + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); 564 + } 565 + EXPORT_SYMBOL_GPL(hv_get_isolation_type); 566 + 567 + bool hv_is_isolation_supported(void) 568 + { 569 + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; 570 + } 571 + EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+219
arch/x86/hyperv/hv_proc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/types.h> 3 + #include <linux/version.h> 4 + #include <linux/vmalloc.h> 5 + #include <linux/mm.h> 6 + #include <linux/clockchips.h> 7 + #include <linux/acpi.h> 8 + #include <linux/hyperv.h> 9 + #include <linux/slab.h> 10 + #include <linux/cpuhotplug.h> 11 + #include <linux/minmax.h> 12 + #include <asm/hypervisor.h> 13 + #include <asm/mshyperv.h> 14 + #include <asm/apic.h> 15 + 16 + #include <asm/trace/hyperv.h> 17 + 18 + /* 19 + * See struct hv_deposit_memory. The first u64 is partition ID, the rest 20 + * are GPAs. 21 + */ 22 + #define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) 23 + 24 + /* Deposits exact number of pages. Must be called with interrupts enabled. */ 25 + int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) 26 + { 27 + struct page **pages, *page; 28 + int *counts; 29 + int num_allocations; 30 + int i, j, page_count; 31 + int order; 32 + u64 status; 33 + int ret; 34 + u64 base_pfn; 35 + struct hv_deposit_memory *input_page; 36 + unsigned long flags; 37 + 38 + if (num_pages > HV_DEPOSIT_MAX) 39 + return -E2BIG; 40 + if (!num_pages) 41 + return 0; 42 + 43 + /* One buffer for page pointers and counts */ 44 + page = alloc_page(GFP_KERNEL); 45 + if (!page) 46 + return -ENOMEM; 47 + pages = page_address(page); 48 + 49 + counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); 50 + if (!counts) { 51 + free_page((unsigned long)pages); 52 + return -ENOMEM; 53 + } 54 + 55 + /* Allocate all the pages before disabling interrupts */ 56 + i = 0; 57 + 58 + while (num_pages) { 59 + /* Find highest order we can actually allocate */ 60 + order = 31 - __builtin_clz(num_pages); 61 + 62 + while (1) { 63 + pages[i] = alloc_pages_node(node, GFP_KERNEL, order); 64 + if (pages[i]) 65 + break; 66 + if (!order) { 67 + ret = -ENOMEM; 68 + num_allocations = i; 69 + goto err_free_allocations; 70 + } 71 + --order; 72 + } 73 + 74 + split_page(pages[i], order); 75 + counts[i] = 1 << order; 76 + 
num_pages -= counts[i]; 77 + i++; 78 + } 79 + num_allocations = i; 80 + 81 + local_irq_save(flags); 82 + 83 + input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); 84 + 85 + input_page->partition_id = partition_id; 86 + 87 + /* Populate gpa_page_list - these will fit on the input page */ 88 + for (i = 0, page_count = 0; i < num_allocations; ++i) { 89 + base_pfn = page_to_pfn(pages[i]); 90 + for (j = 0; j < counts[i]; ++j, ++page_count) 91 + input_page->gpa_page_list[page_count] = base_pfn + j; 92 + } 93 + status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, 94 + page_count, 0, input_page, NULL); 95 + local_irq_restore(flags); 96 + 97 + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { 98 + pr_err("Failed to deposit pages: %lld\n", status); 99 + ret = status; 100 + goto err_free_allocations; 101 + } 102 + 103 + ret = 0; 104 + goto free_buf; 105 + 106 + err_free_allocations: 107 + for (i = 0; i < num_allocations; ++i) { 108 + base_pfn = page_to_pfn(pages[i]); 109 + for (j = 0; j < counts[i]; ++j) 110 + __free_page(pfn_to_page(base_pfn + j)); 111 + } 112 + 113 + free_buf: 114 + free_page((unsigned long)pages); 115 + kfree(counts); 116 + return ret; 117 + } 118 + 119 + int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) 120 + { 121 + struct hv_add_logical_processor_in *input; 122 + struct hv_add_logical_processor_out *output; 123 + u64 status; 124 + unsigned long flags; 125 + int ret = 0; 126 + int pxm = node_to_pxm(node); 127 + 128 + /* 129 + * When adding a logical processor, the hypervisor may return 130 + * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more 131 + * pages and retry. 
132 + */ 133 + do { 134 + local_irq_save(flags); 135 + 136 + input = *this_cpu_ptr(hyperv_pcpu_input_arg); 137 + /* We don't do anything with the output right now */ 138 + output = *this_cpu_ptr(hyperv_pcpu_output_arg); 139 + 140 + input->lp_index = lp_index; 141 + input->apic_id = apic_id; 142 + input->flags = 0; 143 + input->proximity_domain_info.domain_id = pxm; 144 + input->proximity_domain_info.flags.reserved = 0; 145 + input->proximity_domain_info.flags.proximity_info_valid = 1; 146 + input->proximity_domain_info.flags.proximity_preferred = 1; 147 + status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, 148 + input, output); 149 + local_irq_restore(flags); 150 + 151 + status &= HV_HYPERCALL_RESULT_MASK; 152 + 153 + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { 154 + if (status != HV_STATUS_SUCCESS) { 155 + pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, 156 + lp_index, apic_id, status); 157 + ret = status; 158 + } 159 + break; 160 + } 161 + ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); 162 + } while (!ret); 163 + 164 + return ret; 165 + } 166 + 167 + int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) 168 + { 169 + struct hv_create_vp *input; 170 + u64 status; 171 + unsigned long irq_flags; 172 + int ret = 0; 173 + int pxm = node_to_pxm(node); 174 + 175 + /* Root VPs don't seem to need pages deposited */ 176 + if (partition_id != hv_current_partition_id) { 177 + /* The value 90 is empirically determined. It may change. 
*/ 178 + ret = hv_call_deposit_pages(node, partition_id, 90); 179 + if (ret) 180 + return ret; 181 + } 182 + 183 + do { 184 + local_irq_save(irq_flags); 185 + 186 + input = *this_cpu_ptr(hyperv_pcpu_input_arg); 187 + 188 + input->partition_id = partition_id; 189 + input->vp_index = vp_index; 190 + input->flags = flags; 191 + input->subnode_type = HvSubnodeAny; 192 + if (node != NUMA_NO_NODE) { 193 + input->proximity_domain_info.domain_id = pxm; 194 + input->proximity_domain_info.flags.reserved = 0; 195 + input->proximity_domain_info.flags.proximity_info_valid = 1; 196 + input->proximity_domain_info.flags.proximity_preferred = 1; 197 + } else { 198 + input->proximity_domain_info.as_uint64 = 0; 199 + } 200 + status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); 201 + local_irq_restore(irq_flags); 202 + 203 + status &= HV_HYPERCALL_RESULT_MASK; 204 + 205 + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { 206 + if (status != HV_STATUS_SUCCESS) { 207 + pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, 208 + vp_index, flags, status); 209 + ret = status; 210 + } 211 + break; 212 + } 213 + ret = hv_call_deposit_pages(node, partition_id, 1); 214 + 215 + } while (!ret); 216 + 217 + return ret; 218 + } 219 +
+385
arch/x86/hyperv/irqdomain.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor. 5 + * 6 + * Authors: 7 + * Sunil Muthuswamy <sunilmut@microsoft.com> 8 + * Wei Liu <wei.liu@kernel.org> 9 + */ 10 + 11 + #include <linux/pci.h> 12 + #include <linux/irq.h> 13 + #include <asm/mshyperv.h> 14 + 15 + static int hv_map_interrupt(union hv_device_id device_id, bool level, 16 + int cpu, int vector, struct hv_interrupt_entry *entry) 17 + { 18 + struct hv_input_map_device_interrupt *input; 19 + struct hv_output_map_device_interrupt *output; 20 + struct hv_device_interrupt_descriptor *intr_desc; 21 + unsigned long flags; 22 + u64 status; 23 + int nr_bank, var_size; 24 + 25 + local_irq_save(flags); 26 + 27 + input = *this_cpu_ptr(hyperv_pcpu_input_arg); 28 + output = *this_cpu_ptr(hyperv_pcpu_output_arg); 29 + 30 + intr_desc = &input->interrupt_descriptor; 31 + memset(input, 0, sizeof(*input)); 32 + input->partition_id = hv_current_partition_id; 33 + input->device_id = device_id.as_uint64; 34 + intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED; 35 + intr_desc->vector_count = 1; 36 + intr_desc->target.vector = vector; 37 + 38 + if (level) 39 + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL; 40 + else 41 + intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE; 42 + 43 + intr_desc->target.vp_set.valid_bank_mask = 0; 44 + intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K; 45 + nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu)); 46 + if (nr_bank < 0) { 47 + local_irq_restore(flags); 48 + pr_err("%s: unable to generate VP set\n", __func__); 49 + return EINVAL; 50 + } 51 + intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET; 52 + 53 + /* 54 + * var-sized hypercall, var-size starts after vp_mask (thus 55 + * vp_set.format does not count, but vp_set.valid_bank_mask 56 + * does). 
57 + */ 58 + var_size = nr_bank + 1; 59 + 60 + status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size, 61 + input, output); 62 + *entry = output->interrupt_entry; 63 + 64 + local_irq_restore(flags); 65 + 66 + if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) 67 + pr_err("%s: hypercall failed, status %lld\n", __func__, status); 68 + 69 + return status & HV_HYPERCALL_RESULT_MASK; 70 + } 71 + 72 + static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry) 73 + { 74 + unsigned long flags; 75 + struct hv_input_unmap_device_interrupt *input; 76 + struct hv_interrupt_entry *intr_entry; 77 + u64 status; 78 + 79 + local_irq_save(flags); 80 + input = *this_cpu_ptr(hyperv_pcpu_input_arg); 81 + 82 + memset(input, 0, sizeof(*input)); 83 + intr_entry = &input->interrupt_entry; 84 + input->partition_id = hv_current_partition_id; 85 + input->device_id = id; 86 + *intr_entry = *old_entry; 87 + 88 + status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL); 89 + local_irq_restore(flags); 90 + 91 + return status & HV_HYPERCALL_RESULT_MASK; 92 + } 93 + 94 + #ifdef CONFIG_PCI_MSI 95 + struct rid_data { 96 + struct pci_dev *bridge; 97 + u32 rid; 98 + }; 99 + 100 + static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data) 101 + { 102 + struct rid_data *rd = data; 103 + u8 bus = PCI_BUS_NUM(rd->rid); 104 + 105 + if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) { 106 + rd->bridge = pdev; 107 + rd->rid = alias; 108 + } 109 + 110 + return 0; 111 + } 112 + 113 + static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev) 114 + { 115 + union hv_device_id dev_id; 116 + struct rid_data data = { 117 + .bridge = NULL, 118 + .rid = PCI_DEVID(dev->bus->number, dev->devfn) 119 + }; 120 + 121 + pci_for_each_dma_alias(dev, get_rid_cb, &data); 122 + 123 + dev_id.as_uint64 = 0; 124 + dev_id.device_type = HV_DEVICE_TYPE_PCI; 125 + dev_id.pci.segment = pci_domain_nr(dev->bus); 126 + 127 + dev_id.pci.bdf.bus = 
PCI_BUS_NUM(data.rid); 128 + dev_id.pci.bdf.device = PCI_SLOT(data.rid); 129 + dev_id.pci.bdf.function = PCI_FUNC(data.rid); 130 + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE; 131 + 132 + if (data.bridge) { 133 + int pos; 134 + 135 + /* 136 + * Microsoft Hypervisor requires a bus range when the bridge is 137 + * running in PCI-X mode. 138 + * 139 + * To distinguish conventional vs PCI-X bridge, we can check 140 + * the bridge's PCI-X Secondary Status Register, Secondary Bus 141 + * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge 142 + * Specification Revision 1.0 5.2.2.1.3. 143 + * 144 + * Value zero means it is in conventional mode, otherwise it is 145 + * in PCI-X mode. 146 + */ 147 + 148 + pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX); 149 + if (pos) { 150 + u16 status; 151 + 152 + pci_read_config_word(data.bridge, pos + 153 + PCI_X_BRIDGE_SSTATUS, &status); 154 + 155 + if (status & PCI_X_SSTATUS_FREQ) { 156 + /* Non-zero, PCI-X mode */ 157 + u8 sec_bus, sub_bus; 158 + 159 + dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE; 160 + 161 + pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus); 162 + dev_id.pci.shadow_bus_range.secondary_bus = sec_bus; 163 + pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus); 164 + dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus; 165 + } 166 + } 167 + } 168 + 169 + return dev_id; 170 + } 171 + 172 + static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector, 173 + struct hv_interrupt_entry *entry) 174 + { 175 + union hv_device_id device_id = hv_build_pci_dev_id(dev); 176 + 177 + return hv_map_interrupt(device_id, false, cpu, vector, entry); 178 + } 179 + 180 + static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg) 181 + { 182 + /* High address is always 0 */ 183 + msg->address_hi = 0; 184 + msg->address_lo = entry->msi_entry.address.as_uint32; 185 + msg->data = entry->msi_entry.data.as_uint32; 186 + } 187 + 188 + 
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry); 189 + static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) 190 + { 191 + struct msi_desc *msidesc; 192 + struct pci_dev *dev; 193 + struct hv_interrupt_entry out_entry, *stored_entry; 194 + struct irq_cfg *cfg = irqd_cfg(data); 195 + cpumask_t *affinity; 196 + int cpu; 197 + u64 status; 198 + 199 + msidesc = irq_data_get_msi_desc(data); 200 + dev = msi_desc_to_pci_dev(msidesc); 201 + 202 + if (!cfg) { 203 + pr_debug("%s: cfg is NULL", __func__); 204 + return; 205 + } 206 + 207 + affinity = irq_data_get_effective_affinity_mask(data); 208 + cpu = cpumask_first_and(affinity, cpu_online_mask); 209 + 210 + if (data->chip_data) { 211 + /* 212 + * This interrupt is already mapped. Let's unmap first. 213 + * 214 + * We don't use retarget interrupt hypercalls here because 215 + * Microsoft Hypervisor doesn't allow root to change the vector 216 + * or specify VPs outside of the set that is initially used 217 + * during mapping. 
218 + */ 219 + stored_entry = data->chip_data; 220 + data->chip_data = NULL; 221 + 222 + status = hv_unmap_msi_interrupt(dev, stored_entry); 223 + 224 + kfree(stored_entry); 225 + 226 + if (status != HV_STATUS_SUCCESS) { 227 + pr_debug("%s: failed to unmap, status %lld", __func__, status); 228 + return; 229 + } 230 + } 231 + 232 + stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC); 233 + if (!stored_entry) { 234 + pr_debug("%s: failed to allocate chip data\n", __func__); 235 + return; 236 + } 237 + 238 + status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry); 239 + if (status != HV_STATUS_SUCCESS) { 240 + kfree(stored_entry); 241 + return; 242 + } 243 + 244 + *stored_entry = out_entry; 245 + data->chip_data = stored_entry; 246 + entry_to_msi_msg(&out_entry, msg); 247 + 248 + return; 249 + } 250 + 251 + static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry) 252 + { 253 + return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry); 254 + } 255 + 256 + static void hv_teardown_msi_irq_common(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 257 + { 258 + u64 status; 259 + struct hv_interrupt_entry old_entry; 260 + struct irq_desc *desc; 261 + struct irq_data *data; 262 + struct msi_msg msg; 263 + 264 + desc = irq_to_desc(irq); 265 + if (!desc) { 266 + pr_debug("%s: no irq desc\n", __func__); 267 + return; 268 + } 269 + 270 + data = &desc->irq_data; 271 + if (!data) { 272 + pr_debug("%s: no irq data\n", __func__); 273 + return; 274 + } 275 + 276 + if (!data->chip_data) { 277 + pr_debug("%s: no chip data\n!", __func__); 278 + return; 279 + } 280 + 281 + old_entry = *(struct hv_interrupt_entry *)data->chip_data; 282 + entry_to_msi_msg(&old_entry, &msg); 283 + 284 + kfree(data->chip_data); 285 + data->chip_data = NULL; 286 + 287 + status = hv_unmap_msi_interrupt(dev, &old_entry); 288 + 289 + if (status != HV_STATUS_SUCCESS) { 290 + pr_err("%s: hypercall failed, status %lld\n", __func__, status); 291 
+ return; 292 + } 293 + } 294 + 295 + static void hv_msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) 296 + { 297 + int i; 298 + struct msi_desc *entry; 299 + struct pci_dev *pdev; 300 + 301 + if (WARN_ON_ONCE(!dev_is_pci(dev))) 302 + return; 303 + 304 + pdev = to_pci_dev(dev); 305 + 306 + for_each_pci_msi_entry(entry, pdev) { 307 + if (entry->irq) { 308 + for (i = 0; i < entry->nvec_used; i++) { 309 + hv_teardown_msi_irq_common(pdev, entry, entry->irq + i); 310 + irq_domain_free_irqs(entry->irq + i, 1); 311 + } 312 + } 313 + } 314 + } 315 + 316 + /* 317 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, 318 + * which implement the MSI or MSI-X Capability Structure. 319 + */ 320 + static struct irq_chip hv_pci_msi_controller = { 321 + .name = "HV-PCI-MSI", 322 + .irq_unmask = pci_msi_unmask_irq, 323 + .irq_mask = pci_msi_mask_irq, 324 + .irq_ack = irq_chip_ack_parent, 325 + .irq_retrigger = irq_chip_retrigger_hierarchy, 326 + .irq_compose_msi_msg = hv_irq_compose_msi_msg, 327 + .irq_set_affinity = msi_domain_set_affinity, 328 + .flags = IRQCHIP_SKIP_SET_WAKE, 329 + }; 330 + 331 + static struct msi_domain_ops pci_msi_domain_ops = { 332 + .domain_free_irqs = hv_msi_domain_free_irqs, 333 + .msi_prepare = pci_msi_prepare, 334 + }; 335 + 336 + static struct msi_domain_info hv_pci_msi_domain_info = { 337 + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | 338 + MSI_FLAG_PCI_MSIX, 339 + .ops = &pci_msi_domain_ops, 340 + .chip = &hv_pci_msi_controller, 341 + .handler = handle_edge_irq, 342 + .handler_name = "edge", 343 + }; 344 + 345 + struct irq_domain * __init hv_create_pci_msi_domain(void) 346 + { 347 + struct irq_domain *d = NULL; 348 + struct fwnode_handle *fn; 349 + 350 + fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI"); 351 + if (fn) 352 + d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain); 353 + 354 + /* No point in going further if we can't get an irq domain */ 355 + BUG_ON(!d); 356 + 357 + return d; 358 
+ } 359 + 360 + #endif /* CONFIG_PCI_MSI */ 361 + 362 + int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry) 363 + { 364 + union hv_device_id device_id; 365 + 366 + device_id.as_uint64 = 0; 367 + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 368 + device_id.ioapic.ioapic_id = (u8)ioapic_id; 369 + 370 + return hv_unmap_interrupt(device_id.as_uint64, entry); 371 + } 372 + EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt); 373 + 374 + int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector, 375 + struct hv_interrupt_entry *entry) 376 + { 377 + union hv_device_id device_id; 378 + 379 + device_id.as_uint64 = 0; 380 + device_id.device_type = HV_DEVICE_TYPE_IOAPIC; 381 + device_id.ioapic.ioapic_id = (u8)ioapic_id; 382 + 383 + return hv_map_interrupt(device_id, level, cpu, vector, entry); 384 + } 385 + EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
+38
arch/x86/include/asm/hyperv-tlfs.h
··· 21 21 #define HYPERV_CPUID_FEATURES 0x40000003 22 22 #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 23 23 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 24 + #define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007 24 25 #define HYPERV_CPUID_NESTED_FEATURES 0x4000000A 26 + #define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C 25 27 26 28 #define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081 27 29 #define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */ ··· 113 111 #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) 114 112 115 113 /* 114 + * CPU management features identification. 115 + * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. 116 + */ 117 + #define HV_X64_START_LOGICAL_PROCESSOR BIT(0) 118 + #define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1) 119 + #define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2) 120 + #define HV_X64_RESERVED_IDENTITY_BIT BIT(31) 121 + 122 + /* 116 123 * Virtual processor will never share a physical core with another virtual 117 124 * processor, except for virtual processors that are reported as sibling SMT 118 125 * threads. ··· 132 121 #define HV_X64_NESTED_DIRECT_FLUSH BIT(17) 133 122 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18) 134 123 #define HV_X64_NESTED_MSR_BITMAP BIT(19) 124 + 125 + /* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */ 126 + #define HV_PARAVISOR_PRESENT BIT(0) 127 + 128 + /* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. 
*/ 129 + #define HV_ISOLATION_TYPE GENMASK(3, 0) 130 + #define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5) 131 + #define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6) 132 + 133 + enum hv_isolation_type { 134 + HV_ISOLATION_TYPE_NONE = 0, 135 + HV_ISOLATION_TYPE_VBS = 1, 136 + HV_ISOLATION_TYPE_SNP = 2 137 + }; 135 138 136 139 /* Hyper-V specific model specific registers (MSRs) */ 137 140 ··· 548 523 u32 tlb_lock_count; 549 524 }; 550 525 526 + enum hv_interrupt_type { 527 + HV_X64_INTERRUPT_TYPE_FIXED = 0x0000, 528 + HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001, 529 + HV_X64_INTERRUPT_TYPE_SMI = 0x0002, 530 + HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003, 531 + HV_X64_INTERRUPT_TYPE_NMI = 0x0004, 532 + HV_X64_INTERRUPT_TYPE_INIT = 0x0005, 533 + HV_X64_INTERRUPT_TYPE_SIPI = 0x0006, 534 + HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007, 535 + HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008, 536 + HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009, 537 + HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A, 538 + }; 551 539 552 540 #include <asm-generic/hyperv-tlfs.h> 553 541
+17 -2
arch/x86/include/asm/mshyperv.h
··· 78 78 79 79 extern void *hv_hypercall_pg; 80 80 extern void __percpu **hyperv_pcpu_input_arg; 81 + extern void __percpu **hyperv_pcpu_output_arg; 82 + 83 + extern u64 hv_current_partition_id; 84 + 85 + int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); 86 + int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); 87 + int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); 81 88 82 89 static inline u64 hv_do_hypercall(u64 control, void *input, void *output) 83 90 { ··· 246 239 struct hv_guest_mapping_flush_list *flush, 247 240 u64 start_gfn, u64 end_gfn); 248 241 242 + extern bool hv_root_partition; 243 + 249 244 #ifdef CONFIG_X86_64 250 245 void hv_apic_init(void); 251 246 void __init hv_init_spinlocks(void); ··· 259 250 static inline void hv_set_msi_entry_from_desc(union hv_msi_entry *msi_entry, 260 251 struct msi_desc *msi_desc) 261 252 { 262 - msi_entry->address = msi_desc->msg.address_lo; 263 - msi_entry->data = msi_desc->msg.data; 253 + msi_entry->address.as_uint32 = msi_desc->msg.address_lo; 254 + msi_entry->data.as_uint32 = msi_desc->msg.data; 264 255 } 256 + 257 + struct irq_domain *hv_create_pci_msi_domain(void); 258 + 259 + int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, 260 + struct hv_interrupt_entry *entry); 261 + int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); 265 262 266 263 #else /* CONFIG_HYPERV */ 267 264 static inline void hyperv_init(void) {}
+58
arch/x86/kernel/cpu/mshyperv.c
··· 31 31 #include <asm/reboot.h> 32 32 #include <asm/nmi.h> 33 33 #include <clocksource/hyperv_timer.h> 34 + #include <asm/numa.h> 35 + 36 + /* Is Linux running as the root partition? */ 37 + bool hv_root_partition; 38 + EXPORT_SYMBOL_GPL(hv_root_partition); 34 39 35 40 struct ms_hyperv_info ms_hyperv; 36 41 EXPORT_SYMBOL_GPL(ms_hyperv); ··· 231 226 hv_init_spinlocks(); 232 227 #endif 233 228 } 229 + 230 + static void __init hv_smp_prepare_cpus(unsigned int max_cpus) 231 + { 232 + #ifdef CONFIG_X86_64 233 + int i; 234 + int ret; 235 + #endif 236 + 237 + native_smp_prepare_cpus(max_cpus); 238 + 239 + #ifdef CONFIG_X86_64 240 + for_each_present_cpu(i) { 241 + if (i == 0) 242 + continue; 243 + ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i)); 244 + BUG_ON(ret); 245 + } 246 + 247 + for_each_present_cpu(i) { 248 + if (i == 0) 249 + continue; 250 + ret = hv_call_create_vp(numa_cpu_node(i), hv_current_partition_id, i, i); 251 + BUG_ON(ret); 252 + } 253 + #endif 254 + } 234 255 #endif 235 256 236 257 static void __init ms_hyperv_init_platform(void) ··· 274 243 * Extract the features and hints 275 244 */ 276 245 ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); 246 + ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES); 277 247 ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); 278 248 ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); 279 249 ··· 286 254 287 255 pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", 288 256 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); 257 + 258 + /* 259 + * Check CPU management privilege. 260 + * 261 + * To mirror what Windows does we should extract CPU management 262 + * features and use the ReservedIdentityBit to detect if Linux is the 263 + * root partition. But that requires negotiating CPU management 264 + * interface (a process to be finalized). 265 + * 266 + * For now, use the privilege flag as the indicator for running as 267 + * root. 
268 + */ 269 + if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) { 270 + hv_root_partition = true; 271 + pr_info("Hyper-V: running as root partition\n"); 272 + } 289 273 290 274 /* 291 275 * Extract host information. ··· 323 275 ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { 324 276 x86_platform.calibrate_tsc = hv_get_tsc_khz; 325 277 x86_platform.calibrate_cpu = hv_get_tsc_khz; 278 + } 279 + 280 + if (ms_hyperv.features_b & HV_ISOLATION) { 281 + ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); 282 + ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); 283 + 284 + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", 285 + ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); 326 286 } 327 287 328 288 if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { ··· 422 366 423 367 # ifdef CONFIG_SMP 424 368 smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; 369 + if (hv_root_partition) 370 + smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; 425 371 # endif 426 372 427 373 /*
+3
drivers/clocksource/hyperv_timer.c
··· 426 426 if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) 427 427 return false; 428 428 429 + if (hv_root_partition) 430 + return false; 431 + 429 432 hv_read_reference_counter = read_hv_clock_tsc; 430 433 phys_addr = virt_to_phys(hv_get_tsc_page()); 431 434
+2 -2
drivers/hv/channel.c
··· 618 618 goto error_clean_ring; 619 619 620 620 /* Create and init the channel open message */ 621 - open_info = kmalloc(sizeof(*open_info) + 621 + open_info = kzalloc(sizeof(*open_info) + 622 622 sizeof(struct vmbus_channel_open_channel), 623 623 GFP_KERNEL); 624 624 if (!open_info) { ··· 745 745 unsigned long flags; 746 746 int ret; 747 747 748 - info = kmalloc(sizeof(*info) + 748 + info = kzalloc(sizeof(*info) + 749 749 sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL); 750 750 if (!info) 751 751 return -ENOMEM;
+70 -7
drivers/hv/channel_mgmt.c
··· 31 31 { .dev_type = HV_IDE, 32 32 HV_IDE_GUID, 33 33 .perf_device = true, 34 + .allowed_in_isolated = false, 34 35 }, 35 36 36 37 /* SCSI */ 37 38 { .dev_type = HV_SCSI, 38 39 HV_SCSI_GUID, 39 40 .perf_device = true, 41 + .allowed_in_isolated = true, 40 42 }, 41 43 42 44 /* Fibre Channel */ 43 45 { .dev_type = HV_FC, 44 46 HV_SYNTHFC_GUID, 45 47 .perf_device = true, 48 + .allowed_in_isolated = false, 46 49 }, 47 50 48 51 /* Synthetic NIC */ 49 52 { .dev_type = HV_NIC, 50 53 HV_NIC_GUID, 51 54 .perf_device = true, 55 + .allowed_in_isolated = true, 52 56 }, 53 57 54 58 /* Network Direct */ 55 59 { .dev_type = HV_ND, 56 60 HV_ND_GUID, 57 61 .perf_device = true, 62 + .allowed_in_isolated = false, 58 63 }, 59 64 60 65 /* PCIE */ 61 66 { .dev_type = HV_PCIE, 62 67 HV_PCIE_GUID, 63 68 .perf_device = false, 69 + .allowed_in_isolated = false, 64 70 }, 65 71 66 72 /* Synthetic Frame Buffer */ 67 73 { .dev_type = HV_FB, 68 74 HV_SYNTHVID_GUID, 69 75 .perf_device = false, 76 + .allowed_in_isolated = false, 70 77 }, 71 78 72 79 /* Synthetic Keyboard */ 73 80 { .dev_type = HV_KBD, 74 81 HV_KBD_GUID, 75 82 .perf_device = false, 83 + .allowed_in_isolated = false, 76 84 }, 77 85 78 86 /* Synthetic MOUSE */ 79 87 { .dev_type = HV_MOUSE, 80 88 HV_MOUSE_GUID, 81 89 .perf_device = false, 90 + .allowed_in_isolated = false, 82 91 }, 83 92 84 93 /* KVP */ 85 94 { .dev_type = HV_KVP, 86 95 HV_KVP_GUID, 87 96 .perf_device = false, 97 + .allowed_in_isolated = false, 88 98 }, 89 99 90 100 /* Time Synch */ 91 101 { .dev_type = HV_TS, 92 102 HV_TS_GUID, 93 103 .perf_device = false, 104 + .allowed_in_isolated = true, 94 105 }, 95 106 96 107 /* Heartbeat */ 97 108 { .dev_type = HV_HB, 98 109 HV_HEART_BEAT_GUID, 99 110 .perf_device = false, 111 + .allowed_in_isolated = true, 100 112 }, 101 113 102 114 /* Shutdown */ 103 115 { .dev_type = HV_SHUTDOWN, 104 116 HV_SHUTDOWN_GUID, 105 117 .perf_device = false, 118 + .allowed_in_isolated = true, 106 119 }, 107 120 108 121 /* File copy */ 109 122 { 
.dev_type = HV_FCOPY, 110 123 HV_FCOPY_GUID, 111 124 .perf_device = false, 125 + .allowed_in_isolated = false, 112 126 }, 113 127 114 128 /* Backup */ 115 129 { .dev_type = HV_BACKUP, 116 130 HV_VSS_GUID, 117 131 .perf_device = false, 132 + .allowed_in_isolated = false, 118 133 }, 119 134 120 135 /* Dynamic Memory */ 121 136 { .dev_type = HV_DM, 122 137 HV_DM_GUID, 123 138 .perf_device = false, 139 + .allowed_in_isolated = false, 124 140 }, 125 141 126 142 /* Unknown GUID */ 127 143 { .dev_type = HV_UNKNOWN, 128 144 .perf_device = false, 145 + .allowed_in_isolated = false, 129 146 }, 130 147 }; 131 148 ··· 207 190 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message 208 191 * @icmsghdrp: Pointer to msg header structure 209 192 * @buf: Raw buffer channel data 193 + * @buflen: Length of the raw buffer channel data. 210 194 * @fw_version: The framework versions we can support. 211 195 * @fw_vercnt: The size of @fw_version. 212 196 * @srv_version: The service versions we can support. ··· 220 202 * Set up and fill in default negotiate response message. 221 203 * Mainly used by Hyper-V drivers. 
222 204 */ 223 - bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, 224 - u8 *buf, const int *fw_version, int fw_vercnt, 205 + bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, 206 + u32 buflen, const int *fw_version, int fw_vercnt, 225 207 const int *srv_version, int srv_vercnt, 226 208 int *nego_fw_version, int *nego_srv_version) 227 209 { ··· 233 215 bool found_match = false; 234 216 struct icmsg_negotiate *negop; 235 217 218 + /* Check that there's enough space for icframe_vercnt, icmsg_vercnt */ 219 + if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) { 220 + pr_err_ratelimited("Invalid icmsg negotiate\n"); 221 + return false; 222 + } 223 + 236 224 icmsghdrp->icmsgsize = 0x10; 237 - negop = (struct icmsg_negotiate *)&buf[ 238 - sizeof(struct vmbuspipe_hdr) + 239 - sizeof(struct icmsg_hdr)]; 225 + negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR]; 240 226 241 227 icframe_major = negop->icframe_vercnt; 242 228 icframe_minor = 0; 243 229 244 230 icmsg_major = negop->icmsg_vercnt; 245 231 icmsg_minor = 0; 232 + 233 + /* Validate negop packet */ 234 + if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || 235 + icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT || 236 + ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) { 237 + pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n", 238 + icframe_major, icmsg_major); 239 + goto fw_error; 240 + } 246 241 247 242 /* 248 243 * Select the framework version number we will ··· 920 889 return channel; 921 890 } 922 891 892 + static bool vmbus_is_valid_device(const guid_t *guid) 893 + { 894 + u16 i; 895 + 896 + if (!hv_is_isolation_supported()) 897 + return true; 898 + 899 + for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) { 900 + if (guid_equal(guid, &vmbus_devs[i].guid)) 901 + return vmbus_devs[i].allowed_in_isolated; 902 + } 903 + return false; 904 + } 905 + 923 906 /* 924 907 * vmbus_onoffer - Handler for channel offers from vmbus 
in parent partition. 925 908 * ··· 947 902 offer = (struct vmbus_channel_offer_channel *)hdr; 948 903 949 904 trace_vmbus_onoffer(offer); 905 + 906 + if (!vmbus_is_valid_device(&offer->offer.if_type)) { 907 + pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n", 908 + offer->child_relid); 909 + atomic_dec(&vmbus_connection.offer_in_progress); 910 + return; 911 + } 950 912 951 913 oldchannel = find_primary_channel_by_offer(offer); 952 914 ··· 1101 1049 1102 1050 mutex_lock(&vmbus_connection.channel_mutex); 1103 1051 channel = relid2channel(rescind->child_relid); 1052 + if (channel != NULL) { 1053 + /* 1054 + * Guarantee that no other instance of vmbus_onoffer_rescind() 1055 + * has got a reference to the channel object. Synchronize on 1056 + * &vmbus_connection.channel_mutex. 1057 + */ 1058 + if (channel->rescind_ref) { 1059 + mutex_unlock(&vmbus_connection.channel_mutex); 1060 + return; 1061 + } 1062 + channel->rescind_ref = true; 1063 + } 1104 1064 mutex_unlock(&vmbus_connection.channel_mutex); 1105 1065 1106 1066 if (channel == NULL) { ··· 1166 1102 vmbus_device_unregister(channel->device_obj); 1167 1103 put_device(dev); 1168 1104 } 1169 - } 1170 - if (channel->primary_channel != NULL) { 1105 + } else if (channel->primary_channel != NULL) { 1171 1106 /* 1172 1107 * Sub-channel is being rescinded. Following is the channel 1173 1108 * close sequence when initiated from the driveri (refer to
+7
drivers/hv/connection.c
··· 244 244 break; 245 245 } 246 246 247 + if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { 248 + pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n", 249 + version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); 250 + ret = -EINVAL; 251 + goto cleanup; 252 + } 253 + 247 254 vmbus_proto_version = version; 248 255 pr_info("Vmbus version:%d.%d\n", 249 256 version >> 16, version & 0xFFFF);
+28 -8
drivers/hv/hv_fcopy.c
··· 235 235 if (fcopy_transaction.state > HVUTIL_READY) 236 236 return; 237 237 238 - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, 239 - &requestid); 240 - if (recvlen <= 0) 238 + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, &requestid)) { 239 + pr_err_ratelimited("Fcopy request received. Could not read into recv buf\n"); 241 240 return; 241 + } 242 + 243 + if (!recvlen) 244 + return; 245 + 246 + /* Ensure recvlen is big enough to read header data */ 247 + if (recvlen < ICMSG_HDR) { 248 + pr_err_ratelimited("Fcopy request received. Packet length too small: %d\n", 249 + recvlen); 250 + return; 251 + } 242 252 243 253 icmsghdr = (struct icmsg_hdr *)&recv_buffer[ 244 254 sizeof(struct vmbuspipe_hdr)]; 255 + 245 256 if (icmsghdr->icmsgtype == ICMSGTYPE_NEGOTIATE) { 246 - if (vmbus_prep_negotiate_resp(icmsghdr, recv_buffer, 257 + if (vmbus_prep_negotiate_resp(icmsghdr, 258 + recv_buffer, recvlen, 247 259 fw_versions, FW_VER_COUNT, 248 260 fcopy_versions, FCOPY_VER_COUNT, 249 261 NULL, &fcopy_srv_version)) { ··· 264 252 fcopy_srv_version >> 16, 265 253 fcopy_srv_version & 0xFFFF); 266 254 } 267 - } else { 268 - fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ 269 - sizeof(struct vmbuspipe_hdr) + 270 - sizeof(struct icmsg_hdr)]; 255 + } else if (icmsghdr->icmsgtype == ICMSGTYPE_FCOPY) { 256 + /* Ensure recvlen is big enough to contain hv_fcopy_hdr */ 257 + if (recvlen < ICMSG_HDR + sizeof(struct hv_fcopy_hdr)) { 258 + pr_err_ratelimited("Invalid Fcopy hdr. Packet length too small: %u\n", 259 + recvlen); 260 + return; 261 + } 262 + fcopy_msg = (struct hv_fcopy_hdr *)&recv_buffer[ICMSG_HDR]; 271 263 272 264 /* 273 265 * Stash away this global state for completing the ··· 295 279 schedule_work(&fcopy_send_work); 296 280 schedule_delayed_work(&fcopy_timeout_work, 297 281 HV_UTIL_TIMEOUT * HZ); 282 + return; 283 + } else { 284 + pr_err_ratelimited("Fcopy request received. 
Invalid msg type: %d\n", 285 + icmsghdr->icmsgtype); 298 286 return; 299 287 } 300 288 icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE;
+79 -63
drivers/hv/hv_kvp.c
··· 662 662 if (kvp_transaction.state > HVUTIL_READY) 663 663 return; 664 664 665 - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 4, &recvlen, 666 - &requestid); 667 - 668 - if (recvlen > 0) { 669 - icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ 670 - sizeof(struct vmbuspipe_hdr)]; 671 - 672 - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 673 - if (vmbus_prep_negotiate_resp(icmsghdrp, 674 - recv_buffer, fw_versions, FW_VER_COUNT, 675 - kvp_versions, KVP_VER_COUNT, 676 - NULL, &kvp_srv_version)) { 677 - pr_info("KVP IC version %d.%d\n", 678 - kvp_srv_version >> 16, 679 - kvp_srv_version & 0xFFFF); 680 - } 681 - } else { 682 - kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ 683 - sizeof(struct vmbuspipe_hdr) + 684 - sizeof(struct icmsg_hdr)]; 685 - 686 - /* 687 - * Stash away this global state for completing the 688 - * transaction; note transactions are serialized. 689 - */ 690 - 691 - kvp_transaction.recv_len = recvlen; 692 - kvp_transaction.recv_req_id = requestid; 693 - kvp_transaction.kvp_msg = kvp_msg; 694 - 695 - if (kvp_transaction.state < HVUTIL_READY) { 696 - /* Userspace is not registered yet */ 697 - kvp_respond_to_host(NULL, HV_E_FAIL); 698 - return; 699 - } 700 - kvp_transaction.state = HVUTIL_HOSTMSG_RECEIVED; 701 - 702 - /* 703 - * Get the information from the 704 - * user-mode component. 705 - * component. This transaction will be 706 - * completed when we get the value from 707 - * the user-mode component. 708 - * Set a timeout to deal with 709 - * user-mode not responding. 
710 - */ 711 - schedule_work(&kvp_sendkey_work); 712 - schedule_delayed_work(&kvp_timeout_work, 713 - HV_UTIL_TIMEOUT * HZ); 714 - 715 - return; 716 - 717 - } 718 - 719 - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 720 - | ICMSGHDRFLAG_RESPONSE; 721 - 722 - vmbus_sendpacket(channel, recv_buffer, 723 - recvlen, requestid, 724 - VM_PKT_DATA_INBAND, 0); 725 - 726 - host_negotiatied = NEGO_FINISHED; 727 - hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); 665 + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 4, &recvlen, &requestid)) { 666 + pr_err_ratelimited("KVP request received. Could not read into recv buf\n"); 667 + return; 728 668 } 729 669 670 + if (!recvlen) 671 + return; 672 + 673 + /* Ensure recvlen is big enough to read header data */ 674 + if (recvlen < ICMSG_HDR) { 675 + pr_err_ratelimited("KVP request received. Packet length too small: %d\n", 676 + recvlen); 677 + return; 678 + } 679 + 680 + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[sizeof(struct vmbuspipe_hdr)]; 681 + 682 + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 683 + if (vmbus_prep_negotiate_resp(icmsghdrp, 684 + recv_buffer, recvlen, 685 + fw_versions, FW_VER_COUNT, 686 + kvp_versions, KVP_VER_COUNT, 687 + NULL, &kvp_srv_version)) { 688 + pr_info("KVP IC version %d.%d\n", 689 + kvp_srv_version >> 16, 690 + kvp_srv_version & 0xFFFF); 691 + } 692 + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_KVPEXCHANGE) { 693 + /* 694 + * recvlen is not checked against sizeof(struct kvp_msg) because kvp_msg contains 695 + * a union of structs and the msg type received is not known. Code using this 696 + * struct should provide validation when accessing its fields. 697 + */ 698 + kvp_msg = (struct hv_kvp_msg *)&recv_buffer[ICMSG_HDR]; 699 + 700 + /* 701 + * Stash away this global state for completing the 702 + * transaction; note transactions are serialized. 
703 + */ 704 + 705 + kvp_transaction.recv_len = recvlen; 706 + kvp_transaction.recv_req_id = requestid; 707 + kvp_transaction.kvp_msg = kvp_msg; 708 + 709 + if (kvp_transaction.state < HVUTIL_READY) { 710 + /* Userspace is not registered yet */ 711 + kvp_respond_to_host(NULL, HV_E_FAIL); 712 + return; 713 + } 714 + kvp_transaction.state = HVUTIL_HOSTMSG_RECEIVED; 715 + 716 + /* 717 + * Get the information from the 718 + * user-mode component. 719 + * component. This transaction will be 720 + * completed when we get the value from 721 + * the user-mode component. 722 + * Set a timeout to deal with 723 + * user-mode not responding. 724 + */ 725 + schedule_work(&kvp_sendkey_work); 726 + schedule_delayed_work(&kvp_timeout_work, 727 + HV_UTIL_TIMEOUT * HZ); 728 + 729 + return; 730 + 731 + } else { 732 + pr_err_ratelimited("KVP request received. Invalid msg type: %d\n", 733 + icmsghdrp->icmsgtype); 734 + return; 735 + } 736 + 737 + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 738 + | ICMSGHDRFLAG_RESPONSE; 739 + 740 + vmbus_sendpacket(channel, recv_buffer, 741 + recvlen, requestid, 742 + VM_PKT_DATA_INBAND, 0); 743 + 744 + host_negotiatied = NEGO_FINISHED; 745 + hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper); 730 746 } 731 747 732 748 static void kvp_on_reset(void)
+56 -41
drivers/hv/hv_snapshot.c
··· 298 298 if (vss_transaction.state > HVUTIL_READY) 299 299 return; 300 300 301 - vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, 302 - &requestid); 303 - 304 - if (recvlen > 0) { 305 - icmsghdrp = (struct icmsg_hdr *)&recv_buffer[ 306 - sizeof(struct vmbuspipe_hdr)]; 307 - 308 - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 309 - if (vmbus_prep_negotiate_resp(icmsghdrp, 310 - recv_buffer, fw_versions, FW_VER_COUNT, 311 - vss_versions, VSS_VER_COUNT, 312 - NULL, &vss_srv_version)) { 313 - 314 - pr_info("VSS IC version %d.%d\n", 315 - vss_srv_version >> 16, 316 - vss_srv_version & 0xFFFF); 317 - } 318 - } else { 319 - vss_msg = (struct hv_vss_msg *)&recv_buffer[ 320 - sizeof(struct vmbuspipe_hdr) + 321 - sizeof(struct icmsg_hdr)]; 322 - 323 - /* 324 - * Stash away this global state for completing the 325 - * transaction; note transactions are serialized. 326 - */ 327 - 328 - vss_transaction.recv_len = recvlen; 329 - vss_transaction.recv_req_id = requestid; 330 - vss_transaction.msg = (struct hv_vss_msg *)vss_msg; 331 - 332 - schedule_work(&vss_handle_request_work); 333 - return; 334 - } 335 - 336 - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 337 - | ICMSGHDRFLAG_RESPONSE; 338 - 339 - vmbus_sendpacket(channel, recv_buffer, 340 - recvlen, requestid, 341 - VM_PKT_DATA_INBAND, 0); 301 + if (vmbus_recvpacket(channel, recv_buffer, HV_HYP_PAGE_SIZE * 2, &recvlen, &requestid)) { 302 + pr_err_ratelimited("VSS request received. Could not read into recv buf\n"); 303 + return; 342 304 } 343 305 306 + if (!recvlen) 307 + return; 308 + 309 + /* Ensure recvlen is big enough to read header data */ 310 + if (recvlen < ICMSG_HDR) { 311 + pr_err_ratelimited("VSS request received. 
Packet length too small: %d\n", 312 + recvlen); 313 + return; 314 + } 315 + 316 + icmsghdrp = (struct icmsg_hdr *)&recv_buffer[sizeof(struct vmbuspipe_hdr)]; 317 + 318 + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 319 + if (vmbus_prep_negotiate_resp(icmsghdrp, 320 + recv_buffer, recvlen, 321 + fw_versions, FW_VER_COUNT, 322 + vss_versions, VSS_VER_COUNT, 323 + NULL, &vss_srv_version)) { 324 + 325 + pr_info("VSS IC version %d.%d\n", 326 + vss_srv_version >> 16, 327 + vss_srv_version & 0xFFFF); 328 + } 329 + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_VSS) { 330 + /* Ensure recvlen is big enough to contain hv_vss_msg */ 331 + if (recvlen < ICMSG_HDR + sizeof(struct hv_vss_msg)) { 332 + pr_err_ratelimited("Invalid VSS msg. Packet length too small: %u\n", 333 + recvlen); 334 + return; 335 + } 336 + vss_msg = (struct hv_vss_msg *)&recv_buffer[ICMSG_HDR]; 337 + 338 + /* 339 + * Stash away this global state for completing the 340 + * transaction; note transactions are serialized. 341 + */ 342 + 343 + vss_transaction.recv_len = recvlen; 344 + vss_transaction.recv_req_id = requestid; 345 + vss_transaction.msg = (struct hv_vss_msg *)vss_msg; 346 + 347 + schedule_work(&vss_handle_request_work); 348 + return; 349 + } else { 350 + pr_err_ratelimited("VSS request received. Invalid msg type: %d\n", 351 + icmsghdrp->icmsgtype); 352 + return; 353 + } 354 + 355 + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | 356 + ICMSGHDRFLAG_RESPONSE; 357 + vmbus_sendpacket(channel, recv_buffer, recvlen, requestid, 358 + VM_PKT_DATA_INBAND, 0); 344 359 } 345 360 346 361 static void vss_on_reset(void)
+137 -83
drivers/hv/hv_util.c
··· 195 195 196 196 struct icmsg_hdr *icmsghdrp; 197 197 198 - vmbus_recvpacket(channel, shut_txf_buf, 199 - HV_HYP_PAGE_SIZE, &recvlen, &requestid); 198 + if (vmbus_recvpacket(channel, shut_txf_buf, HV_HYP_PAGE_SIZE, &recvlen, &requestid)) { 199 + pr_err_ratelimited("Shutdown request received. Could not read into shut txf buf\n"); 200 + return; 201 + } 200 202 201 - if (recvlen > 0) { 202 - icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[ 203 - sizeof(struct vmbuspipe_hdr)]; 203 + if (!recvlen) 204 + return; 204 205 205 - if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 206 - if (vmbus_prep_negotiate_resp(icmsghdrp, shut_txf_buf, 207 - fw_versions, FW_VER_COUNT, 208 - sd_versions, SD_VER_COUNT, 209 - NULL, &sd_srv_version)) { 210 - pr_info("Shutdown IC version %d.%d\n", 211 - sd_srv_version >> 16, 212 - sd_srv_version & 0xFFFF); 213 - } 214 - } else { 215 - shutdown_msg = 216 - (struct shutdown_msg_data *)&shut_txf_buf[ 217 - sizeof(struct vmbuspipe_hdr) + 218 - sizeof(struct icmsg_hdr)]; 206 + /* Ensure recvlen is big enough to read header data */ 207 + if (recvlen < ICMSG_HDR) { 208 + pr_err_ratelimited("Shutdown request received. Packet length too small: %d\n", 209 + recvlen); 210 + return; 211 + } 219 212 220 - /* 221 - * shutdown_msg->flags can be 0(shut down), 2(reboot), 222 - * or 4(hibernate). It may bitwise-OR 1, which means 223 - * performing the request by force. Linux always tries 224 - * to perform the request by force. 
225 - */ 226 - switch (shutdown_msg->flags) { 227 - case 0: 228 - case 1: 229 - icmsghdrp->status = HV_S_OK; 230 - work = &shutdown_work; 231 - pr_info("Shutdown request received -" 232 - " graceful shutdown initiated\n"); 233 - break; 234 - case 2: 235 - case 3: 236 - icmsghdrp->status = HV_S_OK; 237 - work = &restart_work; 238 - pr_info("Restart request received -" 239 - " graceful restart initiated\n"); 240 - break; 241 - case 4: 242 - case 5: 243 - pr_info("Hibernation request received\n"); 244 - icmsghdrp->status = hibernation_supported ? 245 - HV_S_OK : HV_E_FAIL; 246 - if (hibernation_supported) 247 - work = &hibernate_context.work; 248 - break; 249 - default: 250 - icmsghdrp->status = HV_E_FAIL; 251 - pr_info("Shutdown request received -" 252 - " Invalid request\n"); 253 - break; 254 - } 213 + icmsghdrp = (struct icmsg_hdr *)&shut_txf_buf[sizeof(struct vmbuspipe_hdr)]; 214 + 215 + if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 216 + if (vmbus_prep_negotiate_resp(icmsghdrp, 217 + shut_txf_buf, recvlen, 218 + fw_versions, FW_VER_COUNT, 219 + sd_versions, SD_VER_COUNT, 220 + NULL, &sd_srv_version)) { 221 + pr_info("Shutdown IC version %d.%d\n", 222 + sd_srv_version >> 16, 223 + sd_srv_version & 0xFFFF); 224 + } 225 + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_SHUTDOWN) { 226 + /* Ensure recvlen is big enough to contain shutdown_msg_data struct */ 227 + if (recvlen < ICMSG_HDR + sizeof(struct shutdown_msg_data)) { 228 + pr_err_ratelimited("Invalid shutdown msg data. Packet length too small: %u\n", 229 + recvlen); 230 + return; 255 231 } 256 232 257 - icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 258 - | ICMSGHDRFLAG_RESPONSE; 233 + shutdown_msg = (struct shutdown_msg_data *)&shut_txf_buf[ICMSG_HDR]; 259 234 260 - vmbus_sendpacket(channel, shut_txf_buf, 261 - recvlen, requestid, 262 - VM_PKT_DATA_INBAND, 0); 235 + /* 236 + * shutdown_msg->flags can be 0(shut down), 2(reboot), 237 + * or 4(hibernate). 
It may bitwise-OR 1, which means 238 + * performing the request by force. Linux always tries 239 + * to perform the request by force. 240 + */ 241 + switch (shutdown_msg->flags) { 242 + case 0: 243 + case 1: 244 + icmsghdrp->status = HV_S_OK; 245 + work = &shutdown_work; 246 + pr_info("Shutdown request received - graceful shutdown initiated\n"); 247 + break; 248 + case 2: 249 + case 3: 250 + icmsghdrp->status = HV_S_OK; 251 + work = &restart_work; 252 + pr_info("Restart request received - graceful restart initiated\n"); 253 + break; 254 + case 4: 255 + case 5: 256 + pr_info("Hibernation request received\n"); 257 + icmsghdrp->status = hibernation_supported ? 258 + HV_S_OK : HV_E_FAIL; 259 + if (hibernation_supported) 260 + work = &hibernate_context.work; 261 + break; 262 + default: 263 + icmsghdrp->status = HV_E_FAIL; 264 + pr_info("Shutdown request received - Invalid request\n"); 265 + break; 266 + } 267 + } else { 268 + icmsghdrp->status = HV_E_FAIL; 269 + pr_err_ratelimited("Shutdown request received. Invalid msg type: %d\n", 270 + icmsghdrp->icmsgtype); 263 271 } 272 + 273 + icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 274 + | ICMSGHDRFLAG_RESPONSE; 275 + 276 + vmbus_sendpacket(channel, shut_txf_buf, 277 + recvlen, requestid, 278 + VM_PKT_DATA_INBAND, 0); 264 279 265 280 if (work) 266 281 schedule_work(work); ··· 411 396 HV_HYP_PAGE_SIZE, &recvlen, 412 397 &requestid); 413 398 if (ret) { 414 - pr_warn_once("TimeSync IC pkt recv failed (Err: %d)\n", 415 - ret); 399 + pr_err_ratelimited("TimeSync IC pkt recv failed (Err: %d)\n", 400 + ret); 416 401 break; 417 402 } 418 403 419 404 if (!recvlen) 420 405 break; 421 406 407 + /* Ensure recvlen is big enough to read header data */ 408 + if (recvlen < ICMSG_HDR) { 409 + pr_err_ratelimited("Timesync request received. 
Packet length too small: %d\n", 410 + recvlen); 411 + break; 412 + } 413 + 422 414 icmsghdrp = (struct icmsg_hdr *)&time_txf_buf[ 423 415 sizeof(struct vmbuspipe_hdr)]; 424 416 425 417 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 426 - if (vmbus_prep_negotiate_resp(icmsghdrp, time_txf_buf, 418 + if (vmbus_prep_negotiate_resp(icmsghdrp, 419 + time_txf_buf, recvlen, 427 420 fw_versions, FW_VER_COUNT, 428 421 ts_versions, TS_VER_COUNT, 429 422 NULL, &ts_srv_version)) { ··· 439 416 ts_srv_version >> 16, 440 417 ts_srv_version & 0xFFFF); 441 418 } 442 - } else { 419 + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_TIMESYNC) { 443 420 if (ts_srv_version > TS_VERSION_3) { 444 - refdata = (struct ictimesync_ref_data *) 445 - &time_txf_buf[ 446 - sizeof(struct vmbuspipe_hdr) + 447 - sizeof(struct icmsg_hdr)]; 421 + /* Ensure recvlen is big enough to read ictimesync_ref_data */ 422 + if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_ref_data)) { 423 + pr_err_ratelimited("Invalid ictimesync ref data. Length too small: %u\n", 424 + recvlen); 425 + break; 426 + } 427 + refdata = (struct ictimesync_ref_data *)&time_txf_buf[ICMSG_HDR]; 448 428 449 429 adj_guesttime(refdata->parenttime, 450 430 refdata->vmreferencetime, 451 431 refdata->flags); 452 432 } else { 453 - timedatap = (struct ictimesync_data *) 454 - &time_txf_buf[ 455 - sizeof(struct vmbuspipe_hdr) + 456 - sizeof(struct icmsg_hdr)]; 433 + /* Ensure recvlen is big enough to read ictimesync_data */ 434 + if (recvlen < ICMSG_HDR + sizeof(struct ictimesync_data)) { 435 + pr_err_ratelimited("Invalid ictimesync data. Length too small: %u\n", 436 + recvlen); 437 + break; 438 + } 439 + timedatap = (struct ictimesync_data *)&time_txf_buf[ICMSG_HDR]; 440 + 457 441 adj_guesttime(timedatap->parenttime, 458 442 hv_read_reference_counter(), 459 443 timedatap->flags); 460 444 } 445 + } else { 446 + icmsghdrp->status = HV_E_FAIL; 447 + pr_err_ratelimited("Timesync request received. 
Invalid msg type: %d\n", 448 + icmsghdrp->icmsgtype); 461 449 } 462 450 463 451 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 464 452 | ICMSGHDRFLAG_RESPONSE; 465 453 466 454 vmbus_sendpacket(channel, time_txf_buf, 467 - recvlen, requestid, 468 - VM_PKT_DATA_INBAND, 0); 455 + recvlen, requestid, 456 + VM_PKT_DATA_INBAND, 0); 469 457 } 470 458 } 471 459 ··· 496 462 497 463 while (1) { 498 464 499 - vmbus_recvpacket(channel, hbeat_txf_buf, 500 - HV_HYP_PAGE_SIZE, &recvlen, &requestid); 465 + if (vmbus_recvpacket(channel, hbeat_txf_buf, HV_HYP_PAGE_SIZE, 466 + &recvlen, &requestid)) { 467 + pr_err_ratelimited("Heartbeat request received. Could not read into hbeat txf buf\n"); 468 + return; 469 + } 501 470 502 471 if (!recvlen) 503 472 break; 473 + 474 + /* Ensure recvlen is big enough to read header data */ 475 + if (recvlen < ICMSG_HDR) { 476 + pr_err_ratelimited("Heartbeat request received. Packet length too small: %d\n", 477 + recvlen); 478 + break; 479 + } 504 480 505 481 icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[ 506 482 sizeof(struct vmbuspipe_hdr)]; 507 483 508 484 if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) { 509 485 if (vmbus_prep_negotiate_resp(icmsghdrp, 510 - hbeat_txf_buf, 486 + hbeat_txf_buf, recvlen, 511 487 fw_versions, FW_VER_COUNT, 512 488 hb_versions, HB_VER_COUNT, 513 489 NULL, &hb_srv_version)) { ··· 526 482 hb_srv_version >> 16, 527 483 hb_srv_version & 0xFFFF); 528 484 } 529 - } else { 530 - heartbeat_msg = 531 - (struct heartbeat_msg_data *)&hbeat_txf_buf[ 532 - sizeof(struct vmbuspipe_hdr) + 533 - sizeof(struct icmsg_hdr)]; 485 + } else if (icmsghdrp->icmsgtype == ICMSGTYPE_HEARTBEAT) { 486 + /* 487 + * Ensure recvlen is big enough to read seq_num. Reserved area is not 488 + * included in the check as the host may not fill it up entirely 489 + */ 490 + if (recvlen < ICMSG_HDR + sizeof(u64)) { 491 + pr_err_ratelimited("Invalid heartbeat msg data. 
Length too small: %u\n", 492 + recvlen); 493 + break; 494 + } 495 + heartbeat_msg = (struct heartbeat_msg_data *)&hbeat_txf_buf[ICMSG_HDR]; 534 496 535 497 heartbeat_msg->seq_num += 1; 498 + } else { 499 + icmsghdrp->status = HV_E_FAIL; 500 + pr_err_ratelimited("Heartbeat request received. Invalid msg type: %d\n", 501 + icmsghdrp->icmsgtype); 536 502 } 537 503 538 504 icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION 539 505 | ICMSGHDRFLAG_RESPONSE; 540 506 541 507 vmbus_sendpacket(channel, hbeat_txf_buf, 542 - recvlen, requestid, 543 - VM_PKT_DATA_INBAND, 0); 508 + recvlen, requestid, 509 + VM_PKT_DATA_INBAND, 0); 544 510 } 545 511 } 546 512
+47 -17
drivers/hv/vmbus_drv.c
··· 678 678 }; 679 679 __ATTRIBUTE_GROUPS(vmbus_dev); 680 680 681 + /* Set up the attribute for /sys/bus/vmbus/hibernation */ 682 + static ssize_t hibernation_show(struct bus_type *bus, char *buf) 683 + { 684 + return sprintf(buf, "%d\n", !!hv_is_hibernation_supported()); 685 + } 686 + 687 + static BUS_ATTR_RO(hibernation); 688 + 689 + static struct attribute *vmbus_bus_attrs[] = { 690 + &bus_attr_hibernation.attr, 691 + NULL, 692 + }; 693 + static const struct attribute_group vmbus_bus_group = { 694 + .attrs = vmbus_bus_attrs, 695 + }; 696 + __ATTRIBUTE_GROUPS(vmbus_bus); 697 + 681 698 /* 682 699 * vmbus_uevent - add uevent for our device 683 700 * ··· 1041 1024 .uevent = vmbus_uevent, 1042 1025 .dev_groups = vmbus_dev_groups, 1043 1026 .drv_groups = vmbus_drv_groups, 1027 + .bus_groups = vmbus_bus_groups, 1044 1028 .pm = &vmbus_pm, 1045 1029 }; 1046 1030 ··· 1072 1054 { 1073 1055 struct hv_per_cpu_context *hv_cpu = (void *)data; 1074 1056 void *page_addr = hv_cpu->synic_message_page; 1075 - struct hv_message *msg = (struct hv_message *)page_addr + 1057 + struct hv_message msg_copy, *msg = (struct hv_message *)page_addr + 1076 1058 VMBUS_MESSAGE_SINT; 1077 1059 struct vmbus_channel_message_header *hdr; 1060 + enum vmbus_channel_message_type msgtype; 1078 1061 const struct vmbus_channel_message_table_entry *entry; 1079 1062 struct onmessage_work_context *ctx; 1080 - u32 message_type = msg->header.message_type; 1063 + __u8 payload_size; 1064 + u32 message_type; 1081 1065 1082 1066 /* 1083 1067 * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as ··· 1088 1068 */ 1089 1069 BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); 1090 1070 1071 + /* 1072 + * Since the message is in memory shared with the host, an erroneous or 1073 + * malicious Hyper-V could modify the message while vmbus_on_msg_dpc() 1074 + * or individual message handlers are executing; to prevent this, copy 1075 + * the message into private memory. 
1076 + */ 1077 + memcpy(&msg_copy, msg, sizeof(struct hv_message)); 1078 + 1079 + message_type = msg_copy.header.message_type; 1091 1080 if (message_type == HVMSG_NONE) 1092 1081 /* no msg */ 1093 1082 return; 1094 1083 1095 - hdr = (struct vmbus_channel_message_header *)msg->u.payload; 1084 + hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload; 1085 + msgtype = hdr->msgtype; 1096 1086 1097 1087 trace_vmbus_on_msg_dpc(hdr); 1098 1088 1099 - if (hdr->msgtype >= CHANNELMSG_COUNT) { 1100 - WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); 1089 + if (msgtype >= CHANNELMSG_COUNT) { 1090 + WARN_ONCE(1, "unknown msgtype=%d\n", msgtype); 1101 1091 goto msg_handled; 1102 1092 } 1103 1093 1104 - if (msg->header.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { 1105 - WARN_ONCE(1, "payload size is too large (%d)\n", 1106 - msg->header.payload_size); 1094 + payload_size = msg_copy.header.payload_size; 1095 + if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { 1096 + WARN_ONCE(1, "payload size is too large (%d)\n", payload_size); 1107 1097 goto msg_handled; 1108 1098 } 1109 1099 1110 - entry = &channel_message_table[hdr->msgtype]; 1100 + entry = &channel_message_table[msgtype]; 1111 1101 1112 1102 if (!entry->message_handler) 1113 1103 goto msg_handled; 1114 1104 1115 - if (msg->header.payload_size < entry->min_payload_len) { 1116 - WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", 1117 - hdr->msgtype, msg->header.payload_size); 1105 + if (payload_size < entry->min_payload_len) { 1106 + WARN_ONCE(1, "message too short: msgtype=%d len=%d\n", msgtype, payload_size); 1118 1107 goto msg_handled; 1119 1108 } 1120 1109 1121 1110 if (entry->handler_type == VMHT_BLOCKING) { 1122 - ctx = kmalloc(sizeof(*ctx) + msg->header.payload_size, 1123 - GFP_ATOMIC); 1111 + ctx = kmalloc(sizeof(*ctx) + payload_size, GFP_ATOMIC); 1124 1112 if (ctx == NULL) 1125 1113 return; 1126 1114 1127 1115 INIT_WORK(&ctx->work, vmbus_onmessage_work); 1128 - memcpy(&ctx->msg, msg, 
sizeof(msg->header) + 1129 - msg->header.payload_size); 1116 + memcpy(&ctx->msg, &msg_copy, sizeof(msg->header) + payload_size); 1130 1117 1131 1118 /* 1132 1119 * The host can generate a rescind message while we ··· 1142 1115 * by offer_in_progress and by channel_mutex. See also the 1143 1116 * inline comments in vmbus_onoffer_rescind(). 1144 1117 */ 1145 - switch (hdr->msgtype) { 1118 + switch (msgtype) { 1146 1119 case CHANNELMSG_RESCIND_CHANNELOFFER: 1147 1120 /* 1148 1121 * If we are handling the rescind message; ··· 2644 2617 2645 2618 if (!hv_is_hyperv_initialized()) 2646 2619 return -ENODEV; 2620 + 2621 + if (hv_root_partition) 2622 + return 0; 2647 2623 2648 2624 init_completion(&probe_event); 2649 2625
+174 -3
drivers/iommu/hyperv-iommu.c
··· 20 20 #include <asm/io_apic.h> 21 21 #include <asm/irq_remapping.h> 22 22 #include <asm/hypervisor.h> 23 + #include <asm/mshyperv.h> 23 24 24 25 #include "irq_remapping.h" 25 26 ··· 116 115 .free = hyperv_irq_remapping_free, 117 116 }; 118 117 118 + static const struct irq_domain_ops hyperv_root_ir_domain_ops; 119 119 static int __init hyperv_prepare_irq_remapping(void) 120 120 { 121 121 struct fwnode_handle *fn; 122 122 int i; 123 + const char *name; 124 + const struct irq_domain_ops *ops; 123 125 124 126 if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || 125 127 x86_init.hyper.msi_ext_dest_id() || 126 128 !x2apic_supported()) 127 129 return -ENODEV; 128 130 129 - fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); 131 + if (hv_root_partition) { 132 + name = "HYPERV-ROOT-IR"; 133 + ops = &hyperv_root_ir_domain_ops; 134 + } else { 135 + name = "HYPERV-IR"; 136 + ops = &hyperv_ir_domain_ops; 137 + } 138 + 139 + fn = irq_domain_alloc_named_id_fwnode(name, 0); 130 140 if (!fn) 131 141 return -ENOMEM; 132 142 133 143 ioapic_ir_domain = 134 144 irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 135 - 0, IOAPIC_REMAPPING_ENTRY, fn, 136 - &hyperv_ir_domain_ops, NULL); 145 + 0, IOAPIC_REMAPPING_ENTRY, fn, ops, NULL); 137 146 138 147 if (!ioapic_ir_domain) { 139 148 irq_domain_free_fwnode(fn); 140 149 return -ENOMEM; 141 150 } 151 + 152 + if (hv_root_partition) 153 + return 0; /* The rest is only relevant to guests */ 142 154 143 155 /* 144 156 * Hyper-V doesn't provide irq remapping function for ··· 178 164 struct irq_remap_ops hyperv_irq_remap_ops = { 179 165 .prepare = hyperv_prepare_irq_remapping, 180 166 .enable = hyperv_enable_irq_remapping, 167 + }; 168 + 169 + /* IRQ remapping domain when Linux runs as the root partition */ 170 + struct hyperv_root_ir_data { 171 + u8 ioapic_id; 172 + bool is_level; 173 + struct hv_interrupt_entry entry; 174 + }; 175 + 176 + static void 177 + hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) 178 
+ { 179 + u64 status; 180 + u32 vector; 181 + struct irq_cfg *cfg; 182 + int ioapic_id; 183 + struct cpumask *affinity; 184 + int cpu; 185 + struct hv_interrupt_entry entry; 186 + struct hyperv_root_ir_data *data = irq_data->chip_data; 187 + struct IO_APIC_route_entry e; 188 + 189 + cfg = irqd_cfg(irq_data); 190 + affinity = irq_data_get_effective_affinity_mask(irq_data); 191 + cpu = cpumask_first_and(affinity, cpu_online_mask); 192 + 193 + vector = cfg->vector; 194 + ioapic_id = data->ioapic_id; 195 + 196 + if (data->entry.source == HV_DEVICE_TYPE_IOAPIC 197 + && data->entry.ioapic_rte.as_uint64) { 198 + entry = data->entry; 199 + 200 + status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); 201 + 202 + if (status != HV_STATUS_SUCCESS) 203 + pr_debug("%s: unexpected unmap status %lld\n", __func__, status); 204 + 205 + data->entry.ioapic_rte.as_uint64 = 0; 206 + data->entry.source = 0; /* Invalid source */ 207 + } 208 + 209 + 210 + status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, 211 + vector, &entry); 212 + 213 + if (status != HV_STATUS_SUCCESS) { 214 + pr_err("%s: map hypercall failed, status %lld\n", __func__, status); 215 + return; 216 + } 217 + 218 + data->entry = entry; 219 + 220 + /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. 
*/ 221 + e.w1 = entry.ioapic_rte.low_uint32; 222 + e.w2 = entry.ioapic_rte.high_uint32; 223 + 224 + memset(msg, 0, sizeof(*msg)); 225 + msg->arch_data.vector = e.vector; 226 + msg->arch_data.delivery_mode = e.delivery_mode; 227 + msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical; 228 + msg->arch_addr_lo.dmar_format = e.ir_format; 229 + msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14; 230 + } 231 + 232 + static int hyperv_root_ir_set_affinity(struct irq_data *data, 233 + const struct cpumask *mask, bool force) 234 + { 235 + struct irq_data *parent = data->parent_data; 236 + struct irq_cfg *cfg = irqd_cfg(data); 237 + int ret; 238 + 239 + ret = parent->chip->irq_set_affinity(parent, mask, force); 240 + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) 241 + return ret; 242 + 243 + send_cleanup_vector(cfg); 244 + 245 + return 0; 246 + } 247 + 248 + static struct irq_chip hyperv_root_ir_chip = { 249 + .name = "HYPERV-ROOT-IR", 250 + .irq_ack = apic_ack_irq, 251 + .irq_set_affinity = hyperv_root_ir_set_affinity, 252 + .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg, 253 + }; 254 + 255 + static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain, 256 + unsigned int virq, unsigned int nr_irqs, 257 + void *arg) 258 + { 259 + struct irq_alloc_info *info = arg; 260 + struct irq_data *irq_data; 261 + struct hyperv_root_ir_data *data; 262 + int ret = 0; 263 + 264 + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) 265 + return -EINVAL; 266 + 267 + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); 268 + if (ret < 0) 269 + return ret; 270 + 271 + data = kzalloc(sizeof(*data), GFP_KERNEL); 272 + if (!data) { 273 + irq_domain_free_irqs_common(domain, virq, nr_irqs); 274 + return -ENOMEM; 275 + } 276 + 277 + irq_data = irq_domain_get_irq_data(domain, virq); 278 + if (!irq_data) { 279 + kfree(data); 280 + irq_domain_free_irqs_common(domain, virq, nr_irqs); 281 + return -EINVAL; 282 + } 283 + 284 + data->ioapic_id = info->devid; 
285 + data->is_level = info->ioapic.is_level; 286 + 287 + irq_data->chip = &hyperv_root_ir_chip; 288 + irq_data->chip_data = data; 289 + 290 + return 0; 291 + } 292 + 293 + static void hyperv_root_irq_remapping_free(struct irq_domain *domain, 294 + unsigned int virq, unsigned int nr_irqs) 295 + { 296 + struct irq_data *irq_data; 297 + struct hyperv_root_ir_data *data; 298 + struct hv_interrupt_entry *e; 299 + int i; 300 + 301 + for (i = 0; i < nr_irqs; i++) { 302 + irq_data = irq_domain_get_irq_data(domain, virq + i); 303 + 304 + if (irq_data && irq_data->chip_data) { 305 + data = irq_data->chip_data; 306 + e = &data->entry; 307 + 308 + if (e->source == HV_DEVICE_TYPE_IOAPIC 309 + && e->ioapic_rte.as_uint64) 310 + hv_unmap_ioapic_interrupt(data->ioapic_id, 311 + &data->entry); 312 + 313 + kfree(data); 314 + } 315 + } 316 + 317 + irq_domain_free_irqs_common(domain, virq, nr_irqs); 318 + } 319 + 320 + static const struct irq_domain_ops hyperv_root_ir_domain_ops = { 321 + .select = hyperv_irq_remapping_select, 322 + .alloc = hyperv_root_irq_remapping_alloc, 323 + .free = hyperv_root_irq_remapping_free, 181 324 }; 182 325 183 326 #endif
+16 -2
drivers/net/hyperv/netvsc.c
··· 22 22 #include <linux/prefetch.h> 23 23 24 24 #include <asm/sync_bitops.h> 25 + #include <asm/mshyperv.h> 25 26 26 27 #include "hyperv_net.h" 27 28 #include "netvsc_trace.h" ··· 563 562 init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; 564 563 565 564 if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { 566 - init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; 565 + if (hv_is_isolation_supported()) 566 + netdev_info(ndev, "SR-IOV not advertised by guests on the host supporting isolation\n"); 567 + else 568 + init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; 567 569 568 570 /* Teaming bit is needed to receive link speed updates */ 569 571 init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; ··· 609 605 } 610 606 611 607 if (i < 0) { 608 + ret = -EPROTO; 609 + goto cleanup; 610 + } 611 + 612 + if (hv_is_isolation_supported() && net_device->nvsp_version < NVSP_PROTOCOL_VERSION_61) { 613 + netdev_err(ndev, "Invalid NVSP version 0x%x (expected >= 0x%x) from the host supporting isolation\n", 614 + net_device->nvsp_version, NVSP_PROTOCOL_VERSION_61); 612 615 ret = -EPROTO; 613 616 goto cleanup; 614 617 } ··· 1427 1416 break; 1428 1417 1429 1418 case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: 1430 - netvsc_send_vf(ndev, nvmsg, msglen); 1419 + if (hv_is_isolation_supported()) 1420 + netdev_err(ndev, "Ignore VF_ASSOCIATION msg from the host supporting isolation\n"); 1421 + else 1422 + netvsc_send_vf(ndev, nvmsg, msglen); 1431 1423 break; 1432 1424 } 1433 1425 }
+1 -1
drivers/pci/controller/pci-hyperv.c
··· 1216 1216 params = &hbus->retarget_msi_interrupt_params; 1217 1217 memset(params, 0, sizeof(*params)); 1218 1218 params->partition_id = HV_PARTITION_ID_SELF; 1219 - params->int_entry.source = 1; /* MSI(-X) */ 1219 + params->int_entry.source = HV_INTERRUPT_SOURCE_MSI; 1220 1220 hv_set_msi_entry_from_desc(&params->int_entry.msi_entry, msi_desc); 1221 1221 params->device_id = (hbus->hdev->dev_instance.b[5] << 24) | 1222 1222 (hbus->hdev->dev_instance.b[4] << 16) |
+4
include/acpi/acpi_numa.h
··· 30 30 { 31 31 return 0; 32 32 } 33 + static inline int node_to_pxm(int node) 34 + { 35 + return 0; 36 + } 33 37 #endif /* CONFIG_ACPI_NUMA */ 34 38 35 39 #ifdef CONFIG_ACPI_HMAT
+250 -5
include/asm-generic/hyperv-tlfs.h
··· 88 88 #define HV_CONNECT_PORT BIT(7) 89 89 #define HV_ACCESS_STATS BIT(8) 90 90 #define HV_DEBUGGING BIT(11) 91 - #define HV_CPU_POWER_MANAGEMENT BIT(12) 91 + #define HV_CPU_MANAGEMENT BIT(12) 92 + #define HV_ISOLATION BIT(22) 92 93 93 94 94 95 /* ··· 142 141 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 143 142 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 144 143 #define HVCALL_SEND_IPI_EX 0x0015 144 + #define HVCALL_GET_PARTITION_ID 0x0046 145 + #define HVCALL_DEPOSIT_MEMORY 0x0048 146 + #define HVCALL_CREATE_VP 0x004e 145 147 #define HVCALL_GET_VP_REGISTERS 0x0050 146 148 #define HVCALL_SET_VP_REGISTERS 0x0051 147 149 #define HVCALL_POST_MESSAGE 0x005c ··· 152 148 #define HVCALL_POST_DEBUG_DATA 0x0069 153 149 #define HVCALL_RETRIEVE_DEBUG_DATA 0x006a 154 150 #define HVCALL_RESET_DEBUG_SESSION 0x006b 151 + #define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076 152 + #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c 153 + #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d 155 154 #define HVCALL_RETARGET_INTERRUPT 0x007e 156 155 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af 157 156 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 ··· 414 407 u64 gva_list[]; 415 408 } __packed; 416 409 410 + /* HvGetPartitionId hypercall (output only) */ 411 + struct hv_get_partition_id { 412 + u64 partition_id; 413 + } __packed; 414 + 415 + /* HvDepositMemory hypercall */ 416 + struct hv_deposit_memory { 417 + u64 partition_id; 418 + u64 gpa_page_list[]; 419 + } __packed; 420 + 421 + struct hv_proximity_domain_flags { 422 + u32 proximity_preferred : 1; 423 + u32 reserved : 30; 424 + u32 proximity_info_valid : 1; 425 + } __packed; 426 + 427 + /* Not a union in windows but useful for zeroing */ 428 + union hv_proximity_domain_info { 429 + struct { 430 + u32 domain_id; 431 + struct hv_proximity_domain_flags flags; 432 + }; 433 + u64 as_uint64; 434 + } __packed; 435 + 436 + struct hv_lp_startup_status { 437 + u64 hv_status; 438 + u64 substatus1; 439 + u64 substatus2; 440 + 
u64 substatus3; 441 + u64 substatus4; 442 + u64 substatus5; 443 + u64 substatus6; 444 + } __packed; 445 + 446 + /* HvAddLogicalProcessor hypercall */ 447 + struct hv_add_logical_processor_in { 448 + u32 lp_index; 449 + u32 apic_id; 450 + union hv_proximity_domain_info proximity_domain_info; 451 + u64 flags; 452 + } __packed; 453 + 454 + struct hv_add_logical_processor_out { 455 + struct hv_lp_startup_status startup_status; 456 + } __packed; 457 + 458 + enum HV_SUBNODE_TYPE 459 + { 460 + HvSubnodeAny = 0, 461 + HvSubnodeSocket = 1, 462 + HvSubnodeAmdNode = 2, 463 + HvSubnodeL3 = 3, 464 + HvSubnodeCount = 4, 465 + HvSubnodeInvalid = -1 466 + }; 467 + 468 + /* HvCreateVp hypercall */ 469 + struct hv_create_vp { 470 + u64 partition_id; 471 + u32 vp_index; 472 + u8 padding[3]; 473 + u8 subnode_type; 474 + u64 subnode_id; 475 + union hv_proximity_domain_info proximity_domain_info; 476 + u64 flags; 477 + } __packed; 478 + 479 + enum hv_interrupt_source { 480 + HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */ 481 + HV_INTERRUPT_SOURCE_IOAPIC, 482 + }; 483 + 484 + union hv_msi_address_register { 485 + u32 as_uint32; 486 + struct { 487 + u32 reserved1:2; 488 + u32 destination_mode:1; 489 + u32 redirection_hint:1; 490 + u32 reserved2:8; 491 + u32 destination_id:8; 492 + u32 msi_base:12; 493 + }; 494 + } __packed; 495 + 496 + union hv_msi_data_register { 497 + u32 as_uint32; 498 + struct { 499 + u32 vector:8; 500 + u32 delivery_mode:3; 501 + u32 reserved1:3; 502 + u32 level_assert:1; 503 + u32 trigger_mode:1; 504 + u32 reserved2:16; 505 + }; 506 + } __packed; 507 + 417 508 /* HvRetargetDeviceInterrupt hypercall */ 418 509 union hv_msi_entry { 419 510 u64 as_uint64; 420 511 struct { 421 - u32 address; 422 - u32 data; 512 + union hv_msi_address_register address; 513 + union hv_msi_data_register data; 423 514 } __packed; 424 515 }; 425 516 517 + union hv_ioapic_rte { 518 + u64 as_uint64; 519 + 520 + struct { 521 + u32 vector:8; 522 + u32 delivery_mode:3; 523 + u32 
destination_mode:1; 524 + u32 delivery_status:1; 525 + u32 interrupt_polarity:1; 526 + u32 remote_irr:1; 527 + u32 trigger_mode:1; 528 + u32 interrupt_mask:1; 529 + u32 reserved1:15; 530 + 531 + u32 reserved2:24; 532 + u32 destination_id:8; 533 + }; 534 + 535 + struct { 536 + u32 low_uint32; 537 + u32 high_uint32; 538 + }; 539 + } __packed; 540 + 426 541 struct hv_interrupt_entry { 427 - u32 source; /* 1 for MSI(-X) */ 542 + u32 source; 428 543 u32 reserved1; 429 - union hv_msi_entry msi_entry; 544 + union { 545 + union hv_msi_entry msi_entry; 546 + union hv_ioapic_rte ioapic_rte; 547 + }; 430 548 } __packed; 431 549 432 550 /* ··· 625 493 u64 valuehigh; 626 494 } element[]; 627 495 } __packed; 496 + 497 + enum hv_device_type { 498 + HV_DEVICE_TYPE_LOGICAL = 0, 499 + HV_DEVICE_TYPE_PCI = 1, 500 + HV_DEVICE_TYPE_IOAPIC = 2, 501 + HV_DEVICE_TYPE_ACPI = 3, 502 + }; 503 + 504 + typedef u16 hv_pci_rid; 505 + typedef u16 hv_pci_segment; 506 + typedef u64 hv_logical_device_id; 507 + union hv_pci_bdf { 508 + u16 as_uint16; 509 + 510 + struct { 511 + u8 function:3; 512 + u8 device:5; 513 + u8 bus; 514 + }; 515 + } __packed; 516 + 517 + union hv_pci_bus_range { 518 + u16 as_uint16; 519 + 520 + struct { 521 + u8 subordinate_bus; 522 + u8 secondary_bus; 523 + }; 524 + } __packed; 525 + 526 + union hv_device_id { 527 + u64 as_uint64; 528 + 529 + struct { 530 + u64 reserved0:62; 531 + u64 device_type:2; 532 + }; 533 + 534 + /* HV_DEVICE_TYPE_LOGICAL */ 535 + struct { 536 + u64 id:62; 537 + u64 device_type:2; 538 + } logical; 539 + 540 + /* HV_DEVICE_TYPE_PCI */ 541 + struct { 542 + union { 543 + hv_pci_rid rid; 544 + union hv_pci_bdf bdf; 545 + }; 546 + 547 + hv_pci_segment segment; 548 + union hv_pci_bus_range shadow_bus_range; 549 + 550 + u16 phantom_function_bits:2; 551 + u16 source_shadow:1; 552 + 553 + u16 rsvdz0:11; 554 + u16 device_type:2; 555 + } pci; 556 + 557 + /* HV_DEVICE_TYPE_IOAPIC */ 558 + struct { 559 + u8 ioapic_id; 560 + u8 rsvdz0; 561 + u16 rsvdz1; 562 + u16 
rsvdz2; 563 + 564 + u16 rsvdz3:14; 565 + u16 device_type:2; 566 + } ioapic; 567 + 568 + /* HV_DEVICE_TYPE_ACPI */ 569 + struct { 570 + u32 input_mapping_base; 571 + u32 input_mapping_count:30; 572 + u32 device_type:2; 573 + } acpi; 574 + } __packed; 575 + 576 + enum hv_interrupt_trigger_mode { 577 + HV_INTERRUPT_TRIGGER_MODE_EDGE = 0, 578 + HV_INTERRUPT_TRIGGER_MODE_LEVEL = 1, 579 + }; 580 + 581 + struct hv_device_interrupt_descriptor { 582 + u32 interrupt_type; 583 + u32 trigger_mode; 584 + u32 vector_count; 585 + u32 reserved; 586 + struct hv_device_interrupt_target target; 587 + } __packed; 588 + 589 + struct hv_input_map_device_interrupt { 590 + u64 partition_id; 591 + u64 device_id; 592 + u64 flags; 593 + struct hv_interrupt_entry logical_interrupt_entry; 594 + struct hv_device_interrupt_descriptor interrupt_descriptor; 595 + } __packed; 596 + 597 + struct hv_output_map_device_interrupt { 598 + struct hv_interrupt_entry interrupt_entry; 599 + } __packed; 600 + 601 + struct hv_input_unmap_device_interrupt { 602 + u64 partition_id; 603 + u64 device_id; 604 + struct hv_interrupt_entry interrupt_entry; 605 + } __packed; 606 + 607 + #define HV_SOURCE_SHADOW_NONE 0x0 608 + #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1 628 609 629 610 #endif
+5
include/asm-generic/mshyperv.h
··· 27 27 28 28 struct ms_hyperv_info { 29 29 u32 features; 30 + u32 features_b; 30 31 u32 misc_features; 31 32 u32 hints; 32 33 u32 nested_features; 33 34 u32 max_vp_index; 34 35 u32 max_lp_index; 36 + u32 isolation_config_a; 37 + u32 isolation_config_b; 35 38 }; 36 39 extern struct ms_hyperv_info ms_hyperv; 37 40 ··· 172 169 void hyperv_report_panic_msg(phys_addr_t pa, size_t size); 173 170 bool hv_is_hyperv_initialized(void); 174 171 bool hv_is_hibernation_supported(void); 172 + enum hv_isolation_type hv_get_isolation_type(void); 173 + bool hv_is_isolation_supported(void); 175 174 void hyperv_cleanup(void); 176 175 #else /* CONFIG_HYPERV */ 177 176 static inline bool hv_is_hyperv_initialized(void) { return false; }
+11 -2
include/linux/hyperv.h
··· 785 785 u16 dev_type; 786 786 guid_t guid; 787 787 bool perf_device; 788 + bool allowed_in_isolated; 788 789 }; 789 790 790 791 struct vmbus_channel { ··· 804 803 u8 monitor_bit; 805 804 806 805 bool rescind; /* got rescind msg */ 806 + bool rescind_ref; /* got rescind msg, got channel reference */ 807 807 struct completion rescind_event; 808 808 809 809 u32 ringbuffer_gpadlhandle; ··· 1473 1471 #define ICMSGTYPE_SHUTDOWN 3 1474 1472 #define ICMSGTYPE_TIMESYNC 4 1475 1473 #define ICMSGTYPE_VSS 5 1474 + #define ICMSGTYPE_FCOPY 7 1476 1475 1477 1476 #define ICMSGHDRFLAG_TRANSACTION 1 1478 1477 #define ICMSGHDRFLAG_REQUEST 2 ··· 1517 1514 u8 reserved[2]; 1518 1515 } __packed; 1519 1516 1517 + #define IC_VERSION_NEGOTIATION_MAX_VER_COUNT 100 1518 + #define ICMSG_HDR (sizeof(struct vmbuspipe_hdr) + sizeof(struct icmsg_hdr)) 1519 + #define ICMSG_NEGOTIATE_PKT_SIZE(icframe_vercnt, icmsg_vercnt) \ 1520 + (ICMSG_HDR + sizeof(struct icmsg_negotiate) + \ 1521 + (((icframe_vercnt) + (icmsg_vercnt)) * sizeof(struct ic_version))) 1522 + 1520 1523 struct icmsg_negotiate { 1521 1524 u16 icframe_vercnt; 1522 1525 u16 icmsg_vercnt; 1523 1526 u32 reserved; 1524 - struct ic_version icversion_data[1]; /* any size array */ 1527 + struct ic_version icversion_data[]; /* any size array */ 1525 1528 } __packed; 1526 1529 1527 1530 struct shutdown_msg_data { ··· 1578 1569 }; 1579 1570 1580 1571 #define MAX_SRV_VER 0x7ffffff 1581 - extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, 1572 + extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen, 1582 1573 const int *fw_version, int fw_vercnt, 1583 1574 const int *srv_version, int srv_vercnt, 1584 1575 int *nego_fw_version, int *nego_srv_version);