Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'hyperv-fixes-signed-20220407' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull hyperv fixes from Wei Liu:

- Correctly propagate coherence information for VMbus devices (Michael
Kelley)

- Disable balloon and memory hot-add on ARM64 temporarily (Boqun Feng)

- Use barrier to prevent reordering when reading ring buffer (Michael
Kelley)

- Use virt_store_mb in favour of smp_store_mb (Andrea Parri)

- Fix VMbus device object initialization (Andrea Parri)

- Deactivate sysctl_record_panic_msg on isolated guest (Andrea Parri)

- Fix a crash when unloading VMbus module (Guilherme G. Piccoli)

* tag 'hyperv-fixes-signed-20220407' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb()
Drivers: hv: balloon: Disable balloon and hot-add accordingly
Drivers: hv: balloon: Support status report for larger page sizes
Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer
PCI: hv: Propagate coherence from VMbus device to PCI device
Drivers: hv: vmbus: Propagate VMbus coherence to each VMbus device
Drivers: hv: vmbus: Fix potential crash on module unload
Drivers: hv: vmbus: Fix initialization of device object in vmbus_device_register()
Drivers: hv: vmbus: Deactivate sysctl_record_panic_msg by default in isolated guests

+132 -20
+3 -3
drivers/hv/channel_mgmt.c
··· 380 380 * execute: 381 381 * 382 382 * (a) In the "normal (i.e., not resuming from hibernation)" path, 383 - * the full barrier in smp_store_mb() guarantees that the store 383 + * the full barrier in virt_store_mb() guarantees that the store 384 384 * is propagated to all CPUs before the add_channel_work work 385 385 * is queued. In turn, add_channel_work is queued before the 386 386 * channel's ring buffer is allocated/initialized and the ··· 392 392 * recv_int_page before retrieving the channel pointer from the 393 393 * array of channels. 394 394 * 395 - * (b) In the "resuming from hibernation" path, the smp_store_mb() 395 + * (b) In the "resuming from hibernation" path, the virt_store_mb() 396 396 * guarantees that the store is propagated to all CPUs before 397 397 * the VMBus connection is marked as ready for the resume event 398 398 * (cf. check_ready_for_resume_event()). The interrupt handler 399 399 * of the VMBus driver and vmbus_chan_sched() can not run before 400 400 * vmbus_bus_resume() has completed execution (cf. resume_noirq). 401 401 */ 402 - smp_store_mb( 402 + virt_store_mb( 403 403 vmbus_connection.channels[channel->offermsg.child_relid], 404 404 channel); 405 405 }
+44 -5
drivers/hv/hv_balloon.c
··· 17 17 #include <linux/slab.h> 18 18 #include <linux/kthread.h> 19 19 #include <linux/completion.h> 20 + #include <linux/count_zeros.h> 20 21 #include <linux/memory_hotplug.h> 21 22 #include <linux/memory.h> 22 23 #include <linux/notifier.h> ··· 1131 1130 struct dm_status status; 1132 1131 unsigned long now = jiffies; 1133 1132 unsigned long last_post = last_post_time; 1133 + unsigned long num_pages_avail, num_pages_committed; 1134 1134 1135 1135 if (pressure_report_delay > 0) { 1136 1136 --pressure_report_delay; ··· 1156 1154 * num_pages_onlined) as committed to the host, otherwise it can try 1157 1155 * asking us to balloon them out. 1158 1156 */ 1159 - status.num_avail = si_mem_available(); 1160 - status.num_committed = vm_memory_committed() + 1157 + num_pages_avail = si_mem_available(); 1158 + num_pages_committed = vm_memory_committed() + 1161 1159 dm->num_pages_ballooned + 1162 1160 (dm->num_pages_added > dm->num_pages_onlined ? 1163 1161 dm->num_pages_added - dm->num_pages_onlined : 0) + 1164 1162 compute_balloon_floor(); 1165 1163 1166 - trace_balloon_status(status.num_avail, status.num_committed, 1164 + trace_balloon_status(num_pages_avail, num_pages_committed, 1167 1165 vm_memory_committed(), dm->num_pages_ballooned, 1168 1166 dm->num_pages_added, dm->num_pages_onlined); 1167 + 1168 + /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */ 1169 + status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; 1170 + status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; 1171 + 1169 1172 /* 1170 1173 * If our transaction ID is no longer current, just don't 1171 1174 * send the status. This can happen if we were interrupted ··· 1660 1653 } 1661 1654 } 1662 1655 1656 + static int ballooning_enabled(void) 1657 + { 1658 + /* 1659 + * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), 1660 + * since currently it's unclear to us whether an unballoon request can 1661 + * make sure all page ranges are guest page size aligned. 
1662 + */ 1663 + if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { 1664 + pr_info("Ballooning disabled because page size is not 4096 bytes\n"); 1665 + return 0; 1666 + } 1667 + 1668 + return 1; 1669 + } 1670 + 1671 + static int hot_add_enabled(void) 1672 + { 1673 + /* 1674 + * Disable hot add on ARM64, because we currently rely on 1675 + * memory_add_physaddr_to_nid() to get a node id of a hot add range, 1676 + * however ARM64's memory_add_physaddr_to_nid() always return 0 and 1677 + * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for 1678 + * add_memory(). 1679 + */ 1680 + if (IS_ENABLED(CONFIG_ARM64)) { 1681 + pr_info("Memory hot add disabled on ARM64\n"); 1682 + return 0; 1683 + } 1684 + 1685 + return 1; 1686 + } 1687 + 1663 1688 static int balloon_connect_vsp(struct hv_device *dev) 1664 1689 { 1665 1690 struct dm_version_request version_req; ··· 1763 1724 * currently still requires the bits to be set, so we have to add code 1764 1725 * to fail the host's hot-add and balloon up/down requests, if any. 1765 1726 */ 1766 - cap_msg.caps.cap_bits.balloon = 1; 1767 - cap_msg.caps.cap_bits.hot_add = 1; 1727 + cap_msg.caps.cap_bits.balloon = ballooning_enabled(); 1728 + cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); 1768 1729 1769 1730 /* 1770 1731 * Specify our alignment requirements as it relates
+11
drivers/hv/hv_common.c
··· 20 20 #include <linux/panic_notifier.h> 21 21 #include <linux/ptrace.h> 22 22 #include <linux/slab.h> 23 + #include <linux/dma-map-ops.h> 23 24 #include <asm/hyperv-tlfs.h> 24 25 #include <asm/mshyperv.h> 25 26 ··· 218 217 return hv_extended_cap & cap_query; 219 218 } 220 219 EXPORT_SYMBOL_GPL(hv_query_ext_cap); 220 + 221 + void hv_setup_dma_ops(struct device *dev, bool coherent) 222 + { 223 + /* 224 + * Hyper-V does not offer a vIOMMU in the guest 225 + * VM, so pass 0/NULL for the IOMMU settings 226 + */ 227 + arch_setup_dma_ops(dev, 0, 0, NULL, coherent); 228 + } 229 + EXPORT_SYMBOL_GPL(hv_setup_dma_ops); 221 230 222 231 bool hv_is_hibernation_supported(void) 223 232 {
+10 -1
drivers/hv/ring_buffer.c
··· 439 439 static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) 440 440 { 441 441 u32 priv_read_loc = rbi->priv_read_index; 442 - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); 442 + u32 write_loc; 443 + 444 + /* 445 + * The Hyper-V host writes the packet data, then uses 446 + * store_release() to update the write_index. Use load_acquire() 447 + * here to prevent loads of the packet data from being re-ordered 448 + * before the read of the write_index and potentially getting 449 + * stale data. 450 + */ 451 + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); 443 452 444 453 if (write_loc >= priv_read_loc) 445 454 return write_loc - priv_read_loc;
+54 -11
drivers/hv/vmbus_drv.c
··· 77 77 78 78 /* 79 79 * Hyper-V should be notified only once about a panic. If we will be 80 - * doing hyperv_report_panic_msg() later with kmsg data, don't do 81 - * the notification here. 80 + * doing hv_kmsg_dump() with kmsg data later, don't do the notification 81 + * here. 82 82 */ 83 83 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 84 84 && hyperv_report_reg()) { ··· 100 100 101 101 /* 102 102 * Hyper-V should be notified only once about a panic. If we will be 103 - * doing hyperv_report_panic_msg() later with kmsg data, don't do 104 - * the notification here. 103 + * doing hv_kmsg_dump() with kmsg data later, don't do the notification 104 + * here. 105 105 */ 106 106 if (hyperv_report_reg()) 107 107 hyperv_report_panic(regs, val, true); ··· 921 921 } 922 922 923 923 /* 924 + * vmbus_dma_configure -- Configure DMA coherence for VMbus device 925 + */ 926 + static int vmbus_dma_configure(struct device *child_device) 927 + { 928 + /* 929 + * On ARM64, propagate the DMA coherence setting from the top level 930 + * VMbus ACPI device to the child VMbus device being added here. 931 + * On x86/x64 coherence is assumed and these calls have no effect. 
932 + */ 933 + hv_setup_dma_ops(child_device, 934 + device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); 935 + return 0; 936 + } 937 + 938 + /* 924 939 * vmbus_remove - Remove a vmbus device 925 940 */ 926 941 static void vmbus_remove(struct device *child_device) ··· 1055 1040 .remove = vmbus_remove, 1056 1041 .probe = vmbus_probe, 1057 1042 .uevent = vmbus_uevent, 1043 + .dma_configure = vmbus_dma_configure, 1058 1044 .dev_groups = vmbus_dev_groups, 1059 1045 .drv_groups = vmbus_drv_groups, 1060 1046 .bus_groups = vmbus_bus_groups, ··· 1562 1546 if (ret) 1563 1547 goto err_connect; 1564 1548 1549 + if (hv_is_isolation_supported()) 1550 + sysctl_record_panic_msg = 0; 1551 + 1565 1552 /* 1566 1553 * Only register if the crash MSRs are available 1567 1554 */ 1568 1555 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 1569 1556 u64 hyperv_crash_ctl; 1570 1557 /* 1571 - * Sysctl registration is not fatal, since by default 1572 - * reporting is enabled. 1558 + * Panic message recording (sysctl_record_panic_msg) 1559 + * is enabled by default in non-isolated guests and 1560 + * disabled by default in isolated guests; the panic 1561 + * message recording won't be available in isolated 1562 + * guests should the following registration fail. 1573 1563 */ 1574 1564 hv_ctl_table_hdr = register_sysctl_table(hv_root_table); 1575 1565 if (!hv_ctl_table_hdr) ··· 2119 2097 child_device_obj->device.parent = &hv_acpi_dev->dev; 2120 2098 child_device_obj->device.release = vmbus_device_release; 2121 2099 2100 + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; 2101 + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; 2102 + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); 2103 + 2122 2104 /* 2123 2105 * Register with the LDM. 
This will kick off the driver/device 2124 2106 * binding...which will eventually call vmbus_match() and vmbus_probe() ··· 2148 2122 } 2149 2123 hv_debug_add_dev_dir(child_device_obj); 2150 2124 2151 - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; 2152 - child_device_obj->device.dma_mask = &child_device_obj->dma_mask; 2153 - dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); 2154 2125 return 0; 2155 2126 2156 2127 err_kset_unregister: ··· 2450 2427 struct acpi_device *ancestor; 2451 2428 2452 2429 hv_acpi_dev = device; 2430 + 2431 + /* 2432 + * Older versions of Hyper-V for ARM64 fail to include the _CCA 2433 + * method on the top level VMbus device in the DSDT. But devices 2434 + * are hardware coherent in all current Hyper-V use cases, so fix 2435 + * up the ACPI device to behave as if _CCA is present and indicates 2436 + * hardware coherence. 2437 + */ 2438 + ACPI_COMPANION_SET(&device->dev, device); 2439 + if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && 2440 + device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { 2441 + pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); 2442 + device->flags.cca_seen = true; 2443 + device->flags.coherent_dma = true; 2444 + } 2453 2445 2454 2446 result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, 2455 2447 vmbus_walk_resources, NULL); ··· 2818 2780 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { 2819 2781 kmsg_dump_unregister(&hv_kmsg_dumper); 2820 2782 unregister_die_notifier(&hyperv_die_block); 2821 - atomic_notifier_chain_unregister(&panic_notifier_list, 2822 - &hyperv_panic_block); 2823 2783 } 2784 + 2785 + /* 2786 + * The panic notifier is always registered, hence we should 2787 + * also unconditionally unregister it here as well. 2788 + */ 2789 + atomic_notifier_chain_unregister(&panic_notifier_list, 2790 + &hyperv_panic_block); 2824 2791 2825 2792 free_page((unsigned long)hv_panic_page); 2826 2793 unregister_sysctl_table(hv_ctl_table_hdr);
+9
drivers/pci/controller/pci-hyperv.c
··· 3407 3407 hbus->bridge->domain_nr = dom; 3408 3408 #ifdef CONFIG_X86 3409 3409 hbus->sysdata.domain = dom; 3410 + #elif defined(CONFIG_ARM64) 3411 + /* 3412 + * Set the PCI bus parent to be the corresponding VMbus 3413 + * device. Then the VMbus device will be assigned as the 3414 + * ACPI companion in pcibios_root_bridge_prepare() and 3415 + * pci_dma_configure() will propagate device coherence 3416 + * information to devices created on the bus. 3417 + */ 3418 + hbus->sysdata.parent = hdev->device.parent; 3410 3419 #endif 3411 3420 3412 3421 hbus->hdev = hdev;
+1
include/asm-generic/mshyperv.h
··· 269 269 u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); 270 270 void hyperv_cleanup(void); 271 271 bool hv_query_ext_cap(u64 cap_query); 272 + void hv_setup_dma_ops(struct device *dev, bool coherent); 272 273 void *hv_map_memory(void *addr, unsigned long size); 273 274 void hv_unmap_memory(void *addr); 274 275 #else /* CONFIG_HYPERV */