Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull Hyper-V updates from Sasha Levin:

- first round of vmbus hibernation support (Dexuan Cui)

- remove dependencies on PAGE_SIZE (Maya Nakamura)

- move the hyper-v tools/ code into the tools build system (Andy Shevchenko)

- hyper-v balloon cleanups (Dexuan Cui)

* tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
Drivers: hv: vmbus: Resume after fixing up old primary channels
Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
Drivers: hv: vmbus: Add a helper function is_sub_channel()
Drivers: hv: vmbus: Suspend/resume the synic for hibernation
Drivers: hv: vmbus: Break out synic enable and disable operations
HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
Tools: hv: move to tools buildsystem
hv_balloon: Reorganize the probe function
hv_balloon: Use a static page for the balloon_up send buffer

+613 -134
+2 -2
drivers/hid/hid-hyperv.c
··· 104 104 105 105 #pragma pack(pop) 106 106 107 - #define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE) 108 - #define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE) 107 + #define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024) 108 + #define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024) 109 109 110 110 111 111 enum pipe_prot_msg_type {
+144 -17
drivers/hv/channel_mgmt.c
··· 407 407 cpumask_clear_cpu(channel->target_cpu, 408 408 &primary_channel->alloced_cpus_in_node); 409 409 410 - vmbus_release_relid(channel->offermsg.child_relid); 410 + /* 411 + * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and 412 + * the relid is invalidated; after hibernation, when the user-space app 413 + * destroys the channel, the relid is INVALID_RELID, and in this case 414 + * it's unnecessary and unsafe to release the old relid, since the same 415 + * relid can refer to a completely different channel now. 416 + */ 417 + if (channel->offermsg.child_relid != INVALID_RELID) 418 + vmbus_release_relid(channel->offermsg.child_relid); 411 419 412 420 free_channel(channel); 413 421 } ··· 552 544 bool fnew = true; 553 545 554 546 mutex_lock(&vmbus_connection.channel_mutex); 547 + 548 + /* Remember the channels that should be cleaned up upon suspend. */ 549 + if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel)) 550 + atomic_inc(&vmbus_connection.nr_chan_close_on_suspend); 555 551 556 552 /* 557 553 * Now that we have acquired the channel_mutex, ··· 859 847 vmbus_wait_for_unload(); 860 848 } 861 849 850 + static void check_ready_for_resume_event(void) 851 + { 852 + /* 853 + * If all the old primary channels have been fixed up, then it's safe 854 + * to resume. 855 + */ 856 + if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume)) 857 + complete(&vmbus_connection.ready_for_resume_event); 858 + } 859 + 860 + static void vmbus_setup_channel_state(struct vmbus_channel *channel, 861 + struct vmbus_channel_offer_channel *offer) 862 + { 863 + /* 864 + * Setup state for signalling the host. 
865 + */ 866 + channel->sig_event = VMBUS_EVENT_CONNECTION_ID; 867 + 868 + if (vmbus_proto_version != VERSION_WS2008) { 869 + channel->is_dedicated_interrupt = 870 + (offer->is_dedicated_interrupt != 0); 871 + channel->sig_event = offer->connection_id; 872 + } 873 + 874 + memcpy(&channel->offermsg, offer, 875 + sizeof(struct vmbus_channel_offer_channel)); 876 + channel->monitor_grp = (u8)offer->monitorid / 32; 877 + channel->monitor_bit = (u8)offer->monitorid % 32; 878 + } 879 + 880 + /* 881 + * find_primary_channel_by_offer - Get the channel object given the new offer. 882 + * This is only used in the resume path of hibernation. 883 + */ 884 + static struct vmbus_channel * 885 + find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer) 886 + { 887 + struct vmbus_channel *channel = NULL, *iter; 888 + const guid_t *inst1, *inst2; 889 + 890 + /* Ignore sub-channel offers. */ 891 + if (offer->offer.sub_channel_index != 0) 892 + return NULL; 893 + 894 + mutex_lock(&vmbus_connection.channel_mutex); 895 + 896 + list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) { 897 + inst1 = &iter->offermsg.offer.if_instance; 898 + inst2 = &offer->offer.if_instance; 899 + 900 + if (guid_equal(inst1, inst2)) { 901 + channel = iter; 902 + break; 903 + } 904 + } 905 + 906 + mutex_unlock(&vmbus_connection.channel_mutex); 907 + 908 + return channel; 909 + } 910 + 862 911 /* 863 912 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition. 
864 913 * ··· 927 854 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) 928 855 { 929 856 struct vmbus_channel_offer_channel *offer; 930 - struct vmbus_channel *newchannel; 857 + struct vmbus_channel *oldchannel, *newchannel; 858 + size_t offer_sz; 931 859 932 860 offer = (struct vmbus_channel_offer_channel *)hdr; 933 861 934 862 trace_vmbus_onoffer(offer); 863 + 864 + oldchannel = find_primary_channel_by_offer(offer); 865 + 866 + if (oldchannel != NULL) { 867 + atomic_dec(&vmbus_connection.offer_in_progress); 868 + 869 + /* 870 + * We're resuming from hibernation: all the sub-channel and 871 + * hv_sock channels we had before the hibernation should have 872 + * been cleaned up, and now we must be seeing a re-offered 873 + * primary channel that we had before the hibernation. 874 + */ 875 + 876 + WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID); 877 + /* Fix up the relid. */ 878 + oldchannel->offermsg.child_relid = offer->child_relid; 879 + 880 + offer_sz = sizeof(*offer); 881 + if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) { 882 + check_ready_for_resume_event(); 883 + return; 884 + } 885 + 886 + /* 887 + * This is not an error, since the host can also change the 888 + * other field(s) of the offer, e.g. on WS RS5 (Build 17763), 889 + * the offer->connection_id of the Mellanox VF vmbus device 890 + * can change when the host reoffers the device upon resume. 891 + */ 892 + pr_debug("vmbus offer changed: relid=%d\n", 893 + offer->child_relid); 894 + 895 + print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET, 896 + 16, 4, &oldchannel->offermsg, offer_sz, 897 + false); 898 + print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET, 899 + 16, 4, offer, offer_sz, false); 900 + 901 + /* Fix up the old channel. */ 902 + vmbus_setup_channel_state(oldchannel, offer); 903 + 904 + check_ready_for_resume_event(); 905 + 906 + return; 907 + } 935 908 936 909 /* Allocate the channel object and save this offer. 
*/ 937 910 newchannel = alloc_channel(); ··· 988 869 return; 989 870 } 990 871 991 - /* 992 - * Setup state for signalling the host. 993 - */ 994 - newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID; 995 - 996 - if (vmbus_proto_version != VERSION_WS2008) { 997 - newchannel->is_dedicated_interrupt = 998 - (offer->is_dedicated_interrupt != 0); 999 - newchannel->sig_event = offer->connection_id; 1000 - } 1001 - 1002 - memcpy(&newchannel->offermsg, offer, 1003 - sizeof(struct vmbus_channel_offer_channel)); 1004 - newchannel->monitor_grp = (u8)offer->monitorid / 32; 1005 - newchannel->monitor_bit = (u8)offer->monitorid % 32; 872 + vmbus_setup_channel_state(newchannel, offer); 1006 873 1007 874 vmbus_process_offer(newchannel); 875 + } 876 + 877 + static void check_ready_for_suspend_event(void) 878 + { 879 + /* 880 + * If all the sub-channels or hv_sock channels have been cleaned up, 881 + * then it's safe to suspend. 882 + */ 883 + if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend)) 884 + complete(&vmbus_connection.ready_for_suspend_event); 1008 885 } 1009 886 1010 887 /* ··· 1013 898 struct vmbus_channel_rescind_offer *rescind; 1014 899 struct vmbus_channel *channel; 1015 900 struct device *dev; 901 + bool clean_up_chan_for_suspend; 1016 902 1017 903 rescind = (struct vmbus_channel_rescind_offer *)hdr; 1018 904 ··· 1053 937 return; 1054 938 } 1055 939 940 + clean_up_chan_for_suspend = is_hvsock_channel(channel) || 941 + is_sub_channel(channel); 1056 942 /* 1057 943 * Before setting channel->rescind in vmbus_rescind_cleanup(), we 1058 944 * should make sure the channel callback is not running any more. 
··· 1080 962 if (channel->device_obj) { 1081 963 if (channel->chn_rescind_callback) { 1082 964 channel->chn_rescind_callback(channel); 965 + 966 + if (clean_up_chan_for_suspend) 967 + check_ready_for_suspend_event(); 968 + 1083 969 return; 1084 970 } 1085 971 /* ··· 1116 994 } 1117 995 mutex_unlock(&vmbus_connection.channel_mutex); 1118 996 } 997 + 998 + /* The "channel" may have been freed. Do not access it any longer. */ 999 + 1000 + if (clean_up_chan_for_suspend) 1001 + check_ready_for_suspend_event(); 1119 1002 } 1120 1003 1121 1004 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
+6 -2
drivers/hv/connection.c
··· 26 26 struct vmbus_connection vmbus_connection = { 27 27 .conn_state = DISCONNECTED, 28 28 .next_gpadl_handle = ATOMIC_INIT(0xE1E10), 29 + 30 + .ready_for_suspend_event= COMPLETION_INITIALIZER( 31 + vmbus_connection.ready_for_suspend_event), 32 + .ready_for_resume_event = COMPLETION_INITIALIZER( 33 + vmbus_connection.ready_for_resume_event), 29 34 }; 30 35 EXPORT_SYMBOL_GPL(vmbus_connection); 31 36 ··· 64 59 } 65 60 } 66 61 67 - static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, 68 - __u32 version) 62 + int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) 69 63 { 70 64 int ret = 0; 71 65 unsigned int cur_cpu;
+37 -29
drivers/hv/hv.c
··· 154 154 * retrieve the initialized message and event pages. Otherwise, we create and 155 155 * initialize the message and event pages. 156 156 */ 157 - int hv_synic_init(unsigned int cpu) 157 + void hv_synic_enable_regs(unsigned int cpu) 158 158 { 159 159 struct hv_per_cpu_context *hv_cpu 160 160 = per_cpu_ptr(hv_context.cpu_context, cpu); ··· 196 196 sctrl.enable = 1; 197 197 198 198 hv_set_synic_state(sctrl.as_uint64); 199 + } 200 + 201 + int hv_synic_init(unsigned int cpu) 202 + { 203 + hv_synic_enable_regs(cpu); 199 204 200 205 hv_stimer_init(cpu); 201 206 ··· 210 205 /* 211 206 * hv_synic_cleanup - Cleanup routine for hv_synic_init(). 212 207 */ 213 - int hv_synic_cleanup(unsigned int cpu) 208 + void hv_synic_disable_regs(unsigned int cpu) 214 209 { 215 210 union hv_synic_sint shared_sint; 216 211 union hv_synic_simp simp; 217 212 union hv_synic_siefp siefp; 218 213 union hv_synic_scontrol sctrl; 214 + 215 + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); 216 + 217 + shared_sint.masked = 1; 218 + 219 + /* Need to correctly cleanup in the case of SMP!!! 
*/ 220 + /* Disable the interrupt */ 221 + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); 222 + 223 + hv_get_simp(simp.as_uint64); 224 + simp.simp_enabled = 0; 225 + simp.base_simp_gpa = 0; 226 + 227 + hv_set_simp(simp.as_uint64); 228 + 229 + hv_get_siefp(siefp.as_uint64); 230 + siefp.siefp_enabled = 0; 231 + siefp.base_siefp_gpa = 0; 232 + 233 + hv_set_siefp(siefp.as_uint64); 234 + 235 + /* Disable the global synic bit */ 236 + hv_get_synic_state(sctrl.as_uint64); 237 + sctrl.enable = 0; 238 + hv_set_synic_state(sctrl.as_uint64); 239 + } 240 + 241 + int hv_synic_cleanup(unsigned int cpu) 242 + { 219 243 struct vmbus_channel *channel, *sc; 220 244 bool channel_found = false; 221 245 unsigned long flags; 222 - 223 - hv_get_synic_state(sctrl.as_uint64); 224 - if (sctrl.enable != 1) 225 - return -EFAULT; 226 246 227 247 /* 228 248 * Search for channels which are bound to the CPU we're about to ··· 279 249 280 250 hv_stimer_cleanup(cpu); 281 251 282 - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); 283 - 284 - shared_sint.masked = 1; 285 - 286 - /* Need to correctly cleanup in the case of SMP!!! */ 287 - /* Disable the interrupt */ 288 - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); 289 - 290 - hv_get_simp(simp.as_uint64); 291 - simp.simp_enabled = 0; 292 - simp.base_simp_gpa = 0; 293 - 294 - hv_set_simp(simp.as_uint64); 295 - 296 - hv_get_siefp(siefp.as_uint64); 297 - siefp.siefp_enabled = 0; 298 - siefp.base_siefp_gpa = 0; 299 - 300 - hv_set_siefp(siefp.as_uint64); 301 - 302 - /* Disable the global synic bit */ 303 - sctrl.enable = 0; 304 - hv_set_synic_state(sctrl.as_uint64); 252 + hv_synic_disable_regs(cpu); 305 253 306 254 return 0; 307 255 }
+70 -73
drivers/hv/hv_balloon.c
··· 494 494 495 495 496 496 static __u8 recv_buffer[PAGE_SIZE]; 497 - static __u8 *send_buffer; 497 + static __u8 balloon_up_send_buffer[PAGE_SIZE]; 498 498 #define PAGES_IN_2M 512 499 499 #define HA_CHUNK (32 * 1024) 500 500 ··· 1292 1292 } 1293 1293 1294 1294 while (!done) { 1295 - bl_resp = (struct dm_balloon_response *)send_buffer; 1296 - memset(send_buffer, 0, PAGE_SIZE); 1295 + memset(balloon_up_send_buffer, 0, PAGE_SIZE); 1296 + bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer; 1297 1297 bl_resp->hdr.type = DM_BALLOON_RESPONSE; 1298 1298 bl_resp->hdr.size = sizeof(struct dm_balloon_response); 1299 1299 bl_resp->more_pages = 1; ··· 1564 1564 1565 1565 } 1566 1566 1567 - static int balloon_probe(struct hv_device *dev, 1568 - const struct hv_vmbus_device_id *dev_id) 1567 + static int balloon_connect_vsp(struct hv_device *dev) 1569 1568 { 1570 - int ret; 1571 - unsigned long t; 1572 1569 struct dm_version_request version_req; 1573 1570 struct dm_capabilities cap_msg; 1574 - 1575 - #ifdef CONFIG_MEMORY_HOTPLUG 1576 - do_hot_add = hot_add; 1577 - #else 1578 - do_hot_add = false; 1579 - #endif 1580 - 1581 - /* 1582 - * First allocate a send buffer. 
1583 - */ 1584 - 1585 - send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); 1586 - if (!send_buffer) 1587 - return -ENOMEM; 1571 + unsigned long t; 1572 + int ret; 1588 1573 1589 1574 ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, 1590 - balloon_onchannelcallback, dev); 1591 - 1575 + balloon_onchannelcallback, dev); 1592 1576 if (ret) 1593 - goto probe_error0; 1577 + return ret; 1594 1578 1595 - dm_device.dev = dev; 1596 - dm_device.state = DM_INITIALIZING; 1597 - dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; 1598 - init_completion(&dm_device.host_event); 1599 - init_completion(&dm_device.config_event); 1600 - INIT_LIST_HEAD(&dm_device.ha_region_list); 1601 - spin_lock_init(&dm_device.ha_lock); 1602 - INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); 1603 - INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); 1604 - dm_device.host_specified_ha_region = false; 1605 - 1606 - dm_device.thread = 1607 - kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 1608 - if (IS_ERR(dm_device.thread)) { 1609 - ret = PTR_ERR(dm_device.thread); 1610 - goto probe_error1; 1611 - } 1612 - 1613 - #ifdef CONFIG_MEMORY_HOTPLUG 1614 - set_online_page_callback(&hv_online_page); 1615 - register_memory_notifier(&hv_memory_nb); 1616 - #endif 1617 - 1618 - hv_set_drvdata(dev, &dm_device); 1619 1579 /* 1620 1580 * Initiate the hand shake with the host and negotiate 1621 1581 * a version that the host can support. 
We start with the ··· 1591 1631 dm_device.version = version_req.version.version; 1592 1632 1593 1633 ret = vmbus_sendpacket(dev->channel, &version_req, 1594 - sizeof(struct dm_version_request), 1595 - (unsigned long)NULL, 1596 - VM_PKT_DATA_INBAND, 0); 1634 + sizeof(struct dm_version_request), 1635 + (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); 1597 1636 if (ret) 1598 - goto probe_error2; 1637 + goto out; 1599 1638 1600 1639 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 1601 1640 if (t == 0) { 1602 1641 ret = -ETIMEDOUT; 1603 - goto probe_error2; 1642 + goto out; 1604 1643 } 1605 1644 1606 1645 /* ··· 1607 1648 * fail the probe function. 1608 1649 */ 1609 1650 if (dm_device.state == DM_INIT_ERROR) { 1610 - ret = -ETIMEDOUT; 1611 - goto probe_error2; 1651 + ret = -EPROTO; 1652 + goto out; 1612 1653 } 1613 1654 1614 1655 pr_info("Using Dynamic Memory protocol version %u.%u\n", ··· 1641 1682 cap_msg.max_page_number = -1; 1642 1683 1643 1684 ret = vmbus_sendpacket(dev->channel, &cap_msg, 1644 - sizeof(struct dm_capabilities), 1645 - (unsigned long)NULL, 1646 - VM_PKT_DATA_INBAND, 0); 1685 + sizeof(struct dm_capabilities), 1686 + (unsigned long)NULL, VM_PKT_DATA_INBAND, 0); 1647 1687 if (ret) 1648 - goto probe_error2; 1688 + goto out; 1649 1689 1650 1690 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 1651 1691 if (t == 0) { 1652 1692 ret = -ETIMEDOUT; 1653 - goto probe_error2; 1693 + goto out; 1654 1694 } 1655 1695 1656 1696 /* ··· 1657 1699 * fail the probe function. 
1658 1700 */ 1659 1701 if (dm_device.state == DM_INIT_ERROR) { 1660 - ret = -ETIMEDOUT; 1661 - goto probe_error2; 1702 + ret = -EPROTO; 1703 + goto out; 1662 1704 } 1663 1705 1706 + return 0; 1707 + out: 1708 + vmbus_close(dev->channel); 1709 + return ret; 1710 + } 1711 + 1712 + static int balloon_probe(struct hv_device *dev, 1713 + const struct hv_vmbus_device_id *dev_id) 1714 + { 1715 + int ret; 1716 + 1717 + #ifdef CONFIG_MEMORY_HOTPLUG 1718 + do_hot_add = hot_add; 1719 + #else 1720 + do_hot_add = false; 1721 + #endif 1722 + dm_device.dev = dev; 1723 + dm_device.state = DM_INITIALIZING; 1724 + dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; 1725 + init_completion(&dm_device.host_event); 1726 + init_completion(&dm_device.config_event); 1727 + INIT_LIST_HEAD(&dm_device.ha_region_list); 1728 + spin_lock_init(&dm_device.ha_lock); 1729 + INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); 1730 + INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); 1731 + dm_device.host_specified_ha_region = false; 1732 + 1733 + #ifdef CONFIG_MEMORY_HOTPLUG 1734 + set_online_page_callback(&hv_online_page); 1735 + register_memory_notifier(&hv_memory_nb); 1736 + #endif 1737 + 1738 + hv_set_drvdata(dev, &dm_device); 1739 + 1740 + ret = balloon_connect_vsp(dev); 1741 + if (ret != 0) 1742 + return ret; 1743 + 1664 1744 dm_device.state = DM_INITIALIZED; 1665 - last_post_time = jiffies; 1745 + 1746 + dm_device.thread = 1747 + kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 1748 + if (IS_ERR(dm_device.thread)) { 1749 + ret = PTR_ERR(dm_device.thread); 1750 + goto probe_error; 1751 + } 1666 1752 1667 1753 return 0; 1668 1754 1669 - probe_error2: 1755 + probe_error: 1756 + vmbus_close(dev->channel); 1670 1757 #ifdef CONFIG_MEMORY_HOTPLUG 1758 + unregister_memory_notifier(&hv_memory_nb); 1671 1759 restore_online_page_callback(&hv_online_page); 1672 1760 #endif 1673 - kthread_stop(dm_device.thread); 1674 - 1675 - probe_error1: 1676 - vmbus_close(dev->channel); 1677 - probe_error0: 1678 - 
kfree(send_buffer); 1679 1761 return ret; 1680 1762 } 1681 1763 ··· 1732 1734 cancel_work_sync(&dm->balloon_wrk.wrk); 1733 1735 cancel_work_sync(&dm->ha_wrk.wrk); 1734 1736 1735 - vmbus_close(dev->channel); 1736 1737 kthread_stop(dm->thread); 1737 - kfree(send_buffer); 1738 + vmbus_close(dev->channel); 1738 1739 #ifdef CONFIG_MEMORY_HOTPLUG 1739 - restore_online_page_callback(&hv_online_page); 1740 1740 unregister_memory_notifier(&hv_memory_nb); 1741 + restore_online_page_callback(&hv_online_page); 1741 1742 #endif 1742 1743 spin_lock_irqsave(&dm_device.ha_lock, flags); 1743 1744 list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
+30
drivers/hv/hyperv_vmbus.h
··· 169 169 170 170 extern void hv_synic_free(void); 171 171 172 + extern void hv_synic_enable_regs(unsigned int cpu); 172 173 extern int hv_synic_init(unsigned int cpu); 173 174 175 + extern void hv_synic_disable_regs(unsigned int cpu); 174 176 extern int hv_synic_cleanup(unsigned int cpu); 175 177 176 178 /* Interface */ ··· 258 256 struct workqueue_struct *work_queue; 259 257 struct workqueue_struct *handle_primary_chan_wq; 260 258 struct workqueue_struct *handle_sub_chan_wq; 259 + 260 + /* 261 + * The number of sub-channels and hv_sock channels that should be 262 + * cleaned up upon suspend: sub-channels will be re-created upon 263 + * resume, and hv_sock channels should not survive suspend. 264 + */ 265 + atomic_t nr_chan_close_on_suspend; 266 + /* 267 + * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to 268 + * drop to zero. 269 + */ 270 + struct completion ready_for_suspend_event; 271 + 272 + /* 273 + * The number of primary channels that should be "fixed up" 274 + * upon resume: these channels are re-offered upon resume, and some 275 + * fields of the channel offers (i.e. child_relid and connection_id) 276 + * can change, so the old offermsg must be fixed up, before the resume 277 + * callbacks of the VSC drivers start to further touch the channels. 278 + */ 279 + atomic_t nr_chan_fixup_on_resume; 280 + /* 281 + * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to 282 + * drop to zero. 283 + */ 284 + struct completion ready_for_resume_event; 261 285 }; 262 286 263 287 ··· 297 269 298 270 299 271 extern struct vmbus_connection vmbus_connection; 272 + 273 + int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version); 300 274 301 275 static inline void vmbus_send_interrupt(u32 relid) 302 276 {
+265
drivers/hv/vmbus_drv.c
··· 24 24 #include <linux/sched/task_stack.h> 25 25 26 26 #include <asm/mshyperv.h> 27 + #include <linux/delay.h> 27 28 #include <linux/notifier.h> 28 29 #include <linux/ptrace.h> 29 30 #include <linux/screen_info.h> 30 31 #include <linux/kdebug.h> 31 32 #include <linux/efi.h> 32 33 #include <linux/random.h> 34 + #include <linux/syscore_ops.h> 33 35 #include <clocksource/hyperv_timer.h> 34 36 #include "hyperv_vmbus.h" 35 37 ··· 912 910 drv->shutdown(dev); 913 911 } 914 912 913 + /* 914 + * vmbus_suspend - Suspend a vmbus device 915 + */ 916 + static int vmbus_suspend(struct device *child_device) 917 + { 918 + struct hv_driver *drv; 919 + struct hv_device *dev = device_to_hv_device(child_device); 920 + 921 + /* The device may not be attached yet */ 922 + if (!child_device->driver) 923 + return 0; 924 + 925 + drv = drv_to_hv_drv(child_device->driver); 926 + if (!drv->suspend) 927 + return -EOPNOTSUPP; 928 + 929 + return drv->suspend(dev); 930 + } 931 + 932 + /* 933 + * vmbus_resume - Resume a vmbus device 934 + */ 935 + static int vmbus_resume(struct device *child_device) 936 + { 937 + struct hv_driver *drv; 938 + struct hv_device *dev = device_to_hv_device(child_device); 939 + 940 + /* The device may not be attached yet */ 941 + if (!child_device->driver) 942 + return 0; 943 + 944 + drv = drv_to_hv_drv(child_device->driver); 945 + if (!drv->resume) 946 + return -EOPNOTSUPP; 947 + 948 + return drv->resume(dev); 949 + } 915 950 916 951 /* 917 952 * vmbus_device_release - Final callback release of the vmbus child device ··· 964 925 kfree(hv_dev); 965 926 } 966 927 928 + /* 929 + * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than 930 + * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm. 
931 + */ 932 + static const struct dev_pm_ops vmbus_pm = { 933 + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume) 934 + }; 935 + 967 936 /* The one and only one */ 968 937 static struct bus_type hv_bus = { 969 938 .name = "vmbus", ··· 982 935 .uevent = vmbus_uevent, 983 936 .dev_groups = vmbus_dev_groups, 984 937 .drv_groups = vmbus_drv_groups, 938 + .pm = &vmbus_pm, 985 939 }; 986 940 987 941 struct onmessage_work_context { ··· 1070 1022 vmbus_signal_eom(msg, message_type); 1071 1023 } 1072 1024 1025 + /* 1026 + * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for 1027 + * hibernation, because hv_sock connections can not persist across hibernation. 1028 + */ 1029 + static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) 1030 + { 1031 + struct onmessage_work_context *ctx; 1032 + struct vmbus_channel_rescind_offer *rescind; 1033 + 1034 + WARN_ON(!is_hvsock_channel(channel)); 1035 + 1036 + /* 1037 + * sizeof(*ctx) is small and the allocation should really not fail, 1038 + * otherwise the state of the hv_sock connections ends up in limbo. 1039 + */ 1040 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); 1041 + 1042 + /* 1043 + * So far, these are not really used by Linux. Just set them to the 1044 + * reasonable values conforming to the definitions of the fields. 1045 + */ 1046 + ctx->msg.header.message_type = 1; 1047 + ctx->msg.header.payload_size = sizeof(*rescind); 1048 + 1049 + /* These values are actually used by Linux. 
*/ 1050 + rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload; 1051 + rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; 1052 + rescind->child_relid = channel->offermsg.child_relid; 1053 + 1054 + INIT_WORK(&ctx->work, vmbus_onmessage_work); 1055 + 1056 + queue_work_on(vmbus_connection.connect_cpu, 1057 + vmbus_connection.work_queue, 1058 + &ctx->work); 1059 + } 1073 1060 1074 1061 /* 1075 1062 * Direct callback for channels using other deferred processing ··· 2125 2042 return ret_val; 2126 2043 } 2127 2044 2045 + static int vmbus_bus_suspend(struct device *dev) 2046 + { 2047 + struct vmbus_channel *channel, *sc; 2048 + unsigned long flags; 2049 + 2050 + while (atomic_read(&vmbus_connection.offer_in_progress) != 0) { 2051 + /* 2052 + * We wait here until the completion of any channel 2053 + * offers that are currently in progress. 2054 + */ 2055 + msleep(1); 2056 + } 2057 + 2058 + mutex_lock(&vmbus_connection.channel_mutex); 2059 + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2060 + if (!is_hvsock_channel(channel)) 2061 + continue; 2062 + 2063 + vmbus_force_channel_rescinded(channel); 2064 + } 2065 + mutex_unlock(&vmbus_connection.channel_mutex); 2066 + 2067 + /* 2068 + * Wait until all the sub-channels and hv_sock channels have been 2069 + * cleaned up. Sub-channels should be destroyed upon suspend, otherwise 2070 + * they would conflict with the new sub-channels that will be created 2071 + * in the resume path. hv_sock channels should also be destroyed, but 2072 + * a hv_sock channel of an established hv_sock connection can not be 2073 + * really destroyed since it may still be referenced by the userspace 2074 + * application, so we just force the hv_sock channel to be rescinded 2075 + * by vmbus_force_channel_rescinded(), and the userspace application 2076 + * will thoroughly destroy the channel after hibernation. 
2077 + * 2078 + * Note: the counter nr_chan_close_on_suspend may never go above 0 if 2079 + * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. 2080 + */ 2081 + if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0) 2082 + wait_for_completion(&vmbus_connection.ready_for_suspend_event); 2083 + 2084 + WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0); 2085 + 2086 + mutex_lock(&vmbus_connection.channel_mutex); 2087 + 2088 + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { 2089 + /* 2090 + * Invalidate the field. Upon resume, vmbus_onoffer() will fix 2091 + * up the field, and the other fields (if necessary). 2092 + */ 2093 + channel->offermsg.child_relid = INVALID_RELID; 2094 + 2095 + if (is_hvsock_channel(channel)) { 2096 + if (!channel->rescind) { 2097 + pr_err("hv_sock channel not rescinded!\n"); 2098 + WARN_ON_ONCE(1); 2099 + } 2100 + continue; 2101 + } 2102 + 2103 + spin_lock_irqsave(&channel->lock, flags); 2104 + list_for_each_entry(sc, &channel->sc_list, sc_list) { 2105 + pr_err("Sub-channel not deleted!\n"); 2106 + WARN_ON_ONCE(1); 2107 + } 2108 + spin_unlock_irqrestore(&channel->lock, flags); 2109 + 2110 + atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume); 2111 + } 2112 + 2113 + mutex_unlock(&vmbus_connection.channel_mutex); 2114 + 2115 + vmbus_initiate_unload(false); 2116 + 2117 + vmbus_connection.conn_state = DISCONNECTED; 2118 + 2119 + /* Reset the event for the next resume. */ 2120 + reinit_completion(&vmbus_connection.ready_for_resume_event); 2121 + 2122 + return 0; 2123 + } 2124 + 2125 + static int vmbus_bus_resume(struct device *dev) 2126 + { 2127 + struct vmbus_channel_msginfo *msginfo; 2128 + size_t msgsize; 2129 + int ret; 2130 + 2131 + /* 2132 + * We only use the 'vmbus_proto_version', which was in use before 2133 + * hibernation, to re-negotiate with the host. 
2134 + */ 2135 + if (vmbus_proto_version == VERSION_INVAL || 2136 + vmbus_proto_version == 0) { 2137 + pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version); 2138 + return -EINVAL; 2139 + } 2140 + 2141 + msgsize = sizeof(*msginfo) + 2142 + sizeof(struct vmbus_channel_initiate_contact); 2143 + 2144 + msginfo = kzalloc(msgsize, GFP_KERNEL); 2145 + 2146 + if (msginfo == NULL) 2147 + return -ENOMEM; 2148 + 2149 + ret = vmbus_negotiate_version(msginfo, vmbus_proto_version); 2150 + 2151 + kfree(msginfo); 2152 + 2153 + if (ret != 0) 2154 + return ret; 2155 + 2156 + WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); 2157 + 2158 + vmbus_request_offers(); 2159 + 2160 + wait_for_completion(&vmbus_connection.ready_for_resume_event); 2161 + 2162 + /* Reset the event for the next suspend. */ 2163 + reinit_completion(&vmbus_connection.ready_for_suspend_event); 2164 + 2165 + return 0; 2166 + } 2167 + 2128 2168 static const struct acpi_device_id vmbus_acpi_device_ids[] = { 2129 2169 {"VMBUS", 0}, 2130 2170 {"VMBus", 0}, 2131 2171 {"", 0}, 2132 2172 }; 2133 2173 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); 2174 + 2175 + /* 2176 + * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than 2177 + * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the 2178 + * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the 2179 + * pci "noirq" restore callback runs before "non-noirq" callbacks (see 2180 + * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> 2181 + * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's 2182 + * resume callback must also run via the "noirq" callbacks. 
2183 + */ 2184 + static const struct dev_pm_ops vmbus_bus_pm = { 2185 + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume) 2186 + }; 2134 2187 2135 2188 static struct acpi_driver vmbus_acpi_driver = { 2136 2189 .name = "vmbus", ··· 2275 2056 .add = vmbus_acpi_add, 2276 2057 .remove = vmbus_acpi_remove, 2277 2058 }, 2059 + .drv.pm = &vmbus_bus_pm, 2278 2060 }; 2279 2061 2280 2062 static void hv_kexec_handler(void) ··· 2304 2084 hv_stimer_cleanup(cpu); 2305 2085 hv_synic_cleanup(cpu); 2306 2086 hyperv_cleanup(); 2087 + }; 2088 + 2089 + static int hv_synic_suspend(void) 2090 + { 2091 + /* 2092 + * When we reach here, all the non-boot CPUs have been offlined, and 2093 + * the stimers on them have been unbound in hv_synic_cleanup() -> 2094 + * hv_stimer_cleanup() -> clockevents_unbind_device(). 2095 + * 2096 + * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here 2097 + * we do not unbind the stimer on CPU0 because: 1) it's unnecessary 2098 + * because the interrupts remain disabled between syscore_suspend() 2099 + * and syscore_resume(): see create_image() and resume_target_kernel(); 2100 + * 2) the stimer on CPU0 is automatically disabled later by 2101 + * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... 2102 + * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning 2103 + * would be triggered if we call clockevents_unbind_device(), which 2104 + * may sleep, in an interrupts-disabled context. So, we intentionally 2105 + * don't call hv_stimer_cleanup(0) here. 2106 + */ 2107 + 2108 + hv_synic_disable_regs(0); 2109 + 2110 + return 0; 2111 + } 2112 + 2113 + static void hv_synic_resume(void) 2114 + { 2115 + hv_synic_enable_regs(0); 2116 + 2117 + /* 2118 + * Note: we don't need to call hv_stimer_init(0), because the timer 2119 + * on CPU0 is not unbound in hv_synic_suspend(), and the timer is 2120 + * automatically re-enabled in timekeeping_resume(). 
2121 + */ 2122 + } 2123 + 2124 + /* The callbacks run only on CPU0, with irqs_disabled. */ 2125 + static struct syscore_ops hv_synic_syscore_ops = { 2126 + .suspend = hv_synic_suspend, 2127 + .resume = hv_synic_resume, 2307 2128 }; 2308 2129 2309 2130 static int __init hv_acpi_init(void) ··· 2377 2116 hv_setup_kexec_handler(hv_kexec_handler); 2378 2117 hv_setup_crash_handler(hv_crash_handler); 2379 2118 2119 + register_syscore_ops(&hv_synic_syscore_ops); 2120 + 2380 2121 return 0; 2381 2122 2382 2123 cleanup: ··· 2390 2127 static void __exit vmbus_exit(void) 2391 2128 { 2392 2129 int cpu; 2130 + 2131 + unregister_syscore_ops(&hv_synic_syscore_ops); 2393 2132 2394 2133 hv_remove_kexec_handler(); 2395 2134 hv_remove_crash_handler();
+15 -1
include/linux/hyperv.h
··· 245 245 } pipe; 246 246 } u; 247 247 /* 248 - * The sub_channel_index is defined in win8. 248 + * The sub_channel_index is defined in Win8: a value of zero means a 249 + * primary channel and a value of non-zero means a sub-channel. 250 + * 251 + * Before Win8, the field is reserved, meaning it's always zero. 249 252 */ 250 253 u16 sub_channel_index; 251 254 u16 reserved3; ··· 425 422 CHANNELMSG_TL_CONNECT_REQUEST = 21, 426 423 CHANNELMSG_COUNT 427 424 }; 425 + 426 + /* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */ 427 + #define INVALID_RELID U32_MAX 428 428 429 429 struct vmbus_channel_message_header { 430 430 enum vmbus_channel_message_type msgtype; ··· 940 934 VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER); 941 935 } 942 936 937 + static inline bool is_sub_channel(const struct vmbus_channel *c) 938 + { 939 + return c->offermsg.offer.sub_channel_index != 0; 940 + } 941 + 943 942 static inline void set_channel_affinity_state(struct vmbus_channel *c, 944 943 enum hv_numa_policy policy) 945 944 { ··· 1159 1148 int (*probe)(struct hv_device *, const struct hv_vmbus_device_id *); 1160 1149 int (*remove)(struct hv_device *); 1161 1150 void (*shutdown)(struct hv_device *); 1151 + 1152 + int (*suspend)(struct hv_device *); 1153 + int (*resume)(struct hv_device *); 1162 1154 1163 1155 }; 1164 1156
+3
tools/hv/Build
··· 1 + hv_kvp_daemon-y += hv_kvp_daemon.o 2 + hv_vss_daemon-y += hv_vss_daemon.o 3 + hv_fcopy_daemon-y += hv_fcopy_daemon.o
+41 -10
tools/hv/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 # Makefile for Hyper-V tools 3 - 4 - WARNINGS = -Wall -Wextra 5 - CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS) 6 - 7 - CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include 3 + include ../scripts/Makefile.include 8 4 9 5 sbindir ?= /usr/sbin 10 6 libexecdir ?= /usr/libexec 11 7 sharedstatedir ?= /var/lib 12 8 13 - ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon 9 + ifeq ($(srctree),) 10 + srctree := $(patsubst %/,%,$(dir $(CURDIR))) 11 + srctree := $(patsubst %/,%,$(dir $(srctree))) 12 + endif 13 + 14 + # Do not use make's built-in rules 15 + # (this improves performance and avoids hard-to-debug behaviour); 16 + MAKEFLAGS += -r 17 + 18 + override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include 19 + 20 + ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon 21 + ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) 14 22 15 23 ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh 16 24 17 25 all: $(ALL_PROGRAMS) 18 26 19 - %: %.c 20 - $(CC) $(CFLAGS) -o $@ $^ 27 + export srctree OUTPUT CC LD CFLAGS 28 + include $(srctree)/tools/build/Makefile.include 29 + 30 + HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o 31 + $(HV_KVP_DAEMON_IN): FORCE 32 + $(Q)$(MAKE) $(build)=hv_kvp_daemon 33 + $(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN) 34 + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ 35 + 36 + HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o 37 + $(HV_VSS_DAEMON_IN): FORCE 38 + $(Q)$(MAKE) $(build)=hv_vss_daemon 39 + $(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN) 40 + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ 41 + 42 + HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o 43 + $(HV_FCOPY_DAEMON_IN): FORCE 44 + $(Q)$(MAKE) $(build)=hv_fcopy_daemon 45 + $(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN) 46 + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ 21 47 22 48 clean: 23 - $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon 49 + rm -f $(ALL_PROGRAMS) 50 + 
find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete 24 51 25 - install: all 52 + install: $(ALL_PROGRAMS) 26 53 install -d -m 755 $(DESTDIR)$(sbindir); \ 27 54 install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \ 28 55 install -d -m 755 $(DESTDIR)$(sharedstatedir); \ ··· 60 33 for script in $(ALL_SCRIPTS); do \ 61 34 install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \ 62 35 done 36 + 37 + FORCE: 38 + 39 + .PHONY: all install clean FORCE prepare