Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

hv_balloon: Add support for configurable order free page reporting

Newer versions of Hyper-V allow reporting unused guest pages in chunks
smaller than 2 Mbytes. Using smaller chunks allows reporting more
unused guest pages, but with increased overhead in finding the
small chunks. To make this tradeoff configurable, use the existing
page_reporting_order module parameter to control the reporting order.
Drop and refine checks that restricted the minimum page reporting order
to 2 Mbyte pages. Add appropriate checks to make sure the
underlying Hyper-V versions support cold discard hints of any order
(and not just orders starting from 9).

Signed-off-by: Shradha Gupta <shradhagupta@linux.microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1664517699-1085-3-git-send-email-shradhagupta@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>

authored by

Shradha Gupta and committed by
Wei Liu
dc60f2db aebb02ce

+73 -21
+73 -21
drivers/hv/hv_balloon.c
··· 469 469 * the specified number of seconds. 470 470 */ 471 471 static uint pressure_report_delay = 45; 472 + extern unsigned int page_reporting_order; 473 + #define HV_MAX_FAILURES 2 472 474 473 475 /* 474 476 * The last time we posted a pressure report to host. 475 477 */ 476 478 static unsigned long last_post_time; 479 + 480 + static int hv_hypercall_multi_failure; 477 481 478 482 module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); 479 483 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); ··· 582 578 static struct hv_dynmem_device dm_device; 583 579 584 580 static void post_status(struct hv_dynmem_device *dm); 581 + 582 + static void enable_page_reporting(void); 583 + 584 + static void disable_page_reporting(void); 585 585 586 586 #ifdef CONFIG_MEMORY_HOTPLUG 587 587 static inline bool has_pfn_is_backed(struct hv_hotadd_state *has, ··· 1426 1418 */ 1427 1419 reinit_completion(&dm_device.config_event); 1428 1420 post_status(dm); 1421 + /* 1422 + * disable free page reporting if multiple hypercall 1423 + * failure flag set. 
It is not done in the page_reporting 1424 + * callback context as that causes a deadlock between 1425 + * page_reporting_process() and page_reporting_unregister() 1426 + */ 1427 + if (hv_hypercall_multi_failure >= HV_MAX_FAILURES) { 1428 + pr_err("Multiple failures in cold memory discard hypercall, disabling page reporting\n"); 1429 + disable_page_reporting(); 1430 + /* Reset the flag after disabling reporting */ 1431 + hv_hypercall_multi_failure = 0; 1432 + } 1429 1433 } 1430 1434 1431 1435 return 0; ··· 1613 1593 1614 1594 } 1615 1595 1616 - /* Hyper-V only supports reporting 2MB pages or higher */ 1617 - #define HV_MIN_PAGE_REPORTING_ORDER 9 1618 - #define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER) 1596 + #define HV_LARGE_REPORTING_ORDER 9 1597 + #define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \ 1598 + HV_LARGE_REPORTING_ORDER) 1619 1599 static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info, 1620 1600 struct scatterlist *sgl, unsigned int nents) 1621 1601 { 1622 1602 unsigned long flags; 1623 1603 struct hv_memory_hint *hint; 1624 - int i; 1604 + int i, order; 1625 1605 u64 status; 1626 1606 struct scatterlist *sg; 1627 1607 1628 1608 WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); 1629 - WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN); 1609 + WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order)); 1630 1610 local_irq_save(flags); 1631 1611 hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg); 1632 1612 if (!hint) { ··· 1641 1621 1642 1622 range = &hint->ranges[i]; 1643 1623 range->address_space = 0; 1644 - /* page reporting only reports 2MB pages or higher */ 1645 - range->page.largepage = 1; 1646 - range->page.additional_pages = 1647 - (sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1; 1648 - range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; 1649 - range->base_large_pfn = 1650 - page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER; 1624 + order = 
get_order(sg->length); 1625 + /* 1626 + * Hyper-V expects the additional_pages field in the units 1627 + * of one of these 3 sizes, 4Kbytes, 2Mbytes or 1Gbytes. 1628 + * This is dictated by the values of the fields page.largesize 1629 + * and page_size. 1630 + * This code however, only uses 4Kbytes and 2Mbytes units 1631 + * and not 1Gbytes unit. 1632 + */ 1633 + 1634 + /* page reporting for pages 2MB or higher */ 1635 + if (order >= HV_LARGE_REPORTING_ORDER ) { 1636 + range->page.largepage = 1; 1637 + range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB; 1638 + range->base_large_pfn = page_to_hvpfn( 1639 + sg_page(sg)) >> HV_LARGE_REPORTING_ORDER; 1640 + range->page.additional_pages = 1641 + (sg->length / HV_LARGE_REPORTING_LEN) - 1; 1642 + } else { 1643 + /* Page reporting for pages below 2MB */ 1644 + range->page.basepfn = page_to_hvpfn(sg_page(sg)); 1645 + range->page.largepage = false; 1646 + range->page.additional_pages = 1647 + (sg->length / HV_HYP_PAGE_SIZE) - 1; 1648 + } 1649 + 1651 1650 } 1652 1651 1653 1652 status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0, 1654 1653 hint, NULL); 1655 1654 local_irq_restore(flags); 1656 - if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) { 1655 + if (!hv_result_success(status)) { 1656 + 1657 1657 pr_err("Cold memory discard hypercall failed with status %llx\n", 1658 - status); 1658 + status); 1659 + if (hv_hypercall_multi_failure > 0) 1660 + hv_hypercall_multi_failure++; 1661 + 1662 + if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) { 1663 + pr_err("Underlying Hyper-V does not support order less than 9. 
Hypercall failed\n"); 1664 + pr_err("Defaulting to page_reporting_order %d\n", 1665 + pageblock_order); 1666 + page_reporting_order = pageblock_order; 1667 + hv_hypercall_multi_failure++; 1668 + return -EINVAL; 1669 + } 1670 + 1659 1671 return -EINVAL; 1660 1672 } 1661 1673 ··· 1698 1646 { 1699 1647 int ret; 1700 1648 1701 - /* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. */ 1702 - if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) { 1703 - pr_debug("Cold memory discard is only supported on 2MB pages and above\n"); 1704 - return; 1705 - } 1706 - 1707 1649 if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) { 1708 1650 pr_debug("Cold memory discard hint not supported by Hyper-V\n"); 1709 1651 return; ··· 1705 1659 1706 1660 BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES); 1707 1661 dm_device.pr_dev_info.report = hv_free_page_report; 1662 + /* 1663 + * We let the page_reporting_order parameter decide the order 1664 + * in the page_reporting code 1665 + */ 1666 + dm_device.pr_dev_info.order = 0; 1708 1667 ret = page_reporting_register(&dm_device.pr_dev_info); 1709 1668 if (ret < 0) { 1710 1669 dm_device.pr_dev_info.report = NULL; 1711 1670 pr_err("Failed to enable cold memory discard: %d\n", ret); 1712 1671 } else { 1713 - pr_info("Cold memory discard hint enabled\n"); 1672 + pr_info("Cold memory discard hint enabled with order %d\n", 1673 + page_reporting_order); 1714 1674 } 1715 1675 } 1716 1676