Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mshv: Move region management to mshv_regions.c

Refactor memory region management functions from mshv_root_main.c into
mshv_regions.c for better modularity and code organization.

Adjust function calls and headers to use the new implementation. Improve
maintainability and separation of concerns in the mshv_root module.

Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
Reviewed-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>

Authored by Stanislav Kinsburskii and committed by Wei Liu.
e950c30a 6f6aed2c

+198 -165
+1 -1
drivers/hv/Makefile
··· 14 14 hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o 15 15 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o 16 16 mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \ 17 - mshv_root_hv_call.o mshv_portid_table.o 17 + mshv_root_hv_call.o mshv_portid_table.o mshv_regions.o 18 18 mshv_vtl-y := mshv_vtl_main.o 19 19 20 20 # Code that must be built-in
+175
drivers/hv/mshv_regions.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (c) 2025, Microsoft Corporation. 4 + * 5 + * Memory region management for mshv_root module. 6 + * 7 + * Authors: Microsoft Linux virtualization team 8 + */ 9 + 10 + #include <linux/mm.h> 11 + #include <linux/vmalloc.h> 12 + 13 + #include <asm/mshyperv.h> 14 + 15 + #include "mshv_root.h" 16 + 17 + struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages, 18 + u64 uaddr, u32 flags, 19 + bool is_mmio) 20 + { 21 + struct mshv_mem_region *region; 22 + 23 + region = vzalloc(sizeof(*region) + sizeof(struct page *) * nr_pages); 24 + if (!region) 25 + return ERR_PTR(-ENOMEM); 26 + 27 + region->nr_pages = nr_pages; 28 + region->start_gfn = guest_pfn; 29 + region->start_uaddr = uaddr; 30 + region->hv_map_flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_ADJUSTABLE; 31 + if (flags & BIT(MSHV_SET_MEM_BIT_WRITABLE)) 32 + region->hv_map_flags |= HV_MAP_GPA_WRITABLE; 33 + if (flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE)) 34 + region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE; 35 + 36 + /* Note: large_pages flag populated when we pin the pages */ 37 + if (!is_mmio) 38 + region->flags.range_pinned = true; 39 + 40 + return region; 41 + } 42 + 43 + int mshv_region_share(struct mshv_mem_region *region) 44 + { 45 + u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED; 46 + 47 + if (region->flags.large_pages) 48 + flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 49 + 50 + return hv_call_modify_spa_host_access(region->partition->pt_id, 51 + region->pages, region->nr_pages, 52 + HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE, 53 + flags, true); 54 + } 55 + 56 + int mshv_region_unshare(struct mshv_mem_region *region) 57 + { 58 + u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE; 59 + 60 + if (region->flags.large_pages) 61 + flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 62 + 63 + return hv_call_modify_spa_host_access(region->partition->pt_id, 64 + region->pages, region->nr_pages, 65 + 0, 66 + flags, false); 67 
+ } 68 + 69 + static int mshv_region_remap_pages(struct mshv_mem_region *region, 70 + u32 map_flags, 71 + u64 page_offset, u64 page_count) 72 + { 73 + if (page_offset + page_count > region->nr_pages) 74 + return -EINVAL; 75 + 76 + if (region->flags.large_pages) 77 + map_flags |= HV_MAP_GPA_LARGE_PAGE; 78 + 79 + return hv_call_map_gpa_pages(region->partition->pt_id, 80 + region->start_gfn + page_offset, 81 + page_count, map_flags, 82 + region->pages + page_offset); 83 + } 84 + 85 + int mshv_region_map(struct mshv_mem_region *region) 86 + { 87 + u32 map_flags = region->hv_map_flags; 88 + 89 + return mshv_region_remap_pages(region, map_flags, 90 + 0, region->nr_pages); 91 + } 92 + 93 + static void mshv_region_invalidate_pages(struct mshv_mem_region *region, 94 + u64 page_offset, u64 page_count) 95 + { 96 + if (region->flags.range_pinned) 97 + unpin_user_pages(region->pages + page_offset, page_count); 98 + 99 + memset(region->pages + page_offset, 0, 100 + page_count * sizeof(struct page *)); 101 + } 102 + 103 + void mshv_region_invalidate(struct mshv_mem_region *region) 104 + { 105 + mshv_region_invalidate_pages(region, 0, region->nr_pages); 106 + } 107 + 108 + int mshv_region_pin(struct mshv_mem_region *region) 109 + { 110 + u64 done_count, nr_pages; 111 + struct page **pages; 112 + __u64 userspace_addr; 113 + int ret; 114 + 115 + for (done_count = 0; done_count < region->nr_pages; done_count += ret) { 116 + pages = region->pages + done_count; 117 + userspace_addr = region->start_uaddr + 118 + done_count * HV_HYP_PAGE_SIZE; 119 + nr_pages = min(region->nr_pages - done_count, 120 + MSHV_PIN_PAGES_BATCH_SIZE); 121 + 122 + /* 123 + * Pinning assuming 4k pages works for large pages too. 124 + * All page structs within the large page are returned. 125 + * 126 + * Pin requests are batched because pin_user_pages_fast 127 + * with the FOLL_LONGTERM flag does a large temporary 128 + * allocation of contiguous memory. 
129 + */ 130 + ret = pin_user_pages_fast(userspace_addr, nr_pages, 131 + FOLL_WRITE | FOLL_LONGTERM, 132 + pages); 133 + if (ret < 0) 134 + goto release_pages; 135 + } 136 + 137 + if (PageHuge(region->pages[0])) 138 + region->flags.large_pages = true; 139 + 140 + return 0; 141 + 142 + release_pages: 143 + mshv_region_invalidate_pages(region, 0, done_count); 144 + return ret; 145 + } 146 + 147 + void mshv_region_destroy(struct mshv_mem_region *region) 148 + { 149 + struct mshv_partition *partition = region->partition; 150 + u32 unmap_flags = 0; 151 + int ret; 152 + 153 + hlist_del(&region->hnode); 154 + 155 + if (mshv_partition_encrypted(partition)) { 156 + ret = mshv_region_share(region); 157 + if (ret) { 158 + pt_err(partition, 159 + "Failed to regain access to memory, unpinning user pages will fail and crash the host error: %d\n", 160 + ret); 161 + return; 162 + } 163 + } 164 + 165 + if (region->flags.large_pages) 166 + unmap_flags |= HV_UNMAP_GPA_LARGE_PAGE; 167 + 168 + /* ignore unmap failures and continue as process may be exiting */ 169 + hv_call_unmap_gpa_pages(partition->pt_id, region->start_gfn, 170 + region->nr_pages, unmap_flags); 171 + 172 + mshv_region_invalidate(region); 173 + 174 + vfree(region); 175 + }
+10
drivers/hv/mshv_root.h
··· 312 312 extern enum hv_scheduler_type hv_scheduler_type; 313 313 extern u8 * __percpu *hv_synic_eventring_tail; 314 314 315 + struct mshv_mem_region *mshv_region_create(u64 guest_pfn, u64 nr_pages, 316 + u64 uaddr, u32 flags, 317 + bool is_mmio); 318 + int mshv_region_share(struct mshv_mem_region *region); 319 + int mshv_region_unshare(struct mshv_mem_region *region); 320 + int mshv_region_map(struct mshv_mem_region *region); 321 + void mshv_region_invalidate(struct mshv_mem_region *region); 322 + int mshv_region_pin(struct mshv_mem_region *region); 323 + void mshv_region_destroy(struct mshv_mem_region *region); 324 + 315 325 #endif /* _MSHV_ROOT_H_ */
+12 -164
drivers/hv/mshv_root_main.c
··· 1059 1059 *status = partition->async_hypercall_status; 1060 1060 } 1061 1061 1062 - static int 1063 - mshv_partition_region_share(struct mshv_mem_region *region) 1064 - { 1065 - u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_SHARED; 1066 - 1067 - if (region->flags.large_pages) 1068 - flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 1069 - 1070 - return hv_call_modify_spa_host_access(region->partition->pt_id, 1071 - region->pages, region->nr_pages, 1072 - HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE, 1073 - flags, true); 1074 - } 1075 - 1076 - static int 1077 - mshv_partition_region_unshare(struct mshv_mem_region *region) 1078 - { 1079 - u32 flags = HV_MODIFY_SPA_PAGE_HOST_ACCESS_MAKE_EXCLUSIVE; 1080 - 1081 - if (region->flags.large_pages) 1082 - flags |= HV_MODIFY_SPA_PAGE_HOST_ACCESS_LARGE_PAGE; 1083 - 1084 - return hv_call_modify_spa_host_access(region->partition->pt_id, 1085 - region->pages, region->nr_pages, 1086 - 0, 1087 - flags, false); 1088 - } 1089 - 1090 - static int 1091 - mshv_region_remap_pages(struct mshv_mem_region *region, u32 map_flags, 1092 - u64 page_offset, u64 page_count) 1093 - { 1094 - if (page_offset + page_count > region->nr_pages) 1095 - return -EINVAL; 1096 - 1097 - if (region->flags.large_pages) 1098 - map_flags |= HV_MAP_GPA_LARGE_PAGE; 1099 - 1100 - /* ask the hypervisor to map guest ram */ 1101 - return hv_call_map_gpa_pages(region->partition->pt_id, 1102 - region->start_gfn + page_offset, 1103 - page_count, map_flags, 1104 - region->pages + page_offset); 1105 - } 1106 - 1107 - static int 1108 - mshv_region_map(struct mshv_mem_region *region) 1109 - { 1110 - u32 map_flags = region->hv_map_flags; 1111 - 1112 - return mshv_region_remap_pages(region, map_flags, 1113 - 0, region->nr_pages); 1114 - } 1115 - 1116 - static void 1117 - mshv_region_invalidate_pages(struct mshv_mem_region *region, 1118 - u64 page_offset, u64 page_count) 1119 - { 1120 - if (region->flags.range_pinned) 1121 - unpin_user_pages(region->pages + page_offset, 
page_count); 1122 - 1123 - memset(region->pages + page_offset, 0, 1124 - page_count * sizeof(struct page *)); 1125 - } 1126 - 1127 - static void 1128 - mshv_region_invalidate(struct mshv_mem_region *region) 1129 - { 1130 - mshv_region_invalidate_pages(region, 0, region->nr_pages); 1131 - } 1132 - 1133 - static int 1134 - mshv_region_pin(struct mshv_mem_region *region) 1135 - { 1136 - u64 done_count, nr_pages; 1137 - struct page **pages; 1138 - __u64 userspace_addr; 1139 - int ret; 1140 - 1141 - for (done_count = 0; done_count < region->nr_pages; done_count += ret) { 1142 - pages = region->pages + done_count; 1143 - userspace_addr = region->start_uaddr + 1144 - done_count * HV_HYP_PAGE_SIZE; 1145 - nr_pages = min(region->nr_pages - done_count, 1146 - MSHV_PIN_PAGES_BATCH_SIZE); 1147 - 1148 - /* 1149 - * Pinning assuming 4k pages works for large pages too. 1150 - * All page structs within the large page are returned. 1151 - * 1152 - * Pin requests are batched because pin_user_pages_fast 1153 - * with the FOLL_LONGTERM flag does a large temporary 1154 - * allocation of contiguous memory. 
1155 - */ 1156 - ret = pin_user_pages_fast(userspace_addr, nr_pages, 1157 - FOLL_WRITE | FOLL_LONGTERM, 1158 - pages); 1159 - if (ret < 0) 1160 - goto release_pages; 1161 - } 1162 - 1163 - if (PageHuge(region->pages[0])) 1164 - region->flags.large_pages = true; 1165 - 1166 - return 0; 1167 - 1168 - release_pages: 1169 - mshv_region_invalidate_pages(region, 0, done_count); 1170 - return ret; 1171 - } 1172 - 1173 1062 static struct mshv_mem_region * 1174 1063 mshv_partition_region_by_gfn(struct mshv_partition *partition, u64 gfn) 1175 1064 { ··· 1082 1193 struct mshv_mem_region **regionpp, 1083 1194 bool is_mmio) 1084 1195 { 1085 - struct mshv_mem_region *region, *rg; 1196 + struct mshv_mem_region *rg; 1086 1197 u64 nr_pages = HVPFN_DOWN(mem->size); 1087 1198 1088 1199 /* Reject overlapping regions */ ··· 1094 1205 return -EEXIST; 1095 1206 } 1096 1207 1097 - region = vzalloc(sizeof(*region) + sizeof(struct page *) * nr_pages); 1098 - if (!region) 1099 - return -ENOMEM; 1208 + rg = mshv_region_create(mem->guest_pfn, nr_pages, 1209 + mem->userspace_addr, mem->flags, 1210 + is_mmio); 1211 + if (IS_ERR(rg)) 1212 + return PTR_ERR(rg); 1100 1213 1101 - region->nr_pages = nr_pages; 1102 - region->start_gfn = mem->guest_pfn; 1103 - region->start_uaddr = mem->userspace_addr; 1104 - region->hv_map_flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_ADJUSTABLE; 1105 - if (mem->flags & BIT(MSHV_SET_MEM_BIT_WRITABLE)) 1106 - region->hv_map_flags |= HV_MAP_GPA_WRITABLE; 1107 - if (mem->flags & BIT(MSHV_SET_MEM_BIT_EXECUTABLE)) 1108 - region->hv_map_flags |= HV_MAP_GPA_EXECUTABLE; 1214 + rg->partition = partition; 1109 1215 1110 - /* Note: large_pages flag populated when we pin the pages */ 1111 - if (!is_mmio) 1112 - region->flags.range_pinned = true; 1113 - 1114 - region->partition = partition; 1115 - 1116 - *regionpp = region; 1216 + *regionpp = rg; 1117 1217 1118 1218 return 0; 1119 1219 } ··· 1140 1262 * access to guest memory regions. 
1141 1263 */ 1142 1264 if (mshv_partition_encrypted(partition)) { 1143 - ret = mshv_partition_region_unshare(region); 1265 + ret = mshv_region_unshare(region); 1144 1266 if (ret) { 1145 1267 pt_err(partition, 1146 1268 "Failed to unshare memory region (guest_pfn: %llu): %d\n", ··· 1153 1275 if (ret && mshv_partition_encrypted(partition)) { 1154 1276 int shrc; 1155 1277 1156 - shrc = mshv_partition_region_share(region); 1278 + shrc = mshv_region_share(region); 1157 1279 if (!shrc) 1158 1280 goto invalidate_region; 1159 1281 ··· 1234 1356 return ret; 1235 1357 } 1236 1358 1237 - static void mshv_partition_destroy_region(struct mshv_mem_region *region) 1238 - { 1239 - struct mshv_partition *partition = region->partition; 1240 - u32 unmap_flags = 0; 1241 - int ret; 1242 - 1243 - hlist_del(&region->hnode); 1244 - 1245 - if (mshv_partition_encrypted(partition)) { 1246 - ret = mshv_partition_region_share(region); 1247 - if (ret) { 1248 - pt_err(partition, 1249 - "Failed to regain access to memory, unpinning user pages will fail and crash the host error: %d\n", 1250 - ret); 1251 - return; 1252 - } 1253 - } 1254 - 1255 - if (region->flags.large_pages) 1256 - unmap_flags |= HV_UNMAP_GPA_LARGE_PAGE; 1257 - 1258 - /* ignore unmap failures and continue as process may be exiting */ 1259 - hv_call_unmap_gpa_pages(partition->pt_id, region->start_gfn, 1260 - region->nr_pages, unmap_flags); 1261 - 1262 - mshv_region_invalidate(region); 1263 - 1264 - vfree(region); 1265 - } 1266 - 1267 1359 /* Called for unmapping both the guest ram and the mmio space */ 1268 1360 static long 1269 1361 mshv_unmap_user_memory(struct mshv_partition *partition, ··· 1254 1406 region->nr_pages != HVPFN_DOWN(mem.size)) 1255 1407 return -EINVAL; 1256 1408 1257 - mshv_partition_destroy_region(region); 1409 + mshv_region_destroy(region); 1258 1410 1259 1411 return 0; 1260 1412 } ··· 1658 1810 1659 1811 hlist_for_each_entry_safe(region, n, &partition->pt_mem_regions, 1660 1812 hnode) 1661 - 
mshv_partition_destroy_region(region); 1813 + mshv_region_destroy(region); 1662 1814 1663 1815 /* Withdraw and free all pages we deposited */ 1664 1816 hv_call_withdraw_memory(U64_MAX, NUMA_NO_NODE, partition->pt_id);