Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen: re-introduce support for grant v2 interface

The grant v2 support was removed from the kernel with
commit 438b33c7145ca8a5131a30c36d8f59bce119a19a ("xen/grant-table:
remove support for V2 tables") as the higher memory footprint of v2
grants resulted in fewer grants being possible for a kernel compared
to the v1 grant interface.

As machines with more than 16 TB of memory are expected to become more
common in the near future, support of grant v2 is mandatory in order
to be able to run a Xen PV domain at any memory location.

So re-add grant v2 support, basically by reverting the above commit.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

authored by

Juergen Gross and committed by
Boris Ostrovsky
b988b8ff ec4001c3

+399 -14
+8 -1
arch/arm/xen/grant-table.c
··· 45 45 return; 46 46 } 47 47 48 - int arch_gnttab_init(unsigned long nr_shared) 48 + int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, 49 + unsigned long max_nr_gframes, 50 + grant_status_t **__shared) 51 + { 52 + return -ENOSYS; 53 + } 54 + 55 + int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) 49 56 { 50 57 return 0; 51 58 }
+56 -6
arch/x86/xen/grant-table.c
··· 49 49 static struct gnttab_vm_area { 50 50 struct vm_struct *area; 51 51 pte_t **ptes; 52 - } gnttab_shared_vm_area; 52 + } gnttab_shared_vm_area, gnttab_status_vm_area; 53 53 54 54 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, 55 55 unsigned long max_nr_gframes, ··· 73 73 return 0; 74 74 } 75 75 76 - void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) 76 + int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, 77 + unsigned long max_nr_gframes, 78 + grant_status_t **__shared) 77 79 { 80 + grant_status_t *shared = *__shared; 78 81 unsigned long addr; 79 82 unsigned long i; 83 + 84 + if (shared == NULL) 85 + *__shared = shared = gnttab_status_vm_area.area->addr; 80 86 81 87 addr = (unsigned long)shared; 82 88 83 89 for (i = 0; i < nr_gframes; i++) { 84 - set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], 85 - __pte(0)); 90 + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], 91 + mfn_pte(frames[i], PAGE_KERNEL)); 92 + addr += PAGE_SIZE; 93 + } 94 + 95 + return 0; 96 + } 97 + 98 + void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) 99 + { 100 + pte_t **ptes; 101 + unsigned long addr; 102 + unsigned long i; 103 + 104 + if (shared == gnttab_status_vm_area.area->addr) 105 + ptes = gnttab_status_vm_area.ptes; 106 + else 107 + ptes = gnttab_shared_vm_area.ptes; 108 + 109 + addr = (unsigned long)shared; 110 + 111 + for (i = 0; i < nr_gframes; i++) { 112 + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); 86 113 addr += PAGE_SIZE; 87 114 } 88 115 } ··· 129 102 return 0; 130 103 } 131 104 132 - int arch_gnttab_init(unsigned long nr_shared) 105 + static void arch_gnttab_vfree(struct gnttab_vm_area *area) 133 106 { 107 + free_vm_area(area->area); 108 + kfree(area->ptes); 109 + } 110 + 111 + int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) 112 + { 113 + int ret; 114 + 134 115 if (!xen_pv_domain()) 135 116 return 0; 136 117 137 - return 
arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); 118 + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); 119 + if (ret < 0) 120 + return ret; 121 + 122 + /* 123 + * Always allocate the space for the status frames in case 124 + * we're migrated to a host with V2 support. 125 + */ 126 + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); 127 + if (ret < 0) 128 + goto err; 129 + 130 + return 0; 131 + err: 132 + arch_gnttab_vfree(&gnttab_shared_vm_area); 133 + return -ENOMEM; 138 134 } 139 135 140 136 #ifdef CONFIG_XEN_PVH
+306 -6
drivers/xen/grant-table.c
··· 71 71 72 72 static union { 73 73 struct grant_entry_v1 *v1; 74 + union grant_entry_v2 *v2; 74 75 void *addr; 75 76 } gnttab_shared; 76 77 ··· 122 121 * by bit operations. 123 122 */ 124 123 int (*query_foreign_access)(grant_ref_t ref); 124 + /* 125 + * Grant a domain to access a range of bytes within the page referred by 126 + * an available grant entry. Ref parameter is reference of a grant entry 127 + * which will be sub-page accessed, domid is id of grantee domain, frame 128 + * is frame address of subpage grant, flags is grant type and flag 129 + * information, page_off is offset of the range of bytes, and length is 130 + * length of bytes to be accessed. 131 + */ 132 + void (*update_subpage_entry)(grant_ref_t ref, domid_t domid, 133 + unsigned long frame, int flags, 134 + unsigned page_off, unsigned length); 135 + /* 136 + * Redirect an available grant entry on domain A to another grant 137 + * reference of domain B, then allow domain C to use grant reference 138 + * of domain B transitively. Ref parameter is an available grant entry 139 + * reference on domain A, domid is id of domain C which accesses grant 140 + * entry transitively, flags is grant type and flag information, 141 + * trans_domid is id of domain B whose grant entry is finally accessed 142 + * transitively, trans_gref is grant entry transitive reference of 143 + * domain B. 144 + */ 145 + void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags, 146 + domid_t trans_domid, grant_ref_t trans_gref); 125 147 }; 126 148 127 149 struct unmap_refs_callback_data { ··· 154 130 155 131 static const struct gnttab_ops *gnttab_interface; 156 132 133 + /* This reflects status of grant entries, so act as a global value. 
*/ 134 + static grant_status_t *grstatus; 135 + 157 136 static int grant_table_version; 158 137 static int grefs_per_grant_frame; 159 138 ··· 165 138 static int gnttab_expand(unsigned int req_entries); 166 139 167 140 #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) 141 + #define SPP (PAGE_SIZE / sizeof(grant_status_t)) 168 142 169 143 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) 170 144 { ··· 238 210 } 239 211 240 212 /* 241 - * Following applies to gnttab_update_entry_v1. 213 + * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. 242 214 * Introducing a valid entry into the grant table: 243 215 * 1. Write ent->domid. 244 216 * 2. Write ent->frame: ··· 255 227 gnttab_shared.v1[ref].frame = frame; 256 228 wmb(); 257 229 gnttab_shared.v1[ref].flags = flags; 230 + } 231 + 232 + static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid, 233 + unsigned long frame, unsigned int flags) 234 + { 235 + gnttab_shared.v2[ref].hdr.domid = domid; 236 + gnttab_shared.v2[ref].full_page.frame = frame; 237 + wmb(); /* Hypervisor concurrent accesses. 
*/ 238 + gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags; 258 239 } 259 240 260 241 /* ··· 292 255 } 293 256 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); 294 257 258 + static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, 259 + unsigned long frame, int flags, 260 + unsigned page_off, unsigned length) 261 + { 262 + gnttab_shared.v2[ref].sub_page.frame = frame; 263 + gnttab_shared.v2[ref].sub_page.page_off = page_off; 264 + gnttab_shared.v2[ref].sub_page.length = length; 265 + gnttab_shared.v2[ref].hdr.domid = domid; 266 + wmb(); 267 + gnttab_shared.v2[ref].hdr.flags = 268 + GTF_permit_access | GTF_sub_page | flags; 269 + } 270 + 271 + int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid, 272 + unsigned long frame, int flags, 273 + unsigned page_off, 274 + unsigned length) 275 + { 276 + if (flags & (GTF_accept_transfer | GTF_reading | 277 + GTF_writing | GTF_transitive)) 278 + return -EPERM; 279 + 280 + if (gnttab_interface->update_subpage_entry == NULL) 281 + return -ENOSYS; 282 + 283 + gnttab_interface->update_subpage_entry(ref, domid, frame, flags, 284 + page_off, length); 285 + 286 + return 0; 287 + } 288 + EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref); 289 + 290 + int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, 291 + int flags, unsigned page_off, 292 + unsigned length) 293 + { 294 + int ref, rc; 295 + 296 + ref = get_free_entries(1); 297 + if (unlikely(ref < 0)) 298 + return -ENOSPC; 299 + 300 + rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags, 301 + page_off, length); 302 + if (rc < 0) { 303 + put_free_entry(ref); 304 + return rc; 305 + } 306 + 307 + return ref; 308 + } 309 + EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage); 310 + 311 + bool gnttab_subpage_grants_available(void) 312 + { 313 + return gnttab_interface->update_subpage_entry != NULL; 314 + } 315 + EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); 316 + 317 + static void 
gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, 318 + int flags, domid_t trans_domid, 319 + grant_ref_t trans_gref) 320 + { 321 + gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; 322 + gnttab_shared.v2[ref].transitive.gref = trans_gref; 323 + gnttab_shared.v2[ref].hdr.domid = domid; 324 + wmb(); 325 + gnttab_shared.v2[ref].hdr.flags = 326 + GTF_permit_access | GTF_transitive | flags; 327 + } 328 + 329 + int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid, 330 + int flags, domid_t trans_domid, 331 + grant_ref_t trans_gref) 332 + { 333 + if (flags & (GTF_accept_transfer | GTF_reading | 334 + GTF_writing | GTF_sub_page)) 335 + return -EPERM; 336 + 337 + if (gnttab_interface->update_trans_entry == NULL) 338 + return -ENOSYS; 339 + 340 + gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid, 341 + trans_gref); 342 + 343 + return 0; 344 + } 345 + EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref); 346 + 347 + int gnttab_grant_foreign_access_trans(domid_t domid, int flags, 348 + domid_t trans_domid, 349 + grant_ref_t trans_gref) 350 + { 351 + int ref, rc; 352 + 353 + ref = get_free_entries(1); 354 + if (unlikely(ref < 0)) 355 + return -ENOSPC; 356 + 357 + rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags, 358 + trans_domid, trans_gref); 359 + if (rc < 0) { 360 + put_free_entry(ref); 361 + return rc; 362 + } 363 + 364 + return ref; 365 + } 366 + EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans); 367 + 368 + bool gnttab_trans_grants_available(void) 369 + { 370 + return gnttab_interface->update_trans_entry != NULL; 371 + } 372 + EXPORT_SYMBOL_GPL(gnttab_trans_grants_available); 373 + 295 374 static int gnttab_query_foreign_access_v1(grant_ref_t ref) 296 375 { 297 376 return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); 377 + } 378 + 379 + static int gnttab_query_foreign_access_v2(grant_ref_t ref) 380 + { 381 + return grstatus[ref] & (GTF_reading|GTF_writing); 298 382 } 299 383 300 384 
int gnttab_query_foreign_access(grant_ref_t ref) ··· 436 278 if (flags & (GTF_reading|GTF_writing)) 437 279 return 0; 438 280 } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); 281 + 282 + return 1; 283 + } 284 + 285 + static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) 286 + { 287 + gnttab_shared.v2[ref].hdr.flags = 0; 288 + mb(); /* Concurrent access by hypervisor. */ 289 + if (grstatus[ref] & (GTF_reading|GTF_writing)) { 290 + return 0; 291 + } else { 292 + /* 293 + * The read of grstatus needs to have acquire semantics. 294 + * On x86, reads already have that, and we just need to 295 + * protect against compiler reorderings. 296 + * On other architectures we may need a full barrier. 297 + */ 298 + #ifdef CONFIG_X86 299 + barrier(); 300 + #else 301 + mb(); 302 + #endif 303 + } 439 304 440 305 return 1; 441 306 } ··· 618 437 619 438 rmb(); /* Read the frame number /after/ reading completion status. */ 620 439 frame = gnttab_shared.v1[ref].frame; 440 + BUG_ON(frame == 0); 441 + 442 + return frame; 443 + } 444 + 445 + static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref) 446 + { 447 + unsigned long frame; 448 + u16 flags; 449 + u16 *pflags; 450 + 451 + pflags = &gnttab_shared.v2[ref].hdr.flags; 452 + 453 + /* 454 + * If a transfer is not even yet started, try to reclaim the grant 455 + * reference and return failure (== 0). 456 + */ 457 + while (!((flags = *pflags) & GTF_transfer_committed)) { 458 + if (sync_cmpxchg(pflags, flags, 0) == flags) 459 + return 0; 460 + cpu_relax(); 461 + } 462 + 463 + /* If a transfer is in progress then wait until it is completed. */ 464 + while (!(flags & GTF_transfer_completed)) { 465 + flags = *pflags; 466 + cpu_relax(); 467 + } 468 + 469 + rmb(); /* Read the frame number /after/ reading completion status. 
*/ 470 + frame = gnttab_shared.v2[ref].full_page.frame; 621 471 BUG_ON(frame == 0); 622 472 623 473 return frame; ··· 1150 938 } 1151 939 EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync); 1152 940 941 + static unsigned int nr_status_frames(unsigned int nr_grant_frames) 942 + { 943 + BUG_ON(grefs_per_grant_frame == 0); 944 + return (nr_grant_frames * grefs_per_grant_frame + SPP - 1) / SPP; 945 + } 946 + 1153 947 static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) 1154 948 { 1155 949 int rc; ··· 1171 953 static void gnttab_unmap_frames_v1(void) 1172 954 { 1173 955 arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); 956 + } 957 + 958 + static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes) 959 + { 960 + uint64_t *sframes; 961 + unsigned int nr_sframes; 962 + struct gnttab_get_status_frames getframes; 963 + int rc; 964 + 965 + nr_sframes = nr_status_frames(nr_gframes); 966 + 967 + /* No need for kzalloc as it is initialized in following hypercall 968 + * GNTTABOP_get_status_frames. 
969 + */ 970 + sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC); 971 + if (!sframes) 972 + return -ENOMEM; 973 + 974 + getframes.dom = DOMID_SELF; 975 + getframes.nr_frames = nr_sframes; 976 + set_xen_guest_handle(getframes.frame_list, sframes); 977 + 978 + rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, 979 + &getframes, 1); 980 + if (rc == -ENOSYS) { 981 + kfree(sframes); 982 + return -ENOSYS; 983 + } 984 + 985 + BUG_ON(rc || getframes.status); 986 + 987 + rc = arch_gnttab_map_status(sframes, nr_sframes, 988 + nr_status_frames(gnttab_max_grant_frames()), 989 + &grstatus); 990 + BUG_ON(rc); 991 + kfree(sframes); 992 + 993 + rc = arch_gnttab_map_shared(frames, nr_gframes, 994 + gnttab_max_grant_frames(), 995 + &gnttab_shared.addr); 996 + BUG_ON(rc); 997 + 998 + return 0; 999 + } 1000 + 1001 + static void gnttab_unmap_frames_v2(void) 1002 + { 1003 + arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); 1004 + arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames)); 1174 1005 } 1175 1006 1176 1007 static int gnttab_map(unsigned int start_idx, unsigned int end_idx) ··· 1289 1022 .query_foreign_access = gnttab_query_foreign_access_v1, 1290 1023 }; 1291 1024 1025 + static const struct gnttab_ops gnttab_v2_ops = { 1026 + .map_frames = gnttab_map_frames_v2, 1027 + .unmap_frames = gnttab_unmap_frames_v2, 1028 + .update_entry = gnttab_update_entry_v2, 1029 + .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, 1030 + .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, 1031 + .query_foreign_access = gnttab_query_foreign_access_v2, 1032 + .update_subpage_entry = gnttab_update_subpage_entry_v2, 1033 + .update_trans_entry = gnttab_update_trans_entry_v2, 1034 + }; 1035 + 1292 1036 static void gnttab_request_version(void) 1293 1037 { 1294 - /* Only version 1 is used, which will always be available. 
*/ 1295 - grant_table_version = 1; 1296 - grefs_per_grant_frame = XEN_PAGE_SIZE / sizeof(struct grant_entry_v1); 1297 - gnttab_interface = &gnttab_v1_ops; 1038 + int rc; 1039 + struct gnttab_set_version gsv; 1298 1040 1041 + gsv.version = 1; 1042 + 1043 + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); 1044 + if (rc == 0 && gsv.version == 2) { 1045 + grant_table_version = 2; 1046 + grefs_per_grant_frame = XEN_PAGE_SIZE / 1047 + sizeof(union grant_entry_v2); 1048 + gnttab_interface = &gnttab_v2_ops; 1049 + } else if (grant_table_version == 2) { 1050 + /* 1051 + * If we've already used version 2 features, 1052 + * but then suddenly discover that they're not 1053 + * available (e.g. migrating to an older 1054 + * version of Xen), almost unbounded badness 1055 + * can happen. 1056 + */ 1057 + panic("we need grant tables version 2, but only version 1 is available"); 1058 + } else { 1059 + grant_table_version = 1; 1060 + grefs_per_grant_frame = XEN_PAGE_SIZE / 1061 + sizeof(struct grant_entry_v1); 1062 + gnttab_interface = &gnttab_v1_ops; 1063 + } 1299 1064 pr_info("Grant tables using version %d layout\n", grant_table_version); 1300 1065 } 1301 1066 ··· 1421 1122 } 1422 1123 } 1423 1124 1424 - ret = arch_gnttab_init(max_nr_grant_frames); 1125 + ret = arch_gnttab_init(max_nr_grant_frames, 1126 + nr_status_frames(max_nr_grant_frames)); 1425 1127 if (ret < 0) 1426 1128 goto ini_nomem; 1427 1129
+29 -1
include/xen/grant_table.h
··· 84 84 85 85 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, 86 86 int readonly); 87 + int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, 88 + int flags, unsigned page_off, 89 + unsigned length); 90 + int gnttab_grant_foreign_access_trans(domid_t domid, int flags, 91 + domid_t trans_domid, 92 + grant_ref_t trans_gref); 93 + 94 + /* 95 + * Are sub-page grants available on this version of Xen? Returns true if they 96 + * are, and false if they're not. 97 + */ 98 + bool gnttab_subpage_grants_available(void); 99 + 100 + /* 101 + * Are transitive grants available on this version of Xen? Returns true if they 102 + * are, and false if they're not. 103 + */ 104 + bool gnttab_trans_grants_available(void); 87 105 88 106 /* 89 107 * End access through the given grant reference, iff the grant entry is no ··· 148 130 149 131 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, 150 132 unsigned long frame, int readonly); 133 + int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid, 134 + unsigned long frame, int flags, 135 + unsigned page_off, 136 + unsigned length); 137 + int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid, 138 + int flags, domid_t trans_domid, 139 + grant_ref_t trans_gref); 151 140 152 141 /* Give access to the first 4K of the page */ 153 142 static inline void gnttab_page_grant_foreign_access_ref_one( ··· 199 174 unmap->dev_bus_addr = 0; 200 175 } 201 176 202 - int arch_gnttab_init(unsigned long nr_shared); 177 + int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status); 203 178 int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes, 204 179 unsigned long max_nr_gframes, 205 180 void **__shared); 181 + int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, 182 + unsigned long max_nr_gframes, 183 + grant_status_t **__shared); 206 184 void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); 207 185 208 186 
struct grant_frames {