Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/grants: support allocating consecutive grants

To support virtio via grant mappings, larger mappings using consecutive
grants are needed in rare cases. Support those by adding a bitmap
of free grants.

As consecutive grants will be needed only in very rare cases (e.g. when
configuring a virtio device with a multi-page ring), optimize for the
normal case of non-consecutive allocations.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/1654197833-25362-3-git-send-email-olekstysh@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>

+221 -38
+217 -38
drivers/xen/grant-table.c
··· 33 33 34 34 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt 35 35 36 + #include <linux/bitmap.h> 36 37 #include <linux/memblock.h> 37 38 #include <linux/sched.h> 38 39 #include <linux/mm.h> ··· 71 70 72 71 static grant_ref_t **gnttab_list; 73 72 static unsigned int nr_grant_frames; 73 + 74 + /* 75 + * Handling of free grants: 76 + * 77 + * Free grants are in a simple list anchored in gnttab_free_head. They are 78 + * linked by grant ref, the last element contains GNTTAB_LIST_END. The number 79 + * of free entries is stored in gnttab_free_count. 80 + * Additionally there is a bitmap of free entries anchored in 81 + * gnttab_free_bitmap. This is being used for simplifying allocation of 82 + * multiple consecutive grants, which is needed e.g. for support of virtio. 83 + * gnttab_last_free is used to add free entries of new frames at the end of 84 + * the free list. 85 + * gnttab_free_tail_ptr specifies the variable which references the start 86 + * of consecutive free grants ending with gnttab_last_free. This pointer is 87 + * updated in a rather defensive way, in order to avoid performance hits in 88 + * hot paths. 89 + * All those variables are protected by gnttab_list_lock. 
90 + */ 74 91 static int gnttab_free_count; 75 - static grant_ref_t gnttab_free_head; 92 + static unsigned int gnttab_size; 93 + static grant_ref_t gnttab_free_head = GNTTAB_LIST_END; 94 + static grant_ref_t gnttab_last_free = GNTTAB_LIST_END; 95 + static grant_ref_t *gnttab_free_tail_ptr; 96 + static unsigned long *gnttab_free_bitmap; 76 97 static DEFINE_SPINLOCK(gnttab_list_lock); 98 + 77 99 struct grant_frames xen_auto_xlat_grant_frames; 78 100 static unsigned int xen_gnttab_version; 79 101 module_param_named(version, xen_gnttab_version, uint, 0); ··· 192 168 193 169 ref = head = gnttab_free_head; 194 170 gnttab_free_count -= count; 195 - while (count-- > 1) 196 - head = gnttab_entry(head); 171 + while (count--) { 172 + bitmap_clear(gnttab_free_bitmap, head, 1); 173 + if (gnttab_free_tail_ptr == __gnttab_entry(head)) 174 + gnttab_free_tail_ptr = &gnttab_free_head; 175 + if (count) 176 + head = gnttab_entry(head); 177 + } 197 178 gnttab_free_head = gnttab_entry(head); 198 179 gnttab_entry(head) = GNTTAB_LIST_END; 180 + 181 + if (!gnttab_free_count) { 182 + gnttab_last_free = GNTTAB_LIST_END; 183 + gnttab_free_tail_ptr = NULL; 184 + } 199 185 200 186 spin_unlock_irqrestore(&gnttab_list_lock, flags); 201 187 202 188 return ref; 189 + } 190 + 191 + static int get_seq_entry_count(void) 192 + { 193 + if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr || 194 + *gnttab_free_tail_ptr == GNTTAB_LIST_END) 195 + return 0; 196 + 197 + return gnttab_last_free - *gnttab_free_tail_ptr + 1; 198 + } 199 + 200 + /* Rebuilds the free grant list and tries to find count consecutive entries. 
*/ 201 + static int get_free_seq(unsigned int count) 202 + { 203 + int ret = -ENOSPC; 204 + unsigned int from, to; 205 + grant_ref_t *last; 206 + 207 + gnttab_free_tail_ptr = &gnttab_free_head; 208 + last = &gnttab_free_head; 209 + 210 + for (from = find_first_bit(gnttab_free_bitmap, gnttab_size); 211 + from < gnttab_size; 212 + from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) { 213 + to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size, 214 + from + 1); 215 + if (ret < 0 && to - from >= count) { 216 + ret = from; 217 + bitmap_clear(gnttab_free_bitmap, ret, count); 218 + from += count; 219 + gnttab_free_count -= count; 220 + if (from == to) 221 + continue; 222 + } 223 + 224 + /* 225 + * Recreate the free list in order to have it properly sorted. 226 + * This is needed to make sure that the free tail has the maximum 227 + * possible size. 228 + */ 229 + while (from < to) { 230 + *last = from; 231 + last = __gnttab_entry(from); 232 + gnttab_last_free = from; 233 + from++; 234 + } 235 + if (to < gnttab_size) 236 + gnttab_free_tail_ptr = __gnttab_entry(to - 1); 237 + } 238 + 239 + *last = GNTTAB_LIST_END; 240 + if (gnttab_last_free != gnttab_size - 1) 241 + gnttab_free_tail_ptr = NULL; 242 + 243 + return ret; 244 + } 245 + 246 + static int get_free_entries_seq(unsigned int count) 247 + { 248 + unsigned long flags; 249 + int ret = 0; 250 + 251 + spin_lock_irqsave(&gnttab_list_lock, flags); 252 + 253 + if (gnttab_free_count < count) { 254 + ret = gnttab_expand(count - gnttab_free_count); 255 + if (ret < 0) 256 + goto out; 257 + } 258 + 259 + if (get_seq_entry_count() < count) { 260 + ret = get_free_seq(count); 261 + if (ret >= 0) 262 + goto out; 263 + ret = gnttab_expand(count - get_seq_entry_count()); 264 + if (ret < 0) 265 + goto out; 266 + } 267 + 268 + ret = *gnttab_free_tail_ptr; 269 + *gnttab_free_tail_ptr = gnttab_entry(ret + count - 1); 270 + gnttab_free_count -= count; 271 + if (!gnttab_free_count) 272 + gnttab_free_tail_ptr = NULL; 273 + 
bitmap_clear(gnttab_free_bitmap, ret, count); 274 + 275 + out: 276 + spin_unlock_irqrestore(&gnttab_list_lock, flags); 277 + 278 + return ret; 203 279 } 204 280 205 281 static void do_free_callbacks(void) ··· 328 204 do_free_callbacks(); 329 205 } 330 206 207 + static void put_free_entry_locked(grant_ref_t ref) 208 + { 209 + if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES)) 210 + return; 211 + 212 + gnttab_entry(ref) = gnttab_free_head; 213 + gnttab_free_head = ref; 214 + if (!gnttab_free_count) 215 + gnttab_last_free = ref; 216 + if (gnttab_free_tail_ptr == &gnttab_free_head) 217 + gnttab_free_tail_ptr = __gnttab_entry(ref); 218 + gnttab_free_count++; 219 + bitmap_set(gnttab_free_bitmap, ref, 1); 220 + } 221 + 331 222 static void put_free_entry(grant_ref_t ref) 332 223 { 333 224 unsigned long flags; 334 225 335 - if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES)) 336 - return; 337 - 338 226 spin_lock_irqsave(&gnttab_list_lock, flags); 339 - gnttab_entry(ref) = gnttab_free_head; 340 - gnttab_free_head = ref; 341 - gnttab_free_count++; 227 + put_free_entry_locked(ref); 342 228 check_free_callbacks(); 343 229 spin_unlock_irqrestore(&gnttab_list_lock, flags); 230 + } 231 + 232 + static void gnttab_set_free(unsigned int start, unsigned int n) 233 + { 234 + unsigned int i; 235 + 236 + for (i = start; i < start + n - 1; i++) 237 + gnttab_entry(i) = i + 1; 238 + 239 + gnttab_entry(i) = GNTTAB_LIST_END; 240 + if (!gnttab_free_count) { 241 + gnttab_free_head = start; 242 + gnttab_free_tail_ptr = &gnttab_free_head; 243 + } else { 244 + gnttab_entry(gnttab_last_free) = start; 245 + } 246 + gnttab_free_count += n; 247 + gnttab_last_free = i; 248 + 249 + bitmap_set(gnttab_free_bitmap, start, n); 344 250 } 345 251 346 252 /* ··· 604 450 { 605 451 grant_ref_t ref; 606 452 unsigned long flags; 607 - int count = 1; 608 - if (head == GNTTAB_LIST_END) 609 - return; 453 + 610 454 spin_lock_irqsave(&gnttab_list_lock, flags); 611 - ref = head; 612 - while (gnttab_entry(ref) != 
GNTTAB_LIST_END) { 613 - ref = gnttab_entry(ref); 614 - count++; 455 + while (head != GNTTAB_LIST_END) { 456 + ref = gnttab_entry(head); 457 + put_free_entry_locked(head); 458 + head = ref; 615 459 } 616 - gnttab_entry(ref) = gnttab_free_head; 617 - gnttab_free_head = head; 618 - gnttab_free_count += count; 619 460 check_free_callbacks(); 620 461 spin_unlock_irqrestore(&gnttab_list_lock, flags); 621 462 } 622 463 EXPORT_SYMBOL_GPL(gnttab_free_grant_references); 464 + 465 + void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count) 466 + { 467 + unsigned long flags; 468 + unsigned int i; 469 + 470 + spin_lock_irqsave(&gnttab_list_lock, flags); 471 + for (i = count; i > 0; i--) 472 + put_free_entry_locked(head + i - 1); 473 + check_free_callbacks(); 474 + spin_unlock_irqrestore(&gnttab_list_lock, flags); 475 + } 476 + EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq); 623 477 624 478 int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) 625 479 { ··· 641 479 return 0; 642 480 } 643 481 EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); 482 + 483 + int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first) 484 + { 485 + int h; 486 + 487 + if (count == 1) 488 + h = get_free_entries(1); 489 + else 490 + h = get_free_entries_seq(count); 491 + 492 + if (h < 0) 493 + return -ENOSPC; 494 + 495 + *first = h; 496 + 497 + return 0; 498 + } 499 + EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq); 644 500 645 501 int gnttab_empty_grant_references(const grant_ref_t *private_head) 646 502 { ··· 752 572 goto grow_nomem; 753 573 } 754 574 575 + gnttab_set_free(gnttab_size, extra_entries); 755 576 756 - for (i = grefs_per_frame * nr_grant_frames; 757 - i < grefs_per_frame * new_nr_grant_frames - 1; i++) 758 - gnttab_entry(i) = i + 1; 759 - 760 - gnttab_entry(i) = gnttab_free_head; 761 - gnttab_free_head = grefs_per_frame * nr_grant_frames; 762 - gnttab_free_count += extra_entries; 577 + if (!gnttab_free_tail_ptr) 578 + 
gnttab_free_tail_ptr = __gnttab_entry(gnttab_size); 763 579 764 580 nr_grant_frames = new_nr_grant_frames; 581 + gnttab_size += extra_entries; 765 582 766 583 check_free_callbacks(); 767 584 ··· 1601 1424 int gnttab_init(void) 1602 1425 { 1603 1426 int i; 1604 - unsigned long max_nr_grant_frames; 1427 + unsigned long max_nr_grant_frames, max_nr_grefs; 1605 1428 unsigned int max_nr_glist_frames, nr_glist_frames; 1606 - unsigned int nr_init_grefs; 1607 1429 int ret; 1608 1430 1609 1431 gnttab_request_version(); 1610 1432 max_nr_grant_frames = gnttab_max_grant_frames(); 1433 + max_nr_grefs = max_nr_grant_frames * 1434 + gnttab_interface->grefs_per_grant_frame; 1611 1435 nr_grant_frames = 1; 1612 1436 1613 1437 /* Determine the maximum number of frames required for the 1614 1438 * grant reference free list on the current hypervisor. 1615 1439 */ 1616 - max_nr_glist_frames = (max_nr_grant_frames * 1617 - gnttab_interface->grefs_per_grant_frame / RPP); 1440 + max_nr_glist_frames = max_nr_grefs / RPP; 1618 1441 1619 1442 gnttab_list = kmalloc_array(max_nr_glist_frames, 1620 1443 sizeof(grant_ref_t *), ··· 1631 1454 } 1632 1455 } 1633 1456 1457 + gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL); 1458 + if (!gnttab_free_bitmap) { 1459 + ret = -ENOMEM; 1460 + goto ini_nomem; 1461 + } 1462 + 1634 1463 ret = arch_gnttab_init(max_nr_grant_frames, 1635 1464 nr_status_frames(max_nr_grant_frames)); 1636 1465 if (ret < 0) ··· 1647 1464 goto ini_nomem; 1648 1465 } 1649 1466 1650 - nr_init_grefs = nr_grant_frames * 1651 - gnttab_interface->grefs_per_grant_frame; 1467 + gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame; 1652 1468 1653 - for (i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) 1654 - gnttab_entry(i) = i + 1; 1655 - 1656 - gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; 1657 - gnttab_free_count = nr_init_grefs - GNTTAB_NR_RESERVED_ENTRIES; 1658 - gnttab_free_head = GNTTAB_NR_RESERVED_ENTRIES; 1469 + 
gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES, 1470 + gnttab_size - GNTTAB_NR_RESERVED_ENTRIES); 1659 1471 1660 1472 printk("Grant table initialized\n"); 1661 1473 return 0; ··· 1659 1481 for (i--; i >= 0; i--) 1660 1482 free_page((unsigned long)gnttab_list[i]); 1661 1483 kfree(gnttab_list); 1484 + bitmap_free(gnttab_free_bitmap); 1662 1485 return ret; 1663 1486 } 1664 1487 EXPORT_SYMBOL_GPL(gnttab_init);
+4
include/xen/grant_table.h
··· 127 127 */ 128 128 int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); 129 129 130 + int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first); 131 + 130 132 void gnttab_free_grant_reference(grant_ref_t ref); 131 133 132 134 void gnttab_free_grant_references(grant_ref_t head); 135 + 136 + void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count); 133 137 134 138 int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); 135 139