Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: Use interval tree to do fast hva lookup in memslots

The current memslots implementation only allows quick binary search by gfn;
quick lookup by hva is not possible — the implementation has to do a linear
scan of the whole memslots array, even though the operation being performed
might apply just to a single memslot.

This significantly hurts performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots, an interval tree is needed
for tracking them.

[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <d66b9974becaa9839be9c4e1a5de97b177b4ac20.1638817640.git.maciej.szmigiero@oracle.com>

authored by

Maciej S. Szmigiero and committed by
Paolo Bonzini
ed922739 26b8345a

+47 -14
+1
arch/arm64/kvm/Kconfig
··· 39 39 select HAVE_KVM_IRQ_BYPASS 40 40 select HAVE_KVM_VCPU_RUN_PID_CHANGE 41 41 select SCHED_INFO 42 + select INTERVAL_TREE 42 43 help 43 44 Support hosting virtualized guest machines. 44 45
+1
arch/mips/kvm/Kconfig
··· 27 27 select KVM_MMIO 28 28 select MMU_NOTIFIER 29 29 select SRCU 30 + select INTERVAL_TREE 30 31 help 31 32 Support for hosting Guest kernels. 32 33
+1
arch/powerpc/kvm/Kconfig
··· 26 26 select KVM_VFIO 27 27 select IRQ_BYPASS_MANAGER 28 28 select HAVE_KVM_IRQ_BYPASS 29 + select INTERVAL_TREE 29 30 30 31 config KVM_BOOK3S_HANDLER 31 32 bool
+1
arch/s390/kvm/Kconfig
··· 33 33 select HAVE_KVM_NO_POLL 34 34 select SRCU 35 35 select KVM_VFIO 36 + select INTERVAL_TREE 36 37 help 37 38 Support hosting paravirtualized guest machines using the SIE 38 39 virtualization capability on the mainframe. This should work
+1
arch/x86/kvm/Kconfig
··· 43 43 select KVM_GENERIC_DIRTYLOG_READ_PROTECT 44 44 select KVM_VFIO 45 45 select SRCU 46 + select INTERVAL_TREE 46 47 select HAVE_KVM_PM_NOTIFIER if PM 47 48 help 48 49 Support hosting fully virtualized guest machines using hardware
+3
include/linux/kvm_host.h
··· 30 30 #include <linux/nospec.h> 31 31 #include <linux/notifier.h> 32 32 #include <linux/hashtable.h> 33 + #include <linux/interval_tree.h> 33 34 #include <linux/xarray.h> 34 35 #include <asm/signal.h> 35 36 ··· 429 428 430 429 struct kvm_memory_slot { 431 430 struct hlist_node id_node; 431 + struct interval_tree_node hva_node; 432 432 gfn_t base_gfn; 433 433 unsigned long npages; 434 434 unsigned long *dirty_bitmap; ··· 531 529 */ 532 530 struct kvm_memslots { 533 531 u64 generation; 532 + struct rb_root_cached hva_tree; 534 533 /* 535 534 * The mapping table from slot id to the index in memslots[]. 536 535 *
+39 -14
virt/kvm/kvm_main.c
··· 512 512 } 513 513 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn) 514 514 515 + /* Iterate over each memslot intersecting [start, last] (inclusive) range */ 516 + #define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \ 517 + for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \ 518 + node; \ 519 + node = interval_tree_iter_next(node, start, last)) \ 520 + 515 521 static __always_inline int __kvm_handle_hva_range(struct kvm *kvm, 516 522 const struct kvm_hva_range *range) 517 523 { ··· 527 521 struct kvm_memslots *slots; 528 522 int i, idx; 529 523 524 + if (WARN_ON_ONCE(range->end <= range->start)) 525 + return 0; 526 + 530 527 /* A null handler is allowed if and only if on_lock() is provided. */ 531 528 if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) && 532 529 IS_KVM_NULL_FN(range->handler))) ··· 538 529 idx = srcu_read_lock(&kvm->srcu); 539 530 540 531 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { 532 + struct interval_tree_node *node; 533 + 541 534 slots = __kvm_memslots(kvm, i); 542 - kvm_for_each_memslot(slot, slots) { 535 + kvm_for_each_memslot_in_hva_range(node, slots, 536 + range->start, range->end - 1) { 543 537 unsigned long hva_start, hva_end; 544 538 539 + slot = container_of(node, struct kvm_memory_slot, hva_node); 545 540 hva_start = max(range->start, slot->userspace_addr); 546 541 hva_end = min(range->end, slot->userspace_addr + 547 542 (slot->npages << PAGE_SHIFT)); 548 - if (hva_start >= hva_end) 549 - continue; 550 543 551 544 /* 552 545 * To optimize for the likely case where the address ··· 884 873 if (!slots) 885 874 return NULL; 886 875 876 + slots->hva_tree = RB_ROOT_CACHED; 887 877 hash_init(slots->id_hash); 888 878 889 879 return slots; ··· 1289 1277 struct kvm_memory_slot *new) 1290 1278 { 1291 1279 /* 1292 - * Remove the old memslot from the hash list, copying the node data 1293 - * would corrupt the list. 
1280 + * Remove the old memslot from the hash list and interval tree, copying 1281 + * the node data would corrupt the structures. 1294 1282 */ 1295 1283 if (old) { 1296 1284 hash_del(&old->id_node); 1285 + interval_tree_remove(&old->hva_node, &slots->hva_tree); 1297 1286 1298 1287 if (!new) 1299 1288 return; 1300 1289 1301 1290 /* Copy the source *data*, not the pointer, to the destination. */ 1302 1291 *new = *old; 1292 + } else { 1293 + /* If @old is NULL, initialize @new's hva range. */ 1294 + new->hva_node.start = new->userspace_addr; 1295 + new->hva_node.last = new->userspace_addr + 1296 + (new->npages << PAGE_SHIFT) - 1; 1303 1297 } 1304 1298 1305 1299 /* (Re)Add the new memslot. */ 1306 1300 hash_add(slots->id_hash, &new->id_node, new->id); 1301 + interval_tree_insert(&new->hva_node, &slots->hva_tree); 1307 1302 } 1308 1303 1309 1304 static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src) ··· 1341 1322 atomic_set(&slots->last_used_slot, 0); 1342 1323 1343 1324 /* 1344 - * Remove the to-be-deleted memslot from the list _before_ shifting 1325 + * Remove the to-be-deleted memslot from the list/tree _before_ shifting 1345 1326 * the trailing memslots forward, its data will be overwritten. 1346 1327 * Defer the (somewhat pointless) copying of the memslot until after 1347 1328 * the last slot has been shifted to avoid overwriting said last slot. ··· 1368 1349 * itself is not preserved in the array, i.e. not swapped at this time, only 1369 1350 * its new index into the array is tracked. Returns the changed memslot's 1370 1351 * current index into the memslots array. 1371 - * The memslot at the returned index will not be in @slots->id_hash by then. 1352 + * The memslot at the returned index will not be in @slots->hva_tree or 1353 + * @slots->id_hash by then. 1372 1354 * @memslot is a detached struct with desired final data of the changed slot. 
1373 1355 */ 1374 1356 static inline int kvm_memslot_move_backward(struct kvm_memslots *slots, ··· 1383 1363 return -1; 1384 1364 1385 1365 /* 1386 - * Delete the slot from the hash table before sorting the remaining 1387 - * slots, the slot's data may be overwritten when copying slots as part 1388 - * of the sorting proccess. update_memslots() will unconditionally 1389 - * rewrite the entire slot and re-add it to the hash table. 1366 + * Delete the slot from the hash table and interval tree before sorting 1367 + * the remaining slots, the slot's data may be overwritten when copying 1368 + * slots as part of the sorting proccess. update_memslots() will 1369 + * unconditionally rewrite and re-add the entire slot. 1390 1370 */ 1391 1371 kvm_replace_memslot(slots, oldslot, NULL); 1392 1372 ··· 1412 1392 * is not preserved in the array, i.e. not swapped at this time, only its new 1413 1393 * index into the array is tracked. Returns the changed memslot's final index 1414 1394 * into the memslots array. 1415 - * The memslot at the returned index will not be in @slots->id_hash by then. 1395 + * The memslot at the returned index will not be in @slots->hva_tree or 1396 + * @slots->id_hash by then. 1416 1397 * @memslot is a detached struct with desired final data of the new or 1417 1398 * changed slot. 1418 - * Assumes that the memslot at @start index is not in @slots->id_hash. 1399 + * Assumes that the memslot at @start index is not in @slots->hva_tree or 1400 + * @slots->id_hash. 
1419 1401 */ 1420 1402 static inline int kvm_memslot_move_forward(struct kvm_memslots *slots, 1421 1403 struct kvm_memory_slot *memslot, ··· 1610 1588 1611 1589 memcpy(slots, old, kvm_memslots_size(old->used_slots)); 1612 1590 1591 + slots->hva_tree = RB_ROOT_CACHED; 1613 1592 hash_init(slots->id_hash); 1614 - kvm_for_each_memslot(memslot, slots) 1593 + kvm_for_each_memslot(memslot, slots) { 1594 + interval_tree_insert(&memslot->hva_node, &slots->hva_tree); 1615 1595 hash_add(slots->id_hash, &memslot->id_node, memslot->id); 1596 + } 1616 1597 1617 1598 return slots; 1618 1599 }