Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon: use an interval tree to manage the VMA v2

Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

+52 -61
+1
drivers/gpu/drm/Kconfig
··· 114 114 select POWER_SUPPLY 115 115 select HWMON 116 116 select BACKLIGHT_CLASS_DEVICE 117 + select INTERVAL_TREE 117 118 help 118 119 Choose this option if you have an ATI Radeon graphics card. There 119 120 are both PCI and AGP versions. You don't need to choose this to
+3 -4
drivers/gpu/drm/radeon/radeon.h
··· 64 64 #include <linux/wait.h> 65 65 #include <linux/list.h> 66 66 #include <linux/kref.h> 67 + #include <linux/interval_tree.h> 67 68 68 69 #include <ttm/ttm_bo_api.h> 69 70 #include <ttm/ttm_bo_driver.h> ··· 448 447 struct radeon_bo_va { 449 448 /* protected by bo being reserved */ 450 449 struct list_head bo_list; 451 - uint64_t soffset; 452 - uint64_t eoffset; 453 450 uint32_t flags; 454 451 uint64_t addr; 455 452 unsigned ref_count; 456 453 457 454 /* protected by vm mutex */ 458 - struct list_head vm_list; 455 + struct interval_tree_node it; 459 456 struct list_head vm_status; 460 457 461 458 /* constant after initialization */ ··· 876 877 }; 877 878 878 879 struct radeon_vm { 879 - struct list_head va; 880 + struct rb_root va; 880 881 unsigned id; 881 882 882 883 /* BOs moved, but not yet updated in the PT */
+2 -2
drivers/gpu/drm/radeon/radeon_gem.c
··· 496 496 497 497 switch (args->operation) { 498 498 case RADEON_VA_MAP: 499 - if (bo_va->soffset) { 499 + if (bo_va->it.start) { 500 500 args->operation = RADEON_VA_RESULT_VA_EXIST; 501 - args->offset = bo_va->soffset; 501 + args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; 502 502 goto out; 503 503 } 504 504 r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
+2 -2
drivers/gpu/drm/radeon/radeon_trace.h
··· 72 72 ), 73 73 74 74 TP_fast_assign( 75 - __entry->soffset = bo_va->soffset; 76 - __entry->eoffset = bo_va->eoffset; 75 + __entry->soffset = bo_va->it.start; 76 + __entry->eoffset = bo_va->it.last + 1; 77 77 __entry->flags = bo_va->flags; 78 78 ), 79 79 TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
+44 -53
drivers/gpu/drm/radeon/radeon_vm.c
··· 326 326 } 327 327 bo_va->vm = vm; 328 328 bo_va->bo = bo; 329 - bo_va->soffset = 0; 330 - bo_va->eoffset = 0; 329 + bo_va->it.start = 0; 330 + bo_va->it.last = 0; 331 331 bo_va->flags = 0; 332 332 bo_va->addr = 0; 333 333 bo_va->ref_count = 1; 334 334 INIT_LIST_HEAD(&bo_va->bo_list); 335 - INIT_LIST_HEAD(&bo_va->vm_list); 336 335 INIT_LIST_HEAD(&bo_va->vm_status); 337 336 338 337 mutex_lock(&vm->mutex); 339 - list_add(&bo_va->vm_list, &vm->va); 340 338 list_add_tail(&bo_va->bo_list, &bo->va); 341 339 mutex_unlock(&vm->mutex); 342 340 ··· 418 420 uint32_t flags) 419 421 { 420 422 uint64_t size = radeon_bo_size(bo_va->bo); 421 - uint64_t eoffset, last_offset = 0; 422 423 struct radeon_vm *vm = bo_va->vm; 423 - struct radeon_bo_va *tmp; 424 - struct list_head *head; 425 424 unsigned last_pfn, pt_idx; 425 + uint64_t eoffset; 426 426 int r; 427 427 428 428 if (soffset) { ··· 442 446 } 443 447 444 448 mutex_lock(&vm->mutex); 445 - head = &vm->va; 446 - last_offset = 0; 447 - list_for_each_entry(tmp, &vm->va, vm_list) { 448 - if (bo_va == tmp) { 449 - /* skip over currently modified bo */ 450 - continue; 449 + if (bo_va->it.start || bo_va->it.last) { 450 + if (bo_va->addr) { 451 + /* add a clone of the bo_va to clear the old address */ 452 + struct radeon_bo_va *tmp; 453 + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 454 + tmp->it.start = bo_va->it.start; 455 + tmp->it.last = bo_va->it.last; 456 + tmp->vm = vm; 457 + tmp->addr = bo_va->addr; 458 + list_add(&tmp->vm_status, &vm->freed); 451 459 } 452 460 453 - if (soffset >= last_offset && eoffset <= tmp->soffset) { 454 - /* bo can be added before this one */ 455 - break; 456 - } 457 - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { 461 + interval_tree_remove(&bo_va->it, &vm->va); 462 + bo_va->it.start = 0; 463 + bo_va->it.last = 0; 464 + } 465 + 466 + soffset /= RADEON_GPU_PAGE_SIZE; 467 + eoffset /= RADEON_GPU_PAGE_SIZE; 468 + if (soffset || eoffset) { 469 + struct interval_tree_node *it; 470 + it 
= interval_tree_iter_first(&vm->va, soffset, eoffset - 1); 471 + if (it) { 472 + struct radeon_bo_va *tmp; 473 + tmp = container_of(it, struct radeon_bo_va, it); 458 474 /* bo and tmp overlap, invalid offset */ 459 - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", 460 - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, 461 - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); 475 + dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " 476 + "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, 477 + soffset, tmp->bo, tmp->it.start, tmp->it.last); 462 478 mutex_unlock(&vm->mutex); 463 479 return -EINVAL; 464 480 } 465 - last_offset = tmp->eoffset; 466 - head = &tmp->vm_list; 481 + bo_va->it.start = soffset; 482 + bo_va->it.last = eoffset - 1; 483 + interval_tree_insert(&bo_va->it, &vm->va); 467 484 } 468 485 469 - if (bo_va->soffset) { 470 - /* add a clone of the bo_va to clear the old address */ 471 - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); 472 - if (!tmp) { 473 - mutex_unlock(&vm->mutex); 474 - return -ENOMEM; 475 - } 476 - tmp->soffset = bo_va->soffset; 477 - tmp->eoffset = bo_va->eoffset; 478 - tmp->vm = vm; 479 - list_add(&tmp->vm_status, &vm->freed); 480 - } 481 - 482 - bo_va->soffset = soffset; 483 - bo_va->eoffset = eoffset; 484 486 bo_va->flags = flags; 485 487 bo_va->addr = 0; 486 - list_move(&bo_va->vm_list, head); 487 488 488 - soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; 489 - eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; 489 + soffset >>= radeon_vm_block_size; 490 + eoffset >>= radeon_vm_block_size; 490 491 491 492 BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); 492 493 ··· 771 778 unsigned count = 0; 772 779 uint64_t addr; 773 780 774 - start = start / RADEON_GPU_PAGE_SIZE; 775 - end = end / RADEON_GPU_PAGE_SIZE; 776 - 777 781 /* walk over the address space and update the page tables */ 778 782 for (addr = start; addr < end; ) { 779 783 uint64_t pt_idx = addr >> radeon_vm_block_size; 
··· 837 847 uint64_t addr; 838 848 int r; 839 849 840 - if (!bo_va->soffset) { 850 + if (!bo_va->it.start) { 841 851 dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", 842 852 bo_va->bo, vm); 843 853 return -EINVAL; ··· 871 881 872 882 trace_radeon_vm_bo_update(bo_va); 873 883 874 - nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; 884 + nptes = bo_va->it.last - bo_va->it.start + 1; 875 885 876 886 /* padding, etc. */ 877 887 ndw = 64; ··· 896 906 return r; 897 907 ib.length_dw = 0; 898 908 899 - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, 900 - addr, radeon_vm_page_flags(bo_va->flags)); 909 + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, 910 + bo_va->it.last + 1, addr, 911 + radeon_vm_page_flags(bo_va->flags)); 901 912 902 913 radeon_semaphore_sync_to(ib.semaphore, vm->fence); 903 914 r = radeon_ib_schedule(rdev, &ib, NULL); ··· 984 993 list_del(&bo_va->bo_list); 985 994 986 995 mutex_lock(&vm->mutex); 987 - list_del(&bo_va->vm_list); 996 + interval_tree_remove(&bo_va->it, &vm->va); 988 997 list_del(&bo_va->vm_status); 989 998 990 999 if (bo_va->addr) { ··· 1042 1051 vm->last_flush = NULL; 1043 1052 vm->last_id_use = NULL; 1044 1053 mutex_init(&vm->mutex); 1045 - INIT_LIST_HEAD(&vm->va); 1054 + vm->va = RB_ROOT; 1046 1055 INIT_LIST_HEAD(&vm->invalidated); 1047 1056 INIT_LIST_HEAD(&vm->freed); 1048 1057 ··· 1087 1096 struct radeon_bo_va *bo_va, *tmp; 1088 1097 int i, r; 1089 1098 1090 - if (!list_empty(&vm->va)) { 1099 + if (!RB_EMPTY_ROOT(&vm->va)) { 1091 1100 dev_err(rdev->dev, "still active bo inside vm\n"); 1092 1101 } 1093 - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { 1094 - list_del_init(&bo_va->vm_list); 1102 + rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { 1103 + interval_tree_remove(&bo_va->it, &vm->va); 1095 1104 r = radeon_bo_reserve(bo_va->bo, false); 1096 1105 if (!r) { 1097 1106 list_del_init(&bo_va->bo_list);