Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/radeon: switch over to drm_exec v2

Just a straightforward conversion without any optimization.

Smoke tested on actual hardware.

v2: rebase

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241114153020.6209-2-christian.koenig@amd.com

+66 -63
+1
drivers/gpu/drm/radeon/Kconfig
··· 13 13 select DRM_TTM 14 14 select DRM_TTM_HELPER 15 15 select FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION 16 + select DRM_EXEC 16 17 select SND_HDA_COMPONENT if SND_HDA_CORE 17 18 select POWER_SUPPLY 18 19 select HWMON
+4 -3
drivers/gpu/drm/radeon/radeon.h
··· 75 75 76 76 #include <drm/ttm/ttm_bo.h> 77 77 #include <drm/ttm/ttm_placement.h> 78 - #include <drm/ttm/ttm_execbuf_util.h> 79 78 79 + #include <drm/drm_exec.h> 80 80 #include <drm/drm_gem.h> 81 81 #include <drm/drm_audio_component.h> 82 82 #include <drm/drm_suballoc.h> ··· 457 457 458 458 struct radeon_bo_list { 459 459 struct radeon_bo *robj; 460 - struct ttm_validate_buffer tv; 460 + struct list_head list; 461 + bool shared; 461 462 uint64_t gpu_offset; 462 463 unsigned preferred_domains; 463 464 unsigned allowed_domains; ··· 1031 1030 struct radeon_bo_list *vm_bos; 1032 1031 struct list_head validated; 1033 1032 unsigned dma_reloc_idx; 1033 + struct drm_exec exec; 1034 1034 /* indices of various chunks */ 1035 1035 struct radeon_cs_chunk *chunk_ib; 1036 1036 struct radeon_cs_chunk *chunk_relocs; ··· 1045 1043 u32 cs_flags; 1046 1044 u32 ring; 1047 1045 s32 priority; 1048 - struct ww_acquire_ctx ticket; 1049 1046 }; 1050 1047 1051 1048 static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+23 -22
drivers/gpu/drm/radeon/radeon_cs.c
··· 182 182 } 183 183 } 184 184 185 - p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; 186 - p->relocs[i].tv.num_shared = !r->write_domain; 187 - 188 - radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, 189 - priority); 185 + p->relocs[i].shared = !r->write_domain; 186 + radeon_cs_buckets_add(&buckets, &p->relocs[i].list, priority); 190 187 } 191 188 192 189 radeon_cs_buckets_get_list(&buckets, &p->validated); ··· 194 197 if (need_mmap_lock) 195 198 mmap_read_lock(current->mm); 196 199 197 - r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); 200 + r = radeon_bo_list_validate(p->rdev, &p->exec, &p->validated, p->ring); 198 201 199 202 if (need_mmap_lock) 200 203 mmap_read_unlock(current->mm); ··· 250 253 struct radeon_bo_list *reloc; 251 254 int r; 252 255 253 - list_for_each_entry(reloc, &p->validated, tv.head) { 256 + list_for_each_entry(reloc, &p->validated, list) { 254 257 struct dma_resv *resv; 255 258 256 259 resv = reloc->robj->tbo.base.resv; 257 - r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, 258 - reloc->tv.num_shared); 260 + r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, reloc->shared); 259 261 if (r) 260 262 return r; 261 263 } ··· 272 276 s32 priority = 0; 273 277 274 278 INIT_LIST_HEAD(&p->validated); 279 + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 275 280 276 281 if (!cs->num_chunks) { 277 282 return 0; ··· 394 397 static int cmp_size_smaller_first(void *priv, const struct list_head *a, 395 398 const struct list_head *b) 396 399 { 397 - struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); 398 - struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); 400 + struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, list); 401 + struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, list); 399 402 400 403 /* Sort A before B if A is smaller. */
401 404 if (la->robj->tbo.base.size > lb->robj->tbo.base.size) ··· 414 417 * If error is set than unvalidate buffer, otherwise just free memory 415 418 * used by parsing context. 416 419 **/ 417 - static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff) 420 + static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) 418 421 { 419 422 unsigned i; 420 423 421 424 if (!error) { 425 + struct radeon_bo_list *reloc; 426 + 422 427 /* Sort the buffer list from the smallest to largest buffer, 423 428 * which affects the order of buffers in the LRU list. 424 429 * This assures that the smallest buffers are added first ··· 432 433 * per frame under memory pressure. 433 434 */ 434 435 list_sort(NULL, &parser->validated, cmp_size_smaller_first); 435 - 436 - ttm_eu_fence_buffer_objects(&parser->ticket, 437 - &parser->validated, 438 - &parser->ib.fence->base); 439 - } else if (backoff) { 440 - ttm_eu_backoff_reservation(&parser->ticket, 441 - &parser->validated); 436 + list_for_each_entry(reloc, &parser->validated, list) { 437 + dma_resv_add_fence(reloc->robj->tbo.base.resv, 438 + &parser->ib.fence->base, 439 + reloc->shared ? 
440 + DMA_RESV_USAGE_READ : 441 + DMA_RESV_USAGE_WRITE); 442 + } 442 443 } 444 + 445 + drm_exec_fini(&parser->exec); 443 446 444 447 if (parser->relocs != NULL) { 445 448 for (i = 0; i < parser->nrelocs; i++) { ··· 694 693 r = radeon_cs_parser_init(&parser, data); 695 694 if (r) { 696 695 DRM_ERROR("Failed to initialize parser !\n"); 697 - radeon_cs_parser_fini(&parser, r, false); 696 + radeon_cs_parser_fini(&parser, r); 698 697 up_read(&rdev->exclusive_lock); 699 698 r = radeon_cs_handle_lockup(rdev, r); 700 699 return r; ··· 708 707 } 709 708 710 709 if (r) { 711 - radeon_cs_parser_fini(&parser, r, false); 710 + radeon_cs_parser_fini(&parser, r); 712 711 up_read(&rdev->exclusive_lock); 713 712 r = radeon_cs_handle_lockup(rdev, r); 714 713 return r; ··· 725 724 goto out; 726 725 } 727 726 out: 728 - radeon_cs_parser_fini(&parser, r, true); 727 + radeon_cs_parser_fini(&parser, r); 729 728 up_read(&rdev->exclusive_lock); 730 729 r = radeon_cs_handle_lockup(rdev, r); 731 730 return r;
+22 -17
drivers/gpu/drm/radeon/radeon_gem.c
··· 605 605 static void radeon_gem_va_update_vm(struct radeon_device *rdev, 606 606 struct radeon_bo_va *bo_va) 607 607 { 608 - struct ttm_validate_buffer tv, *entry; 609 - struct radeon_bo_list *vm_bos; 610 - struct ww_acquire_ctx ticket; 608 + struct radeon_bo_list *vm_bos, *entry; 611 609 struct list_head list; 610 + struct drm_exec exec; 612 611 unsigned domain; 613 612 int r; 614 613 615 614 INIT_LIST_HEAD(&list); 616 615 617 - tv.bo = &bo_va->bo->tbo; 618 - tv.num_shared = 1; 619 - list_add(&tv.head, &list); 620 - 621 616 vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list); 622 617 if (!vm_bos) 623 618 return; 624 619 625 - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); 626 - if (r) 627 - goto error_free; 620 + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); 621 + drm_exec_until_all_locked(&exec) { 622 + list_for_each_entry(entry, &list, list) { 623 + r = drm_exec_prepare_obj(&exec, &entry->robj->tbo.base, 624 + 1); 625 + drm_exec_retry_on_contention(&exec); 626 + if (unlikely(r)) 627 + goto error_cleanup; 628 + } 628 629 629 - list_for_each_entry(entry, &list, head) { 630 - domain = radeon_mem_type_to_domain(entry->bo->resource->mem_type); 630 + r = drm_exec_prepare_obj(&exec, &bo_va->bo->tbo.base, 1); 631 + drm_exec_retry_on_contention(&exec); 632 + if (unlikely(r)) 633 + goto error_cleanup; 634 + } 635 + 636 + list_for_each_entry(entry, &list, list) { 637 + domain = radeon_mem_type_to_domain(entry->robj->tbo.resource->mem_type); 631 638 /* if anything is swapped out don't swap it in here, 632 639 just abort and wait for the next CS */ 633 640 if (domain == RADEON_GEM_DOMAIN_CPU) 634 - goto error_unreserve; 641 + goto error_cleanup; 635 642 } 636 643 637 644 mutex_lock(&bo_va->vm->mutex); ··· 652 645 error_unlock: 653 646 mutex_unlock(&bo_va->vm->mutex); 654 647 655 - error_unreserve: 656 - ttm_eu_backoff_reservation(&ticket, &list); 657 - 658 - error_free: 648 + error_cleanup: 649 + drm_exec_fini(&exec); 659 650 kvfree(vm_bos); 660 651 661 652 if (r && r != -ERESTARTSYS)
+11 -14
drivers/gpu/drm/radeon/radeon_object.c
··· 464 464 } 465 465 466 466 int radeon_bo_list_validate(struct radeon_device *rdev, 467 - struct ww_acquire_ctx *ticket, 467 + struct drm_exec *exec, 468 468 struct list_head *head, int ring) 469 469 { 470 470 struct ttm_operation_ctx ctx = { true, false }; 471 471 struct radeon_bo_list *lobj; 472 - struct list_head duplicates; 473 - int r; 474 472 u64 bytes_moved = 0, initial_bytes_moved; 475 473 u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); 474 + int r; 476 475 477 - INIT_LIST_HEAD(&duplicates); 478 - r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates); 479 - if (unlikely(r != 0)) { 480 - return r; 476 + drm_exec_until_all_locked(exec) { 477 + list_for_each_entry(lobj, head, list) { 478 + r = drm_exec_prepare_obj(exec, &lobj->robj->tbo.base, 479 + 1); 480 + drm_exec_retry_on_contention(exec); 481 + if (unlikely(r && r != -EALREADY)) 482 + return r; 483 + } 481 484 } 482 485 483 - list_for_each_entry(lobj, head, tv.head) { 486 + list_for_each_entry(lobj, head, list) { 484 487 struct radeon_bo *bo = lobj->robj; 485 488 if (!bo->tbo.pin_count) { 486 489 u32 domain = lobj->preferred_domains; ··· 522 519 domain = lobj->allowed_domains; 523 520 goto retry; 524 521 } 525 - ttm_eu_backoff_reservation(ticket, head); 526 522 return r; 527 523 } 528 524 } 529 525 lobj->gpu_offset = radeon_bo_gpu_offset(bo); 530 526 lobj->tiling_flags = bo->tiling_flags; 531 - } 532 - 533 - list_for_each_entry(lobj, &duplicates, tv.head) { 534 - lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj); 535 - lobj->tiling_flags = lobj->robj->tiling_flags; 536 527 } 537 528 538 529 return 0;
+1 -1
drivers/gpu/drm/radeon/radeon_object.h
··· 152 152 extern int radeon_bo_init(struct radeon_device *rdev); 153 153 extern void radeon_bo_fini(struct radeon_device *rdev); 154 154 extern int radeon_bo_list_validate(struct radeon_device *rdev, 155 - struct ww_acquire_ctx *ticket, 155 + struct drm_exec *exec, 156 156 struct list_head *head, int ring); 157 157 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, 158 158 u32 tiling_flags, u32 pitch);
+4 -6
drivers/gpu/drm/radeon/radeon_vm.c
··· 142 142 list[0].robj = vm->page_directory; 143 143 list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; 144 144 list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; 145 - list[0].tv.bo = &vm->page_directory->tbo; 146 - list[0].tv.num_shared = 1; 145 + list[0].shared = true; 147 146 list[0].tiling_flags = 0; 148 - list_add(&list[0].tv.head, head); 147 + list_add(&list[0].list, head); 149 148 150 149 for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { 151 150 if (!vm->page_tables[i].bo) ··· 153 154 list[idx].robj = vm->page_tables[i].bo; 154 155 list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; 155 156 list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; 156 - list[idx].tv.bo = &list[idx].robj->tbo; 157 - list[idx].tv.num_shared = 1; 157 + list[idx].shared = true; 158 158 list[idx].tiling_flags = 0; 159 - list_add(&list[idx++].tv.head, head); 159 + list_add(&list[idx++].list, head); 160 160 } 161 161 162 162 return list;