Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/ttm: Use the LRU walker for eviction

Use the LRU walker for eviction. This helps
remove a lot of code with weird locking
semantics.

The functionality is slightly changed so that
when trylocked buffer objects are exhausted, we
continue to interleave walks with ticket-locks while
there is still progress made. The list walks are
not restarted in-between evictions.

Also provide a separate ttm_bo_evict_first()
function for its single user. The context of that
user allows sleeping dma_resv locks.

v6:
- Various cleanups suggested by Matthew Brost.
- Fix error return code of ttm_bo_evict_first(). (Matthew Brost)
- Fix an error check that was inverted. (Matthew Brost)
v7:
- Use s64 rather than long (Christian König)
- Early ttm_resource_cursor_fini() in ttm_bo_evict_first().
- Simplify check for bo_moved in ttm_bo_evict_first().
(Christian König)
- Don't evict pinned bos.

Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v6
Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-8-thomas.hellstrom@linux.intel.com
Signed-off-by: Christian König <christian.koenig@amd.com>

authored by

Thomas Hellström and committed by
Christian König
3756310e 10efe34d

+144 -232
+137 -210
drivers/gpu/drm/ttm/ttm_bo.c
··· 224 224 dma_resv_iter_end(&cursor); 225 225 } 226 226 227 - /** 228 - * ttm_bo_cleanup_refs 229 - * If bo idle, remove from lru lists, and unref. 230 - * If not idle, block if possible. 231 - * 232 - * Must be called with lru_lock and reservation held, this function 233 - * will drop the lru lock and optionally the reservation lock before returning. 234 - * 235 - * @bo: The buffer object to clean-up 236 - * @interruptible: Any sleeps should occur interruptibly. 237 - * @no_wait_gpu: Never wait for gpu. Return -EBUSY instead. 238 - * @unlock_resv: Unlock the reservation lock as well. 239 - */ 240 - 241 - static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, 242 - bool interruptible, bool no_wait_gpu, 243 - bool unlock_resv) 244 - { 245 - struct dma_resv *resv = &bo->base._resv; 246 - int ret; 247 - 248 - if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) 249 - ret = 0; 250 - else 251 - ret = -EBUSY; 252 - 253 - if (ret && !no_wait_gpu) { 254 - long lret; 255 - 256 - if (unlock_resv) 257 - dma_resv_unlock(bo->base.resv); 258 - spin_unlock(&bo->bdev->lru_lock); 259 - 260 - lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, 261 - interruptible, 262 - 30 * HZ); 263 - 264 - if (lret < 0) 265 - return lret; 266 - else if (lret == 0) 267 - return -EBUSY; 268 - 269 - spin_lock(&bo->bdev->lru_lock); 270 - if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { 271 - /* 272 - * We raced, and lost, someone else holds the reservation now, 273 - * and is probably busy in ttm_bo_cleanup_memtype_use. 274 - * 275 - * Even if it's not the case, because we finished waiting any 276 - * delayed destruction would succeed, so just return success 277 - * here. 
278 - */ 279 - spin_unlock(&bo->bdev->lru_lock); 280 - return 0; 281 - } 282 - ret = 0; 283 - } 284 - 285 - if (ret) { 286 - if (unlock_resv) 287 - dma_resv_unlock(bo->base.resv); 288 - spin_unlock(&bo->bdev->lru_lock); 289 - return ret; 290 - } 291 - 292 - spin_unlock(&bo->bdev->lru_lock); 293 - ttm_bo_cleanup_memtype_use(bo); 294 - 295 - if (unlock_resv) 296 - dma_resv_unlock(bo->base.resv); 297 - 298 - return 0; 299 - } 300 - 301 227 /* 302 228 * Block for the dma_resv object to become idle, lock the buffer and clean up 303 229 * the resource and tt object. ··· 431 505 } 432 506 EXPORT_SYMBOL(ttm_bo_eviction_valuable); 433 507 434 - /* 435 - * Check the target bo is allowable to be evicted or swapout, including cases: 508 + /** 509 + * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list. 510 + * @bdev: The ttm device. 511 + * @man: The manager whose bo to evict. 512 + * @ctx: The TTM operation ctx governing the eviction. 436 513 * 437 - * a. if share same reservation object with ctx->resv, have assumption 438 - * reservation objects should already be locked, so not lock again and 439 - * return true directly when either the opreation allow_reserved_eviction 440 - * or the target bo already is in delayed free list; 441 - * 442 - * b. Otherwise, trylock it. 514 + * Return: 0 if successful or the resource disappeared. Negative error code on error. 
443 515 */ 444 - static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, 445 - struct ttm_operation_ctx *ctx, 446 - const struct ttm_place *place, 447 - bool *locked, bool *busy) 516 + int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, 517 + struct ttm_operation_ctx *ctx) 448 518 { 449 - bool ret = false; 519 + struct ttm_resource_cursor cursor; 520 + struct ttm_buffer_object *bo; 521 + struct ttm_resource *res; 522 + unsigned int mem_type; 523 + int ret = 0; 450 524 451 - if (bo->pin_count) { 452 - *locked = false; 453 - if (busy) 454 - *busy = false; 455 - return false; 525 + spin_lock(&bdev->lru_lock); 526 + res = ttm_resource_manager_first(man, &cursor); 527 + ttm_resource_cursor_fini(&cursor); 528 + if (!res) { 529 + ret = -ENOENT; 530 + goto out_no_ref; 456 531 } 532 + bo = res->bo; 533 + if (!ttm_bo_get_unless_zero(bo)) 534 + goto out_no_ref; 535 + mem_type = res->mem_type; 536 + spin_unlock(&bdev->lru_lock); 537 + ret = ttm_bo_reserve(bo, ctx->interruptible, ctx->no_wait_gpu, NULL); 538 + if (ret) 539 + goto out_no_lock; 540 + if (!bo->resource || bo->resource->mem_type != mem_type) 541 + goto out_bo_moved; 457 542 458 - if (bo->base.resv == ctx->resv) { 459 - dma_resv_assert_held(bo->base.resv); 460 - if (ctx->allow_res_evict) 461 - ret = true; 462 - *locked = false; 463 - if (busy) 464 - *busy = false; 543 + if (bo->deleted) { 544 + ret = ttm_bo_wait_ctx(bo, ctx); 545 + if (!ret) 546 + ttm_bo_cleanup_memtype_use(bo); 465 547 } else { 466 - ret = dma_resv_trylock(bo->base.resv); 467 - *locked = ret; 468 - if (busy) 469 - *busy = !ret; 548 + ret = ttm_bo_evict(bo, ctx); 470 549 } 550 + out_bo_moved: 551 + dma_resv_unlock(bo->base.resv); 552 + out_no_lock: 553 + ttm_bo_put(bo); 554 + return ret; 471 555 472 - if (ret && place && (bo->resource->mem_type != place->mem_type || 473 - !bo->bdev->funcs->eviction_valuable(bo, place))) { 474 - ret = false; 475 - if (*locked) { 476 - dma_resv_unlock(bo->base.resv); 477 
- *locked = false; 478 - } 479 - } 480 - 556 + out_no_ref: 557 + spin_unlock(&bdev->lru_lock); 481 558 return ret; 482 559 } 483 560 484 561 /** 485 - * ttm_mem_evict_wait_busy - wait for a busy BO to become available 486 - * 487 - * @busy_bo: BO which couldn't be locked with trylock 488 - * @ctx: operation context 489 - * @ticket: acquire ticket 490 - * 491 - * Try to lock a busy buffer object to avoid failing eviction. 562 + * struct ttm_bo_evict_walk - Parameters for the evict walk. 492 563 */ 493 - static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo, 494 - struct ttm_operation_ctx *ctx, 495 - struct ww_acquire_ctx *ticket) 564 + struct ttm_bo_evict_walk { 565 + /** @walk: The walk base parameters. */ 566 + struct ttm_lru_walk walk; 567 + /** @place: The place passed to the resource allocation. */ 568 + const struct ttm_place *place; 569 + /** @evictor: The buffer object we're trying to make room for. */ 570 + struct ttm_buffer_object *evictor; 571 + /** @res: The allocated resource if any. */ 572 + struct ttm_resource **res; 573 + /** @evicted: Number of successful evictions. */ 574 + unsigned long evicted; 575 + }; 576 + 577 + static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) 496 578 { 497 - int r; 579 + struct ttm_bo_evict_walk *evict_walk = 580 + container_of(walk, typeof(*evict_walk), walk); 581 + s64 lret; 498 582 499 - if (!busy_bo || !ticket) 500 - return -EBUSY; 501 - 502 - if (ctx->interruptible) 503 - r = dma_resv_lock_interruptible(busy_bo->base.resv, 504 - ticket); 505 - else 506 - r = dma_resv_lock(busy_bo->base.resv, ticket); 507 - 508 - /* 509 - * TODO: It would be better to keep the BO locked until allocation is at 510 - * least tried one more time, but that would mean a much larger rework 511 - * of TTM. 512 - */ 513 - if (!r) 514 - dma_resv_unlock(busy_bo->base.resv); 515 - 516 - return r == -EDEADLK ? 
-EBUSY : r; 517 - } 518 - 519 - int ttm_mem_evict_first(struct ttm_device *bdev, 520 - struct ttm_resource_manager *man, 521 - const struct ttm_place *place, 522 - struct ttm_operation_ctx *ctx, 523 - struct ww_acquire_ctx *ticket) 524 - { 525 - struct ttm_buffer_object *bo = NULL, *busy_bo = NULL; 526 - struct ttm_resource_cursor cursor; 527 - struct ttm_resource *res; 528 - bool locked = false; 529 - int ret; 530 - 531 - spin_lock(&bdev->lru_lock); 532 - ttm_resource_manager_for_each_res(man, &cursor, res) { 533 - bool busy; 534 - 535 - if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place, 536 - &locked, &busy)) { 537 - if (busy && !busy_bo && ticket != 538 - dma_resv_locking_ctx(res->bo->base.resv)) 539 - busy_bo = res->bo; 540 - continue; 541 - } 542 - 543 - if (ttm_bo_get_unless_zero(res->bo)) { 544 - bo = res->bo; 545 - break; 546 - } 547 - if (locked) 548 - dma_resv_unlock(res->bo->base.resv); 549 - } 550 - ttm_resource_cursor_fini(&cursor); 551 - 552 - if (!bo) { 553 - if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) 554 - busy_bo = NULL; 555 - spin_unlock(&bdev->lru_lock); 556 - ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); 557 - if (busy_bo) 558 - ttm_bo_put(busy_bo); 559 - return ret; 560 - } 583 + if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place)) 584 + return 0; 561 585 562 586 if (bo->deleted) { 563 - ret = ttm_bo_cleanup_refs(bo, ctx->interruptible, 564 - ctx->no_wait_gpu, locked); 565 - ttm_bo_put(bo); 566 - return ret; 587 + lret = ttm_bo_wait_ctx(bo, walk->ctx); 588 + if (!lret) 589 + ttm_bo_cleanup_memtype_use(bo); 590 + } else { 591 + lret = ttm_bo_evict(bo, walk->ctx); 567 592 } 568 593 569 - spin_unlock(&bdev->lru_lock); 594 + if (lret) 595 + goto out; 570 596 571 - ret = ttm_bo_evict(bo, ctx); 572 - if (locked) 573 - ttm_bo_unreserve(bo); 574 - else 575 - ttm_bo_move_to_lru_tail_unlocked(bo); 597 + evict_walk->evicted++; 598 + if (evict_walk->res) 599 + lret = ttm_resource_alloc(evict_walk->evictor, 
evict_walk->place, 600 + evict_walk->res); 601 + if (lret == 0) 602 + return 1; 603 + out: 604 + /* Errors that should terminate the walk. */ 605 + if (lret == -ENOSPC) 606 + return -EBUSY; 576 607 577 - ttm_bo_put(bo); 578 - return ret; 608 + return lret; 609 + } 610 + 611 + static const struct ttm_lru_walk_ops ttm_evict_walk_ops = { 612 + .process_bo = ttm_bo_evict_cb, 613 + }; 614 + 615 + static int ttm_bo_evict_alloc(struct ttm_device *bdev, 616 + struct ttm_resource_manager *man, 617 + const struct ttm_place *place, 618 + struct ttm_buffer_object *evictor, 619 + struct ttm_operation_ctx *ctx, 620 + struct ww_acquire_ctx *ticket, 621 + struct ttm_resource **res) 622 + { 623 + struct ttm_bo_evict_walk evict_walk = { 624 + .walk = { 625 + .ops = &ttm_evict_walk_ops, 626 + .ctx = ctx, 627 + .ticket = ticket, 628 + }, 629 + .place = place, 630 + .evictor = evictor, 631 + .res = res, 632 + }; 633 + s64 lret; 634 + 635 + evict_walk.walk.trylock_only = true; 636 + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); 637 + if (lret || !ticket) 638 + goto out; 639 + 640 + /* If ticket-locking, repeat while making progress. 
*/ 641 + evict_walk.walk.trylock_only = false; 642 + do { 643 + /* The walk may clear the evict_walk.walk.ticket field */ 644 + evict_walk.walk.ticket = ticket; 645 + evict_walk.evicted = 0; 646 + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); 647 + } while (!lret && evict_walk.evicted); 648 + out: 649 + if (lret < 0) 650 + return lret; 651 + if (lret == 0) 652 + return -EBUSY; 653 + return 0; 579 654 } 580 655 581 656 /** ··· 687 760 for (i = 0; i < placement->num_placement; ++i) { 688 761 const struct ttm_place *place = &placement->placement[i]; 689 762 struct ttm_resource_manager *man; 763 + bool may_evict; 690 764 691 765 man = ttm_manager_type(bdev, place->mem_type); 692 766 if (!man || !ttm_resource_manager_used(man)) ··· 697 769 TTM_PL_FLAG_FALLBACK)) 698 770 continue; 699 771 700 - do { 701 - ret = ttm_resource_alloc(bo, place, res); 702 - if (unlikely(ret && ret != -ENOSPC)) 772 + may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); 773 + ret = ttm_resource_alloc(bo, place, res); 774 + if (ret) { 775 + if (ret != -ENOSPC) 703 776 return ret; 704 - if (likely(!ret) || !force_space) 705 - break; 777 + if (!may_evict) 778 + continue; 706 779 707 - ret = ttm_mem_evict_first(bdev, man, place, ctx, 708 - ticket); 709 - if (unlikely(ret == -EBUSY)) 710 - break; 711 - if (unlikely(ret)) 780 + ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx, 781 + ticket, res); 782 + if (ret == -EBUSY) 783 + continue; 784 + if (ret) 712 785 return ret; 713 - } while (1); 714 - if (ret) 715 - continue; 786 + } 716 787 717 788 ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu); 718 789 if (unlikely(ret)) {
+4 -17
drivers/gpu/drm/ttm/ttm_resource.c
··· 495 495 }; 496 496 struct dma_fence *fence; 497 497 int ret; 498 - unsigned i; 499 498 500 - /* 501 - * Can't use standard list traversal since we're unlocking. 502 - */ 503 - 504 - spin_lock(&bdev->lru_lock); 505 - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { 506 - while (!list_empty(&man->lru[i])) { 507 - spin_unlock(&bdev->lru_lock); 508 - ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, 509 - NULL); 510 - if (ret) 511 - return ret; 512 - spin_lock(&bdev->lru_lock); 513 - } 514 - } 515 - spin_unlock(&bdev->lru_lock); 499 + do { 500 + ret = ttm_bo_evict_first(bdev, man, &ctx); 501 + cond_resched(); 502 + } while (!ret); 516 503 517 504 spin_lock(&man->move_lock); 518 505 fence = dma_fence_get(man->move);
+3 -5
include/drm/ttm/ttm_bo.h
··· 422 422 s64 target); 423 423 void ttm_bo_pin(struct ttm_buffer_object *bo); 424 424 void ttm_bo_unpin(struct ttm_buffer_object *bo); 425 - int ttm_mem_evict_first(struct ttm_device *bdev, 426 - struct ttm_resource_manager *man, 427 - const struct ttm_place *place, 428 - struct ttm_operation_ctx *ctx, 429 - struct ww_acquire_ctx *ticket); 425 + int ttm_bo_evict_first(struct ttm_device *bdev, 426 + struct ttm_resource_manager *man, 427 + struct ttm_operation_ctx *ctx); 430 428 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 431 429 struct vm_fault *vmf); 432 430 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,