Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.4 759 lines 19 kB view raw
1/* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7#include <linux/debugobjects.h> 8 9#include "gt/intel_engine_pm.h" 10 11#include "i915_drv.h" 12#include "i915_active.h" 13#include "i915_globals.h" 14 15#define BKL(ref) (&(ref)->i915->drm.struct_mutex) 16 17/* 18 * Active refs memory management 19 * 20 * To be more economical with memory, we reap all the i915_active trees as 21 * they idle (when we know the active requests are inactive) and allocate the 22 * nodes from a local slab cache to hopefully reduce the fragmentation. 23 */ 24static struct i915_global_active { 25 struct i915_global base; 26 struct kmem_cache *slab_cache; 27} global; 28 29struct active_node { 30 struct i915_active_request base; 31 struct i915_active *ref; 32 struct rb_node node; 33 u64 timeline; 34}; 35 36static inline struct active_node * 37node_from_active(struct i915_active_request *active) 38{ 39 return container_of(active, struct active_node, base); 40} 41 42#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers) 43 44static inline bool is_barrier(const struct i915_active_request *active) 45{ 46 return IS_ERR(rcu_access_pointer(active->request)); 47} 48 49static inline struct llist_node *barrier_to_ll(struct active_node *node) 50{ 51 GEM_BUG_ON(!is_barrier(&node->base)); 52 return (struct llist_node *)&node->base.link; 53} 54 55static inline struct intel_engine_cs * 56__barrier_to_engine(struct active_node *node) 57{ 58 return (struct intel_engine_cs *)READ_ONCE(node->base.link.prev); 59} 60 61static inline struct intel_engine_cs * 62barrier_to_engine(struct active_node *node) 63{ 64 GEM_BUG_ON(!is_barrier(&node->base)); 65 return __barrier_to_engine(node); 66} 67 68static inline struct active_node *barrier_from_ll(struct llist_node *x) 69{ 70 return container_of((struct list_head *)x, 71 struct active_node, base.link); 72} 73 74#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS) 75 76static void *active_debug_hint(void *addr) 77{ 78 struct i915_active *ref = addr; 79 80 return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref; 81} 82 83static struct debug_obj_descr active_debug_desc = { 84 .name = "i915_active", 85 .debug_hint = active_debug_hint, 86}; 87 88static void debug_active_init(struct i915_active *ref) 89{ 90 debug_object_init(ref, &active_debug_desc); 91} 92 93static void debug_active_activate(struct i915_active *ref) 94{ 95 debug_object_activate(ref, &active_debug_desc); 96} 97 98static void debug_active_deactivate(struct i915_active *ref) 99{ 100 debug_object_deactivate(ref, &active_debug_desc); 101} 102 103static void debug_active_fini(struct i915_active *ref) 104{ 105 debug_object_free(ref, &active_debug_desc); 106} 107 108static void debug_active_assert(struct i915_active *ref) 109{ 110 debug_object_assert_init(ref, &active_debug_desc); 111} 112 113#else 114 115static inline void debug_active_init(struct i915_active *ref) { } 116static inline void debug_active_activate(struct i915_active *ref) { } 117static inline void debug_active_deactivate(struct i915_active *ref) { } 118static inline void debug_active_fini(struct i915_active *ref) { } 119static inline void debug_active_assert(struct i915_active *ref) { } 120 121#endif 122 123static void 124__active_retire(struct i915_active *ref) 125{ 126 struct active_node *it, *n; 127 struct rb_root root; 128 bool retire = false; 129 130 lockdep_assert_held(&ref->mutex); 131 132 /* return the unused nodes to our slabcache -- flushing the allocator */ 133 if (atomic_dec_and_test(&ref->count)) { 134 debug_active_deactivate(ref); 135 root = ref->tree; 136 ref->tree = RB_ROOT; 137 ref->cache = NULL; 138 retire = true; 139 } 140 141 mutex_unlock(&ref->mutex); 142 if (!retire) 143 return; 144 145 rbtree_postorder_for_each_entry_safe(it, n, &root, node) { 146 GEM_BUG_ON(i915_active_request_isset(&it->base)); 147 kmem_cache_free(global.slab_cache, it); 148 } 149 150 /* After the final retire, the entire struct may be freed */ 151 if (ref->retire) 152 ref->retire(ref); 153} 154 155static void 156active_retire(struct i915_active *ref) 157{ 158 GEM_BUG_ON(!atomic_read(&ref->count)); 159 if (atomic_add_unless(&ref->count, -1, 1)) 160 return; 161 162 /* One active may be flushed from inside the acquire of another */ 163 mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); 164 __active_retire(ref); 165} 166 167static void 168node_retire(struct i915_active_request *base, struct i915_request *rq) 169{ 170 active_retire(node_from_active(base)->ref); 171} 172 173static struct i915_active_request * 174active_instance(struct i915_active *ref, struct intel_timeline *tl) 175{ 176 struct active_node *node, *prealloc; 177 struct rb_node **p, *parent; 178 u64 idx = tl->fence_context; 179 180 /* 181 * We track the most recently used timeline to skip a rbtree search 182 * for the common case, under typical loads we never need the rbtree 183 * at all. We can reuse the last slot if it is empty, that is 184 * after the previous activity has been retired, or if it matches the 185 * current timeline. 186 */ 187 node = READ_ONCE(ref->cache); 188 if (node && node->timeline == idx) 189 return &node->base; 190 191 /* Preallocate a replacement, just in case */ 192 prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 193 if (!prealloc) 194 return NULL; 195 196 mutex_lock(&ref->mutex); 197 GEM_BUG_ON(i915_active_is_idle(ref)); 198 199 parent = NULL; 200 p = &ref->tree.rb_node; 201 while (*p) { 202 parent = *p; 203 204 node = rb_entry(parent, struct active_node, node); 205 if (node->timeline == idx) { 206 kmem_cache_free(global.slab_cache, prealloc); 207 goto out; 208 } 209 210 if (node->timeline < idx) 211 p = &parent->rb_right; 212 else 213 p = &parent->rb_left; 214 } 215 216 node = prealloc; 217 i915_active_request_init(&node->base, &tl->mutex, NULL, node_retire); 218 node->ref = ref; 219 node->timeline = idx; 220 221 rb_link_node(&node->node, parent, p); 222 rb_insert_color(&node->node, &ref->tree); 223 224out: 225 ref->cache = node; 226 mutex_unlock(&ref->mutex); 227 228 BUILD_BUG_ON(offsetof(typeof(*node), base)); 229 return &node->base; 230} 231 232void __i915_active_init(struct drm_i915_private *i915, 233 struct i915_active *ref, 234 int (*active)(struct i915_active *ref), 235 void (*retire)(struct i915_active *ref), 236 struct lock_class_key *key) 237{ 238 debug_active_init(ref); 239 240 ref->i915 = i915; 241 ref->flags = 0; 242 ref->active = active; 243 ref->retire = retire; 244 ref->tree = RB_ROOT; 245 ref->cache = NULL; 246 init_llist_head(&ref->preallocated_barriers); 247 atomic_set(&ref->count, 0); 248 __mutex_init(&ref->mutex, "i915_active", key); 249} 250 251static bool ____active_del_barrier(struct i915_active *ref, 252 struct active_node *node, 253 struct intel_engine_cs *engine) 254 255{ 256 struct llist_node *head = NULL, *tail = NULL; 257 struct llist_node *pos, *next; 258 259 GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context); 260 261 /* 262 * Rebuild the llist excluding our node. We may perform this 263 * outside of the kernel_context timeline mutex and so someone 264 * else may be manipulating the engine->barrier_tasks, in 265 * which case either we or they will be upset :) 266 * 267 * A second __active_del_barrier() will report failure to claim 268 * the active_node and the caller will just shrug and know not to 269 * claim ownership of its node. 270 * 271 * A concurrent i915_request_add_active_barriers() will miss adding 272 * any of the tasks, but we will try again on the next -- and since 273 * we are actively using the barrier, we know that there will be 274 * at least another opportunity when we idle. 275 */ 276 llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) { 277 if (node == barrier_from_ll(pos)) { 278 node = NULL; 279 continue; 280 } 281 282 pos->next = head; 283 head = pos; 284 if (!tail) 285 tail = pos; 286 } 287 if (head) 288 llist_add_batch(head, tail, &engine->barrier_tasks); 289 290 return !node; 291} 292 293static bool 294__active_del_barrier(struct i915_active *ref, struct active_node *node) 295{ 296 return ____active_del_barrier(ref, node, barrier_to_engine(node)); 297} 298 299int i915_active_ref(struct i915_active *ref, 300 struct intel_timeline *tl, 301 struct i915_request *rq) 302{ 303 struct i915_active_request *active; 304 int err; 305 306 lockdep_assert_held(&tl->mutex); 307 308 /* Prevent reaping in case we malloc/wait while building the tree */ 309 err = i915_active_acquire(ref); 310 if (err) 311 return err; 312 313 active = active_instance(ref, tl); 314 if (!active) { 315 err = -ENOMEM; 316 goto out; 317 } 318 319 if (is_barrier(active)) { /* proto-node used by our idle barrier */ 320 /* 321 * This request is on the kernel_context timeline, and so 322 * we can use it to substitute for the pending idle-barrer 323 * request that we want to emit on the kernel_context. 324 */ 325 __active_del_barrier(ref, node_from_active(active)); 326 RCU_INIT_POINTER(active->request, NULL); 327 INIT_LIST_HEAD(&active->link); 328 } else { 329 if (!i915_active_request_isset(active)) 330 atomic_inc(&ref->count); 331 } 332 GEM_BUG_ON(!atomic_read(&ref->count)); 333 __i915_active_request_set(active, rq); 334 335out: 336 i915_active_release(ref); 337 return err; 338} 339 340int i915_active_acquire(struct i915_active *ref) 341{ 342 int err; 343 344 debug_active_assert(ref); 345 if (atomic_add_unless(&ref->count, 1, 0)) 346 return 0; 347 348 err = mutex_lock_interruptible(&ref->mutex); 349 if (err) 350 return err; 351 352 if (!atomic_read(&ref->count) && ref->active) 353 err = ref->active(ref); 354 if (!err) { 355 debug_active_activate(ref); 356 atomic_inc(&ref->count); 357 } 358 359 mutex_unlock(&ref->mutex); 360 361 return err; 362} 363 364void i915_active_release(struct i915_active *ref) 365{ 366 debug_active_assert(ref); 367 active_retire(ref); 368} 369 370static void __active_ungrab(struct i915_active *ref) 371{ 372 clear_and_wake_up_bit(I915_ACTIVE_GRAB_BIT, &ref->flags); 373} 374 375bool i915_active_trygrab(struct i915_active *ref) 376{ 377 debug_active_assert(ref); 378 379 if (test_and_set_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)) 380 return false; 381 382 if (!atomic_add_unless(&ref->count, 1, 0)) { 383 __active_ungrab(ref); 384 return false; 385 } 386 387 return true; 388} 389 390void i915_active_ungrab(struct i915_active *ref) 391{ 392 GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags)); 393 394 active_retire(ref); 395 __active_ungrab(ref); 396} 397 398int i915_active_wait(struct i915_active *ref) 399{ 400 struct active_node *it, *n; 401 int err; 402 403 might_sleep(); 404 might_lock(&ref->mutex); 405 406 if (i915_active_is_idle(ref)) 407 return 0; 408 409 err = mutex_lock_interruptible(&ref->mutex); 410 if (err) 411 return err; 412 413 if (!atomic_add_unless(&ref->count, 1, 0)) { 414 mutex_unlock(&ref->mutex); 415 return 0; 416 } 417 418 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { 419 if (is_barrier(&it->base)) { /* unconnected idle-barrier */ 420 err = -EBUSY; 421 break; 422 } 423 424 err = i915_active_request_retire(&it->base, BKL(ref)); 425 if (err) 426 break; 427 } 428 429 __active_retire(ref); 430 if (err) 431 return err; 432 433 if (wait_on_bit(&ref->flags, I915_ACTIVE_GRAB_BIT, TASK_KILLABLE)) 434 return -EINTR; 435 436 if (!i915_active_is_idle(ref)) 437 return -EBUSY; 438 439 return 0; 440} 441 442int i915_request_await_active_request(struct i915_request *rq, 443 struct i915_active_request *active) 444{ 445 struct i915_request *barrier = 446 i915_active_request_raw(active, &rq->i915->drm.struct_mutex); 447 448 return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; 449} 450 451int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) 452{ 453 struct active_node *it, *n; 454 int err; 455 456 if (RB_EMPTY_ROOT(&ref->tree)) 457 return 0; 458 459 /* await allocates and so we need to avoid hitting the shrinker */ 460 err = i915_active_acquire(ref); 461 if (err) 462 return err; 463 464 mutex_lock(&ref->mutex); 465 rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { 466 err = i915_request_await_active_request(rq, &it->base); 467 if (err) 468 break; 469 } 470 mutex_unlock(&ref->mutex); 471 472 i915_active_release(ref); 473 return err; 474} 475 476#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 477void i915_active_fini(struct i915_active *ref) 478{ 479 debug_active_fini(ref); 480 GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); 481 GEM_BUG_ON(atomic_read(&ref->count)); 482 mutex_destroy(&ref->mutex); 483} 484#endif 485 486static inline bool is_idle_barrier(struct active_node *node, u64 idx) 487{ 488 return node->timeline == idx && !i915_active_request_isset(&node->base); 489} 490 491static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) 492{ 493 struct rb_node *prev, *p; 494 495 if (RB_EMPTY_ROOT(&ref->tree)) 496 return NULL; 497 498 mutex_lock(&ref->mutex); 499 GEM_BUG_ON(i915_active_is_idle(ref)); 500 501 /* 502 * Try to reuse any existing barrier nodes already allocated for this 503 * i915_active, due to overlapping active phases there is likely a 504 * node kept alive (as we reuse before parking). We prefer to reuse 505 * completely idle barriers (less hassle in manipulating the llists), 506 * but otherwise any will do. 507 */ 508 if (ref->cache && is_idle_barrier(ref->cache, idx)) { 509 p = &ref->cache->node; 510 goto match; 511 } 512 513 prev = NULL; 514 p = ref->tree.rb_node; 515 while (p) { 516 struct active_node *node = 517 rb_entry(p, struct active_node, node); 518 519 if (is_idle_barrier(node, idx)) 520 goto match; 521 522 prev = p; 523 if (node->timeline < idx) 524 p = p->rb_right; 525 else 526 p = p->rb_left; 527 } 528 529 /* 530 * No quick match, but we did find the leftmost rb_node for the 531 * kernel_context. Walk the rb_tree in-order to see if there were 532 * any idle-barriers on this timeline that we missed, or just use 533 * the first pending barrier. 534 */ 535 for (p = prev; p; p = rb_next(p)) { 536 struct active_node *node = 537 rb_entry(p, struct active_node, node); 538 struct intel_engine_cs *engine; 539 540 if (node->timeline > idx) 541 break; 542 543 if (node->timeline < idx) 544 continue; 545 546 if (is_idle_barrier(node, idx)) 547 goto match; 548 549 /* 550 * The list of pending barriers is protected by the 551 * kernel_context timeline, which notably we do not hold 552 * here. i915_request_add_active_barriers() may consume 553 * the barrier before we claim it, so we have to check 554 * for success. 555 */ 556 engine = __barrier_to_engine(node); 557 smp_rmb(); /* serialise with add_active_barriers */ 558 if (is_barrier(&node->base) && 559 ____active_del_barrier(ref, node, engine)) 560 goto match; 561 } 562 563 mutex_unlock(&ref->mutex); 564 565 return NULL; 566 567match: 568 rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ 569 if (p == &ref->cache->node) 570 ref->cache = NULL; 571 mutex_unlock(&ref->mutex); 572 573 return rb_entry(p, struct active_node, node); 574} 575 576int i915_active_acquire_preallocate_barrier(struct i915_active *ref, 577 struct intel_engine_cs *engine) 578{ 579 struct drm_i915_private *i915 = engine->i915; 580 intel_engine_mask_t tmp, mask = engine->mask; 581 struct llist_node *pos, *next; 582 int err; 583 584 GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers)); 585 586 /* 587 * Preallocate a node for each physical engine supporting the target 588 * engine (remember virtual engines have more than one sibling). 589 * We can then use the preallocated nodes in 590 * i915_active_acquire_barrier() 591 */ 592 for_each_engine_masked(engine, i915, mask, tmp) { 593 u64 idx = engine->kernel_context->timeline->fence_context; 594 struct active_node *node; 595 596 node = reuse_idle_barrier(ref, idx); 597 if (!node) { 598 node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); 599 if (!node) { 600 err = ENOMEM; 601 goto unwind; 602 } 603 604#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 605 node->base.lock = 606 &engine->kernel_context->timeline->mutex; 607#endif 608 RCU_INIT_POINTER(node->base.request, NULL); 609 node->base.retire = node_retire; 610 node->timeline = idx; 611 node->ref = ref; 612 } 613 614 if (!i915_active_request_isset(&node->base)) { 615 /* 616 * Mark this as being *our* unconnected proto-node. 617 * 618 * Since this node is not in any list, and we have 619 * decoupled it from the rbtree, we can reuse the 620 * request to indicate this is an idle-barrier node 621 * and then we can use the rb_node and list pointers 622 * for our tracking of the pending barrier. 623 */ 624 RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); 625 node->base.link.prev = (void *)engine; 626 atomic_inc(&ref->count); 627 } 628 629 GEM_BUG_ON(barrier_to_engine(node) != engine); 630 llist_add(barrier_to_ll(node), &ref->preallocated_barriers); 631 intel_engine_pm_get(engine); 632 } 633 634 return 0; 635 636unwind: 637 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { 638 struct active_node *node = barrier_from_ll(pos); 639 640 atomic_dec(&ref->count); 641 intel_engine_pm_put(barrier_to_engine(node)); 642 643 kmem_cache_free(global.slab_cache, node); 644 } 645 return err; 646} 647 648void i915_active_acquire_barrier(struct i915_active *ref) 649{ 650 struct llist_node *pos, *next; 651 652 GEM_BUG_ON(i915_active_is_idle(ref)); 653 654 /* 655 * Transfer the list of preallocated barriers into the 656 * i915_active rbtree, but only as proto-nodes. They will be 657 * populated by i915_request_add_active_barriers() to point to the 658 * request that will eventually release them. 659 */ 660 mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); 661 llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { 662 struct active_node *node = barrier_from_ll(pos); 663 struct intel_engine_cs *engine = barrier_to_engine(node); 664 struct rb_node **p, *parent; 665 666 parent = NULL; 667 p = &ref->tree.rb_node; 668 while (*p) { 669 struct active_node *it; 670 671 parent = *p; 672 673 it = rb_entry(parent, struct active_node, node); 674 if (it->timeline < node->timeline) 675 p = &parent->rb_right; 676 else 677 p = &parent->rb_left; 678 } 679 rb_link_node(&node->node, parent, p); 680 rb_insert_color(&node->node, &ref->tree); 681 682 llist_add(barrier_to_ll(node), &engine->barrier_tasks); 683 intel_engine_pm_put(engine); 684 } 685 mutex_unlock(&ref->mutex); 686} 687 688void i915_request_add_active_barriers(struct i915_request *rq) 689{ 690 struct intel_engine_cs *engine = rq->engine; 691 struct llist_node *node, *next; 692 693 GEM_BUG_ON(intel_engine_is_virtual(engine)); 694 GEM_BUG_ON(rq->timeline != engine->kernel_context->timeline); 695 696 /* 697 * Attach the list of proto-fences to the in-flight request such 698 * that the parent i915_active will be released when this request 699 * is retired. 700 */ 701 llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { 702 RCU_INIT_POINTER(barrier_from_ll(node)->base.request, rq); 703 smp_wmb(); /* serialise with reuse_idle_barrier */ 704 list_add_tail((struct list_head *)node, &rq->active_list); 705 } 706} 707 708int i915_active_request_set(struct i915_active_request *active, 709 struct i915_request *rq) 710{ 711 int err; 712 713#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) 714 lockdep_assert_held(active->lock); 715#endif 716 717 /* Must maintain ordering wrt previous active requests */ 718 err = i915_request_await_active_request(rq, active); 719 if (err) 720 return err; 721 722 __i915_active_request_set(active, rq); 723 return 0; 724} 725 726void i915_active_retire_noop(struct i915_active_request *active, 727 struct i915_request *request) 728{ 729 /* Space left intentionally blank */ 730} 731 732#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 733#include "selftests/i915_active.c" 734#endif 735 736static void i915_global_active_shrink(void) 737{ 738 kmem_cache_shrink(global.slab_cache); 739} 740 741static void i915_global_active_exit(void) 742{ 743 kmem_cache_destroy(global.slab_cache); 744} 745 746static struct i915_global_active global = { { 747 .shrink = i915_global_active_shrink, 748 .exit = i915_global_active_exit, 749} }; 750 751int __init i915_global_active_init(void) 752{ 753 global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN); 754 if (!global.slab_cache) 755 return -ENOMEM; 756 757 i915_global_register(&global.base); 758 return 0; 759}