Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Drop no-semaphore boosting

Now that we have fast timeslicing on semaphores, we no longer need to
prioritise non-semaphore work as we will yield any work blocked on a
semaphore to the next in the queue. Previously with no timeslicing,
blocking on the semaphore caused extremely bad scheduling with multiple
clients utilising multiple rings. Now, there is no impact and we can
remove the complication.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200513173504.28322-1-chris@chris-wilson.co.uk

+11 -73
-15
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 2582 2582 /* Check that the context wasn't destroyed before submission */ 2583 2583 if (likely(!intel_context_is_closed(eb->context))) { 2584 2584 attr = eb->gem_context->sched; 2585 - 2586 - /* 2587 - * Boost actual workloads past semaphores! 2588 - * 2589 - * With semaphores we spin on one engine waiting for another, 2590 - * simply to reduce the latency of starting our work when 2591 - * the signaler completes. However, if there is any other 2592 - * work that we could be doing on this engine instead, that 2593 - * is better utilisation and will reduce the overall duration 2594 - * of the current work. To avoid PI boosting a semaphore 2595 - * far in the distance past over useful work, we keep a history 2596 - * of any semaphore use along our dependency chain. 2597 - */ 2598 - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) 2599 - attr.priority |= I915_PRIORITY_NOSEMAPHORE; 2600 2585 } else { 2601 2586 /* Serialise with context_close via the add_to_timeline */ 2602 2587 i915_request_set_error_once(rq, -ENOENT);
-9
drivers/gpu/drm/i915/gt/intel_lrc.c
··· 429 429 if (i915_request_has_nopreempt(rq)) 430 430 prio = I915_PRIORITY_UNPREEMPTABLE; 431 431 432 - /* 433 - * On unwinding the active request, we give it a priority bump 434 - * if it has completed waiting on any semaphore. If we know that 435 - * the request has already started, we can prevent an unwanted 436 - * preempt-to-idle cycle by taking that into account now. 437 - */ 438 - if (__i915_request_has_started(rq)) 439 - prio |= I915_PRIORITY_NOSEMAPHORE; 440 - 441 432 return prio; 442 433 } 443 434
+1
drivers/gpu/drm/i915/gt/selftest_context.c
··· 24 24 25 25 /* Opencode i915_request_add() so we can keep the timeline locked. */ 26 26 __i915_request_commit(rq); 27 + rq->sched.attr.priority = I915_PRIORITY_BARRIER; 27 28 __i915_request_queue(rq, NULL); 28 29 29 30 timeout = i915_request_wait(rq, 0, HZ / 10);
+1 -3
drivers/gpu/drm/i915/i915_priolist_types.h
··· 24 24 I915_PRIORITY_DISPLAY, 25 25 }; 26 26 27 - #define I915_USER_PRIORITY_SHIFT 1 27 + #define I915_USER_PRIORITY_SHIFT 0 28 28 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) 29 29 30 30 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) 31 31 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) 32 - 33 - #define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(0)) 34 32 35 33 /* Smallest priority value that cannot be bumped. */ 36 34 #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
+4 -36
drivers/gpu/drm/i915/i915_request.c
··· 368 368 } 369 369 spin_unlock_irq(&signal->lock); 370 370 371 - /* Copy across semaphore status as we need the same behaviour */ 372 - rq->sched.flags |= signal->sched.flags; 373 371 return 0; 374 372 } 375 373 ··· 535 537 spin_unlock(&request->lock); 536 538 537 539 /* We've already spun, don't charge on resubmitting. */ 538 - if (request->sched.semaphores && i915_request_started(request)) { 539 - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; 540 + if (request->sched.semaphores && i915_request_started(request)) 540 541 request->sched.semaphores = 0; 541 - } 542 542 543 543 /* 544 544 * We don't need to wake_up any waiters on request->execute, they ··· 594 598 return NOTIFY_DONE; 595 599 } 596 600 597 - static void irq_semaphore_cb(struct irq_work *wrk) 598 - { 599 - struct i915_request *rq = 600 - container_of(wrk, typeof(*rq), semaphore_work); 601 - 602 - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); 603 - i915_request_put(rq); 604 - } 605 - 606 601 static int __i915_sw_fence_call 607 602 semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 608 603 { ··· 601 614 602 615 switch (state) { 603 616 case FENCE_COMPLETE: 604 - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { 605 - i915_request_get(rq); 606 - init_irq_work(&rq->semaphore_work, irq_semaphore_cb); 607 - irq_work_queue(&rq->semaphore_work); 608 - } 609 617 break; 610 618 611 619 case FENCE_FREE: ··· 979 997 gfp_t gfp) 980 998 { 981 999 const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; 1000 + struct i915_sw_fence *wait = &to->submit; 982 1001 983 1002 if (!intel_context_use_semaphores(to->context)) 984 1003 goto await_fence; ··· 1014 1031 goto await_fence; 1015 1032 1016 1033 to->sched.semaphores |= mask; 1017 - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; 1018 - return 0; 1034 + wait = &to->semaphore; 1019 1035 1020 1036 await_fence: 1021 - return i915_sw_fence_await_dma_fence(&to->submit, 1037 + return i915_sw_fence_await_dma_fence(wait, 1022 1038 &from->fence, 0, 1023 1039 I915_FENCE_GFP); 1024 1040 } ··· 1051 1069 ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); 1052 1070 if (ret < 0) 1053 1071 return ret; 1054 - 1055 - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) { 1056 - ret = i915_sw_fence_await_dma_fence(&to->semaphore, 1057 - &from->fence, 0, 1058 - I915_FENCE_GFP); 1059 - if (ret < 0) 1060 - return ret; 1061 - } 1062 - 1063 - if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) 1064 - to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; 1065 1072 1066 1073 return 0; 1067 1074 } ··· 1498 1527 if (ctx) 1499 1528 attr = ctx->sched; 1500 1529 rcu_read_unlock(); 1501 - 1502 - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) 1503 - attr.priority |= I915_PRIORITY_NOSEMAPHORE; 1504 1530 1505 1531 __i915_request_queue(rq, &attr); 1506 1532
-1
drivers/gpu/drm/i915/i915_request.h
··· 216 216 }; 217 217 struct list_head execute_cb; 218 218 struct i915_sw_fence semaphore; 219 - struct irq_work semaphore_work; 220 219 221 220 /* 222 221 * A list of everyone we wait upon, and everyone who waits upon us.
+4 -7
drivers/gpu/drm/i915/i915_scheduler.c
··· 51 51 GEM_BUG_ON(rb_first_cached(&execlists->queue) != 52 52 rb_first(&execlists->queue.rb_root)); 53 53 54 - last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; 54 + last_prio = INT_MAX; 55 55 for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { 56 56 const struct i915_priolist *p = to_priolist(rb); 57 57 58 - GEM_BUG_ON(p->priority >= last_prio); 58 + GEM_BUG_ON(p->priority > last_prio); 59 59 last_prio = p->priority; 60 60 61 61 GEM_BUG_ON(!p->used); ··· 434 434 dep->waiter = node; 435 435 dep->flags = flags; 436 436 437 - /* Keep track of whether anyone on this chain has a semaphore */ 438 - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && 439 - !node_started(signal)) 440 - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; 441 - 442 437 /* All set, now publish. Beware the lockless walkers. */ 443 438 list_add_rcu(&dep->signal_link, &node->signalers_list); 444 439 list_add_rcu(&dep->wait_link, &signal->waiters_list); 445 440 441 + /* Propagate the chains */ 442 + node->flags |= signal->flags; 446 443 ret = true; 447 444 } 448 445
+1 -2
drivers/gpu/drm/i915/i915_scheduler_types.h
··· 65 65 struct list_head link; 66 66 struct i915_sched_attr attr; 67 67 unsigned int flags; 68 - #define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0) 69 - #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(1) 68 + #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) 70 69 intel_engine_mask_t semaphores; 71 70 }; 72 71