Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Drop no-semaphore boosting

Now that we have fast timeslicing on semaphores, we no longer need to
prioritise non-semaphore work as we will yield any work blocked on a
semaphore to the next in the queue. Previously with no timeslicing,
blocking on the semaphore caused extremely bad scheduling with multiple
clients utilising multiple rings. Now, there is no impact and we can
remove the complication.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200513173504.28322-1-chris@chris-wilson.co.uk

+11 -73
-15
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
··· 2582 2582 /* Check that the context wasn't destroyed before submission */ 2583 2583 if (likely(!intel_context_is_closed(eb->context))) { 2584 2584 attr = eb->gem_context->sched; 2585 - 2586 - /* 2587 - * Boost actual workloads past semaphores! 2588 - * 2589 - * With semaphores we spin on one engine waiting for another, 2590 - * simply to reduce the latency of starting our work when 2591 - * the signaler completes. However, if there is any other 2592 - * work that we could be doing on this engine instead, that 2593 - * is better utilisation and will reduce the overall duration 2594 - * of the current work. To avoid PI boosting a semaphore 2595 - * far in the distance past over useful work, we keep a history 2596 - * of any semaphore use along our dependency chain. 2597 - */ 2598 - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) 2599 - attr.priority |= I915_PRIORITY_NOSEMAPHORE; 2600 2585 } else { 2601 2586 /* Serialise with context_close via the add_to_timeline */ 2602 2587 i915_request_set_error_once(rq, -ENOENT);
-9
drivers/gpu/drm/i915/gt/intel_lrc.c
··· 429 429 if (i915_request_has_nopreempt(rq)) 430 430 prio = I915_PRIORITY_UNPREEMPTABLE; 431 431 432 - /* 433 - * On unwinding the active request, we give it a priority bump 434 - * if it has completed waiting on any semaphore. If we know that 435 - * the request has already started, we can prevent an unwanted 436 - * preempt-to-idle cycle by taking that into account now. 437 - */ 438 - if (__i915_request_has_started(rq)) 439 - prio |= I915_PRIORITY_NOSEMAPHORE; 440 - 441 432 return prio; 442 433 } 443 434
+1
drivers/gpu/drm/i915/gt/selftest_context.c
··· 24 24 25 25 /* Opencode i915_request_add() so we can keep the timeline locked. */ 26 26 __i915_request_commit(rq); 27 + rq->sched.attr.priority = I915_PRIORITY_BARRIER; 27 28 __i915_request_queue(rq, NULL); 28 29 29 30 timeout = i915_request_wait(rq, 0, HZ / 10);
+1 -3
drivers/gpu/drm/i915/i915_priolist_types.h
··· 24 24 I915_PRIORITY_DISPLAY, 25 25 }; 26 26 27 - #define I915_USER_PRIORITY_SHIFT 1 27 + #define I915_USER_PRIORITY_SHIFT 0 28 28 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) 29 29 30 30 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) 31 31 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) 32 - 33 - #define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(0)) 34 32 35 33 /* Smallest priority value that cannot be bumped. */ 36 34 #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
+4 -36
drivers/gpu/drm/i915/i915_request.c
··· 368 368 } 369 369 spin_unlock_irq(&signal->lock); 370 370 371 - /* Copy across semaphore status as we need the same behaviour */ 372 - rq->sched.flags |= signal->sched.flags; 373 371 return 0; 374 372 } 375 373 ··· 535 537 spin_unlock(&request->lock); 536 538 537 539 /* We've already spun, don't charge on resubmitting. */ 538 - if (request->sched.semaphores && i915_request_started(request)) { 539 - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; 540 + if (request->sched.semaphores && i915_request_started(request)) 540 541 request->sched.semaphores = 0; 541 - } 542 542 543 543 /* 544 544 * We don't need to wake_up any waiters on request->execute, they ··· 594 598 return NOTIFY_DONE; 595 599 } 596 600 597 - static void irq_semaphore_cb(struct irq_work *wrk) 598 - { 599 - struct i915_request *rq = 600 - container_of(wrk, typeof(*rq), semaphore_work); 601 - 602 - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); 603 - i915_request_put(rq); 604 - } 605 - 606 601 static int __i915_sw_fence_call 607 602 semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) 608 603 { ··· 601 614 602 615 switch (state) { 603 616 case FENCE_COMPLETE: 604 - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { 605 - i915_request_get(rq); 606 - init_irq_work(&rq->semaphore_work, irq_semaphore_cb); 607 - irq_work_queue(&rq->semaphore_work); 608 - } 609 617 break; 610 618 611 619 case FENCE_FREE: ··· 979 997 gfp_t gfp) 980 998 { 981 999 const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; 1000 + struct i915_sw_fence *wait = &to->submit; 982 1001 983 1002 if (!intel_context_use_semaphores(to->context)) 984 1003 goto await_fence; ··· 1014 1031 goto await_fence; 1015 1032 1016 1033 to->sched.semaphores |= mask; 1017 - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; 1018 - return 0; 1034 + wait = &to->semaphore; 1019 1035 1020 1036 await_fence: 1021 - return i915_sw_fence_await_dma_fence(&to->submit, 1037 + return i915_sw_fence_await_dma_fence(wait, 1022 1038 &from->fence, 0, 1023 1039 I915_FENCE_GFP); 1024 1040 } ··· 1051 1069 ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); 1052 1070 if (ret < 0) 1053 1071 return ret; 1054 - 1055 - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) { 1056 - ret = i915_sw_fence_await_dma_fence(&to->semaphore, 1057 - &from->fence, 0, 1058 - I915_FENCE_GFP); 1059 - if (ret < 0) 1060 - return ret; 1061 - } 1062 - 1063 - if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) 1064 - to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; 1065 1072 1066 1073 return 0; 1067 1074 } ··· 1498 1527 if (ctx) 1499 1528 attr = ctx->sched; 1500 1529 rcu_read_unlock(); 1501 - 1502 - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) 1503 - attr.priority |= I915_PRIORITY_NOSEMAPHORE; 1504 1530 1505 1531 __i915_request_queue(rq, &attr); 1506 1532
-1
drivers/gpu/drm/i915/i915_request.h
··· 216 216 }; 217 217 struct list_head execute_cb; 218 218 struct i915_sw_fence semaphore; 219 - struct irq_work semaphore_work; 220 219 221 220 /* 222 221 * A list of everyone we wait upon, and everyone who waits upon us.
+4 -7
drivers/gpu/drm/i915/i915_scheduler.c
··· 51 51 GEM_BUG_ON(rb_first_cached(&execlists->queue) != 52 52 rb_first(&execlists->queue.rb_root)); 53 53 54 - last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; 54 + last_prio = INT_MAX; 55 55 for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { 56 56 const struct i915_priolist *p = to_priolist(rb); 57 57 58 - GEM_BUG_ON(p->priority >= last_prio); 58 + GEM_BUG_ON(p->priority > last_prio); 59 59 last_prio = p->priority; 60 60 61 61 GEM_BUG_ON(!p->used); ··· 434 434 dep->waiter = node; 435 435 dep->flags = flags; 436 436 437 - /* Keep track of whether anyone on this chain has a semaphore */ 438 - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && 439 - !node_started(signal)) 440 - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; 441 - 442 437 /* All set, now publish. Beware the lockless walkers. */ 443 438 list_add_rcu(&dep->signal_link, &node->signalers_list); 444 439 list_add_rcu(&dep->wait_link, &signal->waiters_list); 445 440 441 + /* Propagate the chains */ 442 + node->flags |= signal->flags; 446 443 ret = true; 447 444 } 448 445
+1 -2
drivers/gpu/drm/i915/i915_scheduler_types.h
··· 65 65 struct list_head link; 66 66 struct i915_sched_attr attr; 67 67 unsigned int flags; 68 - #define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0) 69 - #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(1) 68 + #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) 70 69 intel_engine_mask_t semaphores; 71 70 }; 72 71