Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915: Pull scheduling under standalone lock

Currently, the backend scheduling code abuses struct_mutex in order to
have a global lock to manipulate a temporary list (without widespread
allocation) and to protect against list modifications. This is an
extraneous coupling to struct_mutex and further can not extend beyond
the local device.

Pull all the code that needs to be under the one true lock into
i915_scheduler.c, and make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-2-chris@chris-wilson.co.uk

+411 -361
+1
drivers/gpu/drm/i915/Makefile
··· 75 75 i915_gemfs.o \ 76 76 i915_query.o \ 77 77 i915_request.o \ 78 + i915_scheduler.o \ 78 79 i915_timeline.o \ 79 80 i915_trace_points.o \ 80 81 i915_vma.o \
-85
drivers/gpu/drm/i915/i915_request.c
··· 111 111 spin_unlock(&file_priv->mm.lock); 112 112 } 113 113 114 - static struct i915_dependency * 115 - i915_dependency_alloc(struct drm_i915_private *i915) 116 - { 117 - return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); 118 - } 119 - 120 - static void 121 - i915_dependency_free(struct drm_i915_private *i915, 122 - struct i915_dependency *dep) 123 - { 124 - kmem_cache_free(i915->dependencies, dep); 125 - } 126 - 127 - static void 128 - __i915_sched_node_add_dependency(struct i915_sched_node *node, 129 - struct i915_sched_node *signal, 130 - struct i915_dependency *dep, 131 - unsigned long flags) 132 - { 133 - INIT_LIST_HEAD(&dep->dfs_link); 134 - list_add(&dep->wait_link, &signal->waiters_list); 135 - list_add(&dep->signal_link, &node->signalers_list); 136 - dep->signaler = signal; 137 - dep->flags = flags; 138 - } 139 - 140 - static int 141 - i915_sched_node_add_dependency(struct drm_i915_private *i915, 142 - struct i915_sched_node *node, 143 - struct i915_sched_node *signal) 144 - { 145 - struct i915_dependency *dep; 146 - 147 - dep = i915_dependency_alloc(i915); 148 - if (!dep) 149 - return -ENOMEM; 150 - 151 - __i915_sched_node_add_dependency(node, signal, dep, 152 - I915_DEPENDENCY_ALLOC); 153 - return 0; 154 - } 155 - 156 - static void 157 - i915_sched_node_fini(struct drm_i915_private *i915, 158 - struct i915_sched_node *node) 159 - { 160 - struct i915_dependency *dep, *tmp; 161 - 162 - GEM_BUG_ON(!list_empty(&node->link)); 163 - 164 - /* 165 - * Everyone we depended upon (the fences we wait to be signaled) 166 - * should retire before us and remove themselves from our list. 167 - * However, retirement is run independently on each timeline and 168 - * so we may be called out-of-order. 
169 - */ 170 - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { 171 - GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); 172 - GEM_BUG_ON(!list_empty(&dep->dfs_link)); 173 - 174 - list_del(&dep->wait_link); 175 - if (dep->flags & I915_DEPENDENCY_ALLOC) 176 - i915_dependency_free(i915, dep); 177 - } 178 - 179 - /* Remove ourselves from everyone who depends upon us */ 180 - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { 181 - GEM_BUG_ON(dep->signaler != node); 182 - GEM_BUG_ON(!list_empty(&dep->dfs_link)); 183 - 184 - list_del(&dep->signal_link); 185 - if (dep->flags & I915_DEPENDENCY_ALLOC) 186 - i915_dependency_free(i915, dep); 187 - } 188 - } 189 - 190 - static void 191 - i915_sched_node_init(struct i915_sched_node *node) 192 - { 193 - INIT_LIST_HEAD(&node->signalers_list); 194 - INIT_LIST_HEAD(&node->waiters_list); 195 - INIT_LIST_HEAD(&node->link); 196 - node->attr.priority = I915_PRIORITY_INVALID; 197 - } 198 - 199 114 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) 200 115 { 201 116 struct intel_engine_cs *engine;
-8
drivers/gpu/drm/i915/i915_request.h
··· 332 332 return __i915_request_completed(rq, seqno); 333 333 } 334 334 335 - static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) 336 - { 337 - const struct i915_request *rq = 338 - container_of(node, const struct i915_request, sched); 339 - 340 - return i915_request_completed(rq); 341 - } 342 - 343 335 void i915_retire_requests(struct drm_i915_private *i915); 344 336 345 337 /*
+377
drivers/gpu/drm/i915/i915_scheduler.c
··· 1 + /* 2 + * SPDX-License-Identifier: MIT 3 + * 4 + * Copyright © 2018 Intel Corporation 5 + */ 6 + 7 + #include <linux/mutex.h> 8 + 9 + #include "i915_drv.h" 10 + #include "i915_request.h" 11 + #include "i915_scheduler.h" 12 + 13 + static DEFINE_SPINLOCK(schedule_lock); 14 + 15 + static const struct i915_request * 16 + node_to_request(const struct i915_sched_node *node) 17 + { 18 + return container_of(node, const struct i915_request, sched); 19 + } 20 + 21 + static inline bool node_signaled(const struct i915_sched_node *node) 22 + { 23 + return i915_request_completed(node_to_request(node)); 24 + } 25 + 26 + void i915_sched_node_init(struct i915_sched_node *node) 27 + { 28 + INIT_LIST_HEAD(&node->signalers_list); 29 + INIT_LIST_HEAD(&node->waiters_list); 30 + INIT_LIST_HEAD(&node->link); 31 + node->attr.priority = I915_PRIORITY_INVALID; 32 + } 33 + 34 + static struct i915_dependency * 35 + i915_dependency_alloc(struct drm_i915_private *i915) 36 + { 37 + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); 38 + } 39 + 40 + static void 41 + i915_dependency_free(struct drm_i915_private *i915, 42 + struct i915_dependency *dep) 43 + { 44 + kmem_cache_free(i915->dependencies, dep); 45 + } 46 + 47 + bool __i915_sched_node_add_dependency(struct i915_sched_node *node, 48 + struct i915_sched_node *signal, 49 + struct i915_dependency *dep, 50 + unsigned long flags) 51 + { 52 + bool ret = false; 53 + 54 + spin_lock(&schedule_lock); 55 + 56 + if (!node_signaled(signal)) { 57 + INIT_LIST_HEAD(&dep->dfs_link); 58 + list_add(&dep->wait_link, &signal->waiters_list); 59 + list_add(&dep->signal_link, &node->signalers_list); 60 + dep->signaler = signal; 61 + dep->flags = flags; 62 + 63 + ret = true; 64 + } 65 + 66 + spin_unlock(&schedule_lock); 67 + 68 + return ret; 69 + } 70 + 71 + int i915_sched_node_add_dependency(struct drm_i915_private *i915, 72 + struct i915_sched_node *node, 73 + struct i915_sched_node *signal) 74 + { 75 + struct i915_dependency *dep; 76 + 77 + dep = 
i915_dependency_alloc(i915); 78 + if (!dep) 79 + return -ENOMEM; 80 + 81 + if (!__i915_sched_node_add_dependency(node, signal, dep, 82 + I915_DEPENDENCY_ALLOC)) 83 + i915_dependency_free(i915, dep); 84 + 85 + return 0; 86 + } 87 + 88 + void i915_sched_node_fini(struct drm_i915_private *i915, 89 + struct i915_sched_node *node) 90 + { 91 + struct i915_dependency *dep, *tmp; 92 + 93 + GEM_BUG_ON(!list_empty(&node->link)); 94 + 95 + spin_lock(&schedule_lock); 96 + 97 + /* 98 + * Everyone we depended upon (the fences we wait to be signaled) 99 + * should retire before us and remove themselves from our list. 100 + * However, retirement is run independently on each timeline and 101 + * so we may be called out-of-order. 102 + */ 103 + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { 104 + GEM_BUG_ON(!node_signaled(dep->signaler)); 105 + GEM_BUG_ON(!list_empty(&dep->dfs_link)); 106 + 107 + list_del(&dep->wait_link); 108 + if (dep->flags & I915_DEPENDENCY_ALLOC) 109 + i915_dependency_free(i915, dep); 110 + } 111 + 112 + /* Remove ourselves from everyone who depends upon us */ 113 + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { 114 + GEM_BUG_ON(dep->signaler != node); 115 + GEM_BUG_ON(!list_empty(&dep->dfs_link)); 116 + 117 + list_del(&dep->signal_link); 118 + if (dep->flags & I915_DEPENDENCY_ALLOC) 119 + i915_dependency_free(i915, dep); 120 + } 121 + 122 + spin_unlock(&schedule_lock); 123 + } 124 + 125 + static inline struct i915_priolist *to_priolist(struct rb_node *rb) 126 + { 127 + return rb_entry(rb, struct i915_priolist, node); 128 + } 129 + 130 + static void assert_priolists(struct intel_engine_execlists * const execlists, 131 + long queue_priority) 132 + { 133 + struct rb_node *rb; 134 + long last_prio, i; 135 + 136 + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 137 + return; 138 + 139 + GEM_BUG_ON(rb_first_cached(&execlists->queue) != 140 + rb_first(&execlists->queue.rb_root)); 141 + 142 + last_prio = (queue_priority >> 
I915_USER_PRIORITY_SHIFT) + 1; 143 + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { 144 + const struct i915_priolist *p = to_priolist(rb); 145 + 146 + GEM_BUG_ON(p->priority >= last_prio); 147 + last_prio = p->priority; 148 + 149 + GEM_BUG_ON(!p->used); 150 + for (i = 0; i < ARRAY_SIZE(p->requests); i++) { 151 + if (list_empty(&p->requests[i])) 152 + continue; 153 + 154 + GEM_BUG_ON(!(p->used & BIT(i))); 155 + } 156 + } 157 + } 158 + 159 + struct list_head * 160 + i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) 161 + { 162 + struct intel_engine_execlists * const execlists = &engine->execlists; 163 + struct i915_priolist *p; 164 + struct rb_node **parent, *rb; 165 + bool first = true; 166 + int idx, i; 167 + 168 + lockdep_assert_held(&engine->timeline.lock); 169 + assert_priolists(execlists, INT_MAX); 170 + 171 + /* buckets sorted from highest [in slot 0] to lowest priority */ 172 + idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; 173 + prio >>= I915_USER_PRIORITY_SHIFT; 174 + if (unlikely(execlists->no_priolist)) 175 + prio = I915_PRIORITY_NORMAL; 176 + 177 + find_priolist: 178 + /* most positive priority is scheduled first, equal priorities fifo */ 179 + rb = NULL; 180 + parent = &execlists->queue.rb_root.rb_node; 181 + while (*parent) { 182 + rb = *parent; 183 + p = to_priolist(rb); 184 + if (prio > p->priority) { 185 + parent = &rb->rb_left; 186 + } else if (prio < p->priority) { 187 + parent = &rb->rb_right; 188 + first = false; 189 + } else { 190 + goto out; 191 + } 192 + } 193 + 194 + if (prio == I915_PRIORITY_NORMAL) { 195 + p = &execlists->default_priolist; 196 + } else { 197 + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); 198 + /* Convert an allocation failure to a priority bump */ 199 + if (unlikely(!p)) { 200 + prio = I915_PRIORITY_NORMAL; /* recurses just once */ 201 + 202 + /* To maintain ordering with all rendering, after an 203 + * allocation failure we have to disable all 
scheduling. 204 + * Requests will then be executed in fifo, and schedule 205 + * will ensure that dependencies are emitted in fifo. 206 + * There will be still some reordering with existing 207 + * requests, so if userspace lied about their 208 + * dependencies that reordering may be visible. 209 + */ 210 + execlists->no_priolist = true; 211 + goto find_priolist; 212 + } 213 + } 214 + 215 + p->priority = prio; 216 + for (i = 0; i < ARRAY_SIZE(p->requests); i++) 217 + INIT_LIST_HEAD(&p->requests[i]); 218 + rb_link_node(&p->node, rb, parent); 219 + rb_insert_color_cached(&p->node, &execlists->queue, first); 220 + p->used = 0; 221 + 222 + out: 223 + p->used |= BIT(idx); 224 + return &p->requests[idx]; 225 + } 226 + 227 + static struct intel_engine_cs * 228 + sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) 229 + { 230 + struct intel_engine_cs *engine = node_to_request(node)->engine; 231 + 232 + GEM_BUG_ON(!locked); 233 + 234 + if (engine != locked) { 235 + spin_unlock(&locked->timeline.lock); 236 + spin_lock(&engine->timeline.lock); 237 + } 238 + 239 + return engine; 240 + } 241 + 242 + void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) 243 + { 244 + struct list_head *uninitialized_var(pl); 245 + struct intel_engine_cs *engine, *last; 246 + struct i915_dependency *dep, *p; 247 + struct i915_dependency stack; 248 + const int prio = attr->priority; 249 + LIST_HEAD(dfs); 250 + 251 + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); 252 + 253 + if (i915_request_completed(rq)) 254 + return; 255 + 256 + if (prio <= READ_ONCE(rq->sched.attr.priority)) 257 + return; 258 + 259 + /* Needed in order to use the temporary link inside i915_dependency */ 260 + spin_lock(&schedule_lock); 261 + 262 + stack.signaler = &rq->sched; 263 + list_add(&stack.dfs_link, &dfs); 264 + 265 + /* 266 + * Recursively bump all dependent priorities to match the new request. 
267 + * 268 + * A naive approach would be to use recursion: 269 + * static void update_priorities(struct i915_sched_node *node, prio) { 270 + * list_for_each_entry(dep, &node->signalers_list, signal_link) 271 + * update_priorities(dep->signal, prio) 272 + * queue_request(node); 273 + * } 274 + * but that may have unlimited recursion depth and so runs a very 275 + * real risk of overunning the kernel stack. Instead, we build 276 + * a flat list of all dependencies starting with the current request. 277 + * As we walk the list of dependencies, we add all of its dependencies 278 + * to the end of the list (this may include an already visited 279 + * request) and continue to walk onwards onto the new dependencies. The 280 + * end result is a topological list of requests in reverse order, the 281 + * last element in the list is the request we must execute first. 282 + */ 283 + list_for_each_entry(dep, &dfs, dfs_link) { 284 + struct i915_sched_node *node = dep->signaler; 285 + 286 + /* 287 + * Within an engine, there can be no cycle, but we may 288 + * refer to the same dependency chain multiple times 289 + * (redundant dependencies are not eliminated) and across 290 + * engines. 291 + */ 292 + list_for_each_entry(p, &node->signalers_list, signal_link) { 293 + GEM_BUG_ON(p == dep); /* no cycles! */ 294 + 295 + if (node_signaled(p->signaler)) 296 + continue; 297 + 298 + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); 299 + if (prio > READ_ONCE(p->signaler->attr.priority)) 300 + list_move_tail(&p->dfs_link, &dfs); 301 + } 302 + } 303 + 304 + /* 305 + * If we didn't need to bump any existing priorities, and we haven't 306 + * yet submitted this request (i.e. there is no potential race with 307 + * execlists_submit_request()), we can set our own priority and skip 308 + * acquiring the engine locks. 
309 + */ 310 + if (rq->sched.attr.priority == I915_PRIORITY_INVALID) { 311 + GEM_BUG_ON(!list_empty(&rq->sched.link)); 312 + rq->sched.attr = *attr; 313 + 314 + if (stack.dfs_link.next == stack.dfs_link.prev) 315 + goto out_unlock; 316 + 317 + __list_del_entry(&stack.dfs_link); 318 + } 319 + 320 + last = NULL; 321 + engine = rq->engine; 322 + spin_lock_irq(&engine->timeline.lock); 323 + 324 + /* Fifo and depth-first replacement ensure our deps execute before us */ 325 + list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { 326 + struct i915_sched_node *node = dep->signaler; 327 + 328 + INIT_LIST_HEAD(&dep->dfs_link); 329 + 330 + engine = sched_lock_engine(node, engine); 331 + 332 + /* Recheck after acquiring the engine->timeline.lock */ 333 + if (prio <= node->attr.priority || node_signaled(node)) 334 + continue; 335 + 336 + node->attr.priority = prio; 337 + if (!list_empty(&node->link)) { 338 + if (last != engine) { 339 + pl = i915_sched_lookup_priolist(engine, prio); 340 + last = engine; 341 + } 342 + list_move_tail(&node->link, pl); 343 + } else { 344 + /* 345 + * If the request is not in the priolist queue because 346 + * it is not yet runnable, then it doesn't contribute 347 + * to our preemption decisions. On the other hand, 348 + * if the request is on the HW, it too is not in the 349 + * queue; but in that case we may still need to reorder 350 + * the inflight requests. 351 + */ 352 + if (!i915_sw_fence_done(&node_to_request(node)->submit)) 353 + continue; 354 + } 355 + 356 + if (prio <= engine->execlists.queue_priority) 357 + continue; 358 + 359 + /* 360 + * If we are already the currently executing context, don't 361 + * bother evaluating if we should preempt ourselves. 362 + */ 363 + if (node_to_request(node)->global_seqno && 364 + i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, 365 + node_to_request(node)->global_seqno)) 366 + continue; 367 + 368 + /* Defer (tasklet) submission until after all of our updates. 
*/ 369 + engine->execlists.queue_priority = prio; 370 + tasklet_hi_schedule(&engine->execlists.tasklet); 371 + } 372 + 373 + spin_unlock_irq(&engine->timeline.lock); 374 + 375 + out_unlock: 376 + spin_unlock(&schedule_lock); 377 + }
+25
drivers/gpu/drm/i915/i915_scheduler.h
··· 8 8 #define _I915_SCHEDULER_H_ 9 9 10 10 #include <linux/bitops.h> 11 + #include <linux/kernel.h> 11 12 12 13 #include <uapi/drm/i915_drm.h> 14 + 15 + struct drm_i915_private; 16 + struct i915_request; 17 + struct intel_engine_cs; 13 18 14 19 enum { 15 20 I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, ··· 81 76 unsigned long flags; 82 77 #define I915_DEPENDENCY_ALLOC BIT(0) 83 78 }; 79 + 80 + void i915_sched_node_init(struct i915_sched_node *node); 81 + 82 + bool __i915_sched_node_add_dependency(struct i915_sched_node *node, 83 + struct i915_sched_node *signal, 84 + struct i915_dependency *dep, 85 + unsigned long flags); 86 + 87 + int i915_sched_node_add_dependency(struct drm_i915_private *i915, 88 + struct i915_sched_node *node, 89 + struct i915_sched_node *signal); 90 + 91 + void i915_sched_node_fini(struct drm_i915_private *i915, 92 + struct i915_sched_node *node); 93 + 94 + void i915_schedule(struct i915_request *request, 95 + const struct i915_sched_attr *attr); 96 + 97 + struct list_head * 98 + i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); 84 99 85 100 #endif /* _I915_SCHEDULER_H_ */
+1 -2
drivers/gpu/drm/i915/intel_display.c
··· 13188 13188 13189 13189 ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); 13190 13190 13191 - fb_obj_bump_render_priority(obj); 13192 - 13193 13191 mutex_unlock(&dev_priv->drm.struct_mutex); 13194 13192 i915_gem_object_unpin_pages(obj); 13195 13193 if (ret) 13196 13194 return ret; 13197 13195 13196 + fb_obj_bump_render_priority(obj); 13198 13197 intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); 13199 13198 13200 13199 if (!new_state->fence) { /* implicit fencing */
+5 -263
drivers/gpu/drm/i915/intel_lrc.c
··· 259 259 ce->lrc_desc = desc; 260 260 } 261 261 262 - static void assert_priolists(struct intel_engine_execlists * const execlists, 263 - long queue_priority) 264 - { 265 - struct rb_node *rb; 266 - long last_prio, i; 267 - 268 - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) 269 - return; 270 - 271 - GEM_BUG_ON(rb_first_cached(&execlists->queue) != 272 - rb_first(&execlists->queue.rb_root)); 273 - 274 - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; 275 - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { 276 - struct i915_priolist *p = to_priolist(rb); 277 - 278 - GEM_BUG_ON(p->priority >= last_prio); 279 - last_prio = p->priority; 280 - 281 - GEM_BUG_ON(!p->used); 282 - for (i = 0; i < ARRAY_SIZE(p->requests); i++) { 283 - if (list_empty(&p->requests[i])) 284 - continue; 285 - 286 - GEM_BUG_ON(!(p->used & BIT(i))); 287 - } 288 - } 289 - } 290 - 291 - static struct list_head * 292 - lookup_priolist(struct intel_engine_cs *engine, int prio) 293 - { 294 - struct intel_engine_execlists * const execlists = &engine->execlists; 295 - struct i915_priolist *p; 296 - struct rb_node **parent, *rb; 297 - bool first = true; 298 - int idx, i; 299 - 300 - assert_priolists(execlists, INT_MAX); 301 - 302 - /* buckets sorted from highest [in slot 0] to lowest priority */ 303 - idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; 304 - prio >>= I915_USER_PRIORITY_SHIFT; 305 - if (unlikely(execlists->no_priolist)) 306 - prio = I915_PRIORITY_NORMAL; 307 - 308 - find_priolist: 309 - /* most positive priority is scheduled first, equal priorities fifo */ 310 - rb = NULL; 311 - parent = &execlists->queue.rb_root.rb_node; 312 - while (*parent) { 313 - rb = *parent; 314 - p = to_priolist(rb); 315 - if (prio > p->priority) { 316 - parent = &rb->rb_left; 317 - } else if (prio < p->priority) { 318 - parent = &rb->rb_right; 319 - first = false; 320 - } else { 321 - goto out; 322 - } 323 - } 324 - 325 - if (prio == I915_PRIORITY_NORMAL) { 326 - p = 
&execlists->default_priolist; 327 - } else { 328 - p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); 329 - /* Convert an allocation failure to a priority bump */ 330 - if (unlikely(!p)) { 331 - prio = I915_PRIORITY_NORMAL; /* recurses just once */ 332 - 333 - /* To maintain ordering with all rendering, after an 334 - * allocation failure we have to disable all scheduling. 335 - * Requests will then be executed in fifo, and schedule 336 - * will ensure that dependencies are emitted in fifo. 337 - * There will be still some reordering with existing 338 - * requests, so if userspace lied about their 339 - * dependencies that reordering may be visible. 340 - */ 341 - execlists->no_priolist = true; 342 - goto find_priolist; 343 - } 344 - } 345 - 346 - p->priority = prio; 347 - for (i = 0; i < ARRAY_SIZE(p->requests); i++) 348 - INIT_LIST_HEAD(&p->requests[i]); 349 - rb_link_node(&p->node, rb, parent); 350 - rb_insert_color_cached(&p->node, &execlists->queue, first); 351 - p->used = 0; 352 - 353 - out: 354 - p->used |= BIT(idx); 355 - return &p->requests[idx]; 356 - } 357 - 358 262 static void unwind_wa_tail(struct i915_request *rq) 359 263 { 360 264 rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); ··· 285 381 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); 286 382 if (rq_prio(rq) != prio) { 287 383 prio = rq_prio(rq); 288 - pl = lookup_priolist(engine, prio); 384 + pl = i915_sched_lookup_priolist(engine, prio); 289 385 } 290 386 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); 291 387 ··· 302 398 if (!(prio & I915_PRIORITY_NEWCLIENT)) { 303 399 prio |= I915_PRIORITY_NEWCLIENT; 304 400 list_move_tail(&active->sched.link, 305 - lookup_priolist(engine, prio)); 401 + i915_sched_lookup_priolist(engine, prio)); 306 402 } 307 403 } 308 404 ··· 696 792 */ 697 793 execlists->queue_priority = 698 794 port != execlists->port ? 
rq_prio(last) : INT_MIN; 699 - assert_priolists(execlists, execlists->queue_priority); 700 795 701 796 if (submit) { 702 797 port_assign(port, last); ··· 1022 1119 struct i915_sched_node *node, 1023 1120 int prio) 1024 1121 { 1025 - list_add_tail(&node->link, lookup_priolist(engine, prio)); 1026 - } 1027 - 1028 - static void __update_queue(struct intel_engine_cs *engine, int prio) 1029 - { 1030 - engine->execlists.queue_priority = prio; 1122 + list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); 1031 1123 } 1032 1124 1033 1125 static void __submit_queue_imm(struct intel_engine_cs *engine) ··· 1041 1143 static void submit_queue(struct intel_engine_cs *engine, int prio) 1042 1144 { 1043 1145 if (prio > engine->execlists.queue_priority) { 1044 - __update_queue(engine, prio); 1146 + engine->execlists.queue_priority = prio; 1045 1147 __submit_queue_imm(engine); 1046 1148 } 1047 1149 } ··· 1062 1164 submit_queue(engine, rq_prio(request)); 1063 1165 1064 1166 spin_unlock_irqrestore(&engine->timeline.lock, flags); 1065 - } 1066 - 1067 - static struct i915_request *sched_to_request(struct i915_sched_node *node) 1068 - { 1069 - return container_of(node, struct i915_request, sched); 1070 - } 1071 - 1072 - static struct intel_engine_cs * 1073 - sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) 1074 - { 1075 - struct intel_engine_cs *engine = sched_to_request(node)->engine; 1076 - 1077 - GEM_BUG_ON(!locked); 1078 - 1079 - if (engine != locked) { 1080 - spin_unlock(&locked->timeline.lock); 1081 - spin_lock(&engine->timeline.lock); 1082 - } 1083 - 1084 - return engine; 1085 - } 1086 - 1087 - static void execlists_schedule(struct i915_request *request, 1088 - const struct i915_sched_attr *attr) 1089 - { 1090 - struct list_head *uninitialized_var(pl); 1091 - struct intel_engine_cs *engine, *last; 1092 - struct i915_dependency *dep, *p; 1093 - struct i915_dependency stack; 1094 - const int prio = attr->priority; 1095 - LIST_HEAD(dfs); 
1096 - 1097 - GEM_BUG_ON(prio == I915_PRIORITY_INVALID); 1098 - 1099 - if (i915_request_completed(request)) 1100 - return; 1101 - 1102 - if (prio <= READ_ONCE(request->sched.attr.priority)) 1103 - return; 1104 - 1105 - /* Need BKL in order to use the temporary link inside i915_dependency */ 1106 - lockdep_assert_held(&request->i915->drm.struct_mutex); 1107 - 1108 - stack.signaler = &request->sched; 1109 - list_add(&stack.dfs_link, &dfs); 1110 - 1111 - /* 1112 - * Recursively bump all dependent priorities to match the new request. 1113 - * 1114 - * A naive approach would be to use recursion: 1115 - * static void update_priorities(struct i915_sched_node *node, prio) { 1116 - * list_for_each_entry(dep, &node->signalers_list, signal_link) 1117 - * update_priorities(dep->signal, prio) 1118 - * queue_request(node); 1119 - * } 1120 - * but that may have unlimited recursion depth and so runs a very 1121 - * real risk of overunning the kernel stack. Instead, we build 1122 - * a flat list of all dependencies starting with the current request. 1123 - * As we walk the list of dependencies, we add all of its dependencies 1124 - * to the end of the list (this may include an already visited 1125 - * request) and continue to walk onwards onto the new dependencies. The 1126 - * end result is a topological list of requests in reverse order, the 1127 - * last element in the list is the request we must execute first. 1128 - */ 1129 - list_for_each_entry(dep, &dfs, dfs_link) { 1130 - struct i915_sched_node *node = dep->signaler; 1131 - 1132 - /* 1133 - * Within an engine, there can be no cycle, but we may 1134 - * refer to the same dependency chain multiple times 1135 - * (redundant dependencies are not eliminated) and across 1136 - * engines. 1137 - */ 1138 - list_for_each_entry(p, &node->signalers_list, signal_link) { 1139 - GEM_BUG_ON(p == dep); /* no cycles! 
*/ 1140 - 1141 - if (i915_sched_node_signaled(p->signaler)) 1142 - continue; 1143 - 1144 - GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); 1145 - if (prio > READ_ONCE(p->signaler->attr.priority)) 1146 - list_move_tail(&p->dfs_link, &dfs); 1147 - } 1148 - } 1149 - 1150 - /* 1151 - * If we didn't need to bump any existing priorities, and we haven't 1152 - * yet submitted this request (i.e. there is no potential race with 1153 - * execlists_submit_request()), we can set our own priority and skip 1154 - * acquiring the engine locks. 1155 - */ 1156 - if (request->sched.attr.priority == I915_PRIORITY_INVALID) { 1157 - GEM_BUG_ON(!list_empty(&request->sched.link)); 1158 - request->sched.attr = *attr; 1159 - if (stack.dfs_link.next == stack.dfs_link.prev) 1160 - return; 1161 - __list_del_entry(&stack.dfs_link); 1162 - } 1163 - 1164 - last = NULL; 1165 - engine = request->engine; 1166 - spin_lock_irq(&engine->timeline.lock); 1167 - 1168 - /* Fifo and depth-first replacement ensure our deps execute before us */ 1169 - list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { 1170 - struct i915_sched_node *node = dep->signaler; 1171 - 1172 - INIT_LIST_HEAD(&dep->dfs_link); 1173 - 1174 - engine = sched_lock_engine(node, engine); 1175 - 1176 - /* Recheck after acquiring the engine->timeline.lock */ 1177 - if (prio <= node->attr.priority) 1178 - continue; 1179 - 1180 - if (i915_sched_node_signaled(node)) 1181 - continue; 1182 - 1183 - node->attr.priority = prio; 1184 - if (!list_empty(&node->link)) { 1185 - if (last != engine) { 1186 - pl = lookup_priolist(engine, prio); 1187 - last = engine; 1188 - } 1189 - list_move_tail(&node->link, pl); 1190 - } else { 1191 - /* 1192 - * If the request is not in the priolist queue because 1193 - * it is not yet runnable, then it doesn't contribute 1194 - * to our preemption decisions. 
On the other hand, 1195 - * if the request is on the HW, it too is not in the 1196 - * queue; but in that case we may still need to reorder 1197 - * the inflight requests. 1198 - */ 1199 - if (!i915_sw_fence_done(&sched_to_request(node)->submit)) 1200 - continue; 1201 - } 1202 - 1203 - if (prio <= engine->execlists.queue_priority) 1204 - continue; 1205 - 1206 - /* 1207 - * If we are already the currently executing context, don't 1208 - * bother evaluating if we should preempt ourselves. 1209 - */ 1210 - if (sched_to_request(node)->global_seqno && 1211 - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, 1212 - sched_to_request(node)->global_seqno)) 1213 - continue; 1214 - 1215 - /* Defer (tasklet) submission until after all of our updates. */ 1216 - __update_queue(engine, prio); 1217 - tasklet_hi_schedule(&engine->execlists.tasklet); 1218 - } 1219 - 1220 - spin_unlock_irq(&engine->timeline.lock); 1221 1167 } 1222 1168 1223 1169 static void execlists_context_destroy(struct intel_context *ce) ··· 2101 2359 { 2102 2360 engine->submit_request = execlists_submit_request; 2103 2361 engine->cancel_requests = execlists_cancel_requests; 2104 - engine->schedule = execlists_schedule; 2362 + engine->schedule = i915_schedule; 2105 2363 engine->execlists.tasklet.func = execlists_submission_tasklet; 2106 2364 2107 2365 engine->reset.prepare = execlists_reset_prepare;
+2 -3
drivers/gpu/drm/i915/intel_ringbuffer.h
··· 498 498 */ 499 499 void (*submit_request)(struct i915_request *rq); 500 500 501 - /* Call when the priority on a request has changed and it and its 501 + /* 502 + * Call when the priority on a request has changed and it and its 502 503 * dependencies may need rescheduling. Note the request itself may 503 504 * not be ready to run! 504 - * 505 - * Called under the struct_mutex. 506 505 */ 507 506 void (*schedule)(struct i915_request *request, 508 507 const struct i915_sched_attr *attr);