Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/sched: Further optimise drm_sched_entity_push_job

Having removed one re-lock cycle on the entity->lock in a patch titled
"drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit
larger refactoring we can do the same optimisation on the rq->lock.
(Currently both drm_sched_rq_add_entity() and
drm_sched_rq_update_fifo_locked() take and release the same lock.)

To achieve this we make drm_sched_rq_update_fifo_locked() and
drm_sched_rq_add_entity() expect the rq->lock to be held.

We also align drm_sched_rq_update_fifo_locked(),
drm_sched_rq_add_entity() and
drm_sched_rq_remove_fifo_locked() function signatures, by adding rq as a
parameter to the latter.

v2:
* Fix after rebase of the series.
* Avoid naming inconsistency between drm_sched_rq_add/remove. (Christian)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Philipp Stanner <pstanner@redhat.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-6-tursulin@igalia.com

Authored by Tvrtko Ursulin and committed by Philipp Stanner
134e71bd f93126f5

+25 -18
+9 -2
drivers/gpu/drm/scheduler/sched_entity.c
··· 515 515 516 516 next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); 517 517 if (next) { 518 + struct drm_sched_rq *rq; 519 + 518 520 spin_lock(&entity->lock); 519 - drm_sched_rq_update_fifo_locked(entity, 521 + rq = entity->rq; 522 + spin_lock(&rq->lock); 523 + drm_sched_rq_update_fifo_locked(entity, rq, 520 524 next->submit_ts); 525 + spin_unlock(&rq->lock); 521 526 spin_unlock(&entity->lock); 522 527 } 523 528 } ··· 621 616 rq = entity->rq; 622 617 sched = rq->sched; 623 618 619 + spin_lock(&rq->lock); 624 620 drm_sched_rq_add_entity(rq, entity); 625 621 626 622 if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) 627 - drm_sched_rq_update_fifo_locked(entity, submit_ts); 623 + drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); 628 624 625 + spin_unlock(&rq->lock); 629 626 spin_unlock(&entity->lock); 630 627 631 628 drm_sched_wakeup(sched);
+14 -15
drivers/gpu/drm/scheduler/sched_main.c
··· 153 153 return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); 154 154 } 155 155 156 - static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity) 156 + static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, 157 + struct drm_sched_rq *rq) 157 158 { 158 - struct drm_sched_rq *rq = entity->rq; 159 - 160 159 if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { 161 160 rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); 162 161 RB_CLEAR_NODE(&entity->rb_tree_node); 163 162 } 164 163 } 165 164 166 - void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts) 165 + void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, 166 + struct drm_sched_rq *rq, 167 + ktime_t ts) 167 168 { 168 169 /* 169 170 * Both locks need to be grabbed, one to protect from entity->rq change ··· 172 171 * other to update the rb tree structure. 173 172 */ 174 173 lockdep_assert_held(&entity->lock); 174 + lockdep_assert_held(&rq->lock); 175 175 176 - spin_lock(&entity->rq->lock); 177 - 178 - drm_sched_rq_remove_fifo_locked(entity); 176 + drm_sched_rq_remove_fifo_locked(entity, rq); 179 177 180 178 entity->oldest_job_waiting = ts; 181 179 182 - rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root, 180 + rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, 183 181 drm_sched_entity_compare_before); 184 - 185 - spin_unlock(&entity->rq->lock); 186 182 } 187 183 188 184 /** ··· 211 213 void drm_sched_rq_add_entity(struct drm_sched_rq *rq, 212 214 struct drm_sched_entity *entity) 213 215 { 216 + lockdep_assert_held(&entity->lock); 217 + lockdep_assert_held(&rq->lock); 218 + 214 219 if (!list_empty(&entity->list)) 215 220 return; 216 221 217 - spin_lock(&rq->lock); 218 - 219 222 atomic_inc(rq->sched->score); 220 223 list_add_tail(&entity->list, &rq->entities); 221 - 222 - spin_unlock(&rq->lock); 223 224 } 224 225 225 226 /** ··· 232 235 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, 233 236 struct drm_sched_entity *entity) 234 237 { 238 + lockdep_assert_held(&entity->lock); 239 + 235 240 if (list_empty(&entity->list)) 236 241 return; 237 242 ··· 246 247 rq->current_entity = NULL; 247 248 248 249 if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) 249 - drm_sched_rq_remove_fifo_locked(entity); 250 + drm_sched_rq_remove_fifo_locked(entity, rq); 250 251 251 252 spin_unlock(&rq->lock); 252 253 }
+2 -1
include/drm/gpu_scheduler.h
··· 596 596 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, 597 597 struct drm_sched_entity *entity); 598 598 599 - void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts); 599 + void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, 600 + struct drm_sched_rq *rq, ktime_t ts); 600 601 601 602 int drm_sched_entity_init(struct drm_sched_entity *entity, 602 603 enum drm_sched_priority priority,