Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/gt: Always send a pulse down the engine after disabling heartbeat

Currently, we check we can send a pulse prior to disabling the
heartbeat to verify that we can change the heartbeat, but since we may
re-evaluate execution upon changing the heartbeat interval we need another
pulse afterwards to refresh execution.

v2: Tvrtko asked if we could reduce the double pulse to a single, which
opened up a discussion of how we should handle the pulse-error after
attempting to change the property, and the desire to serialise
adjustment of the property with its validating pulse, and unwind upon
failure.

Fixes: 9a40bddd47ca ("drm/i915/gt: Expose heartbeat interval via sysfs")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v5.7+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200928221510.26044-2-chris@chris-wilson.co.uk
(cherry picked from commit 3dd66a94de59d7792e7917eb3075342e70f06f44)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

authored by

Chris Wilson and committed by
Rodrigo Vivi
ca65fc0d 7d442ea7

+76 -48
+76 -48
drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
··· 177 177 INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat); 178 178 } 179 179 180 - int intel_engine_set_heartbeat(struct intel_engine_cs *engine, 181 - unsigned long delay) 182 - { 183 - int err; 184 - 185 - /* Send one last pulse before to cleanup persistent hogs */ 186 - if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) { 187 - err = intel_engine_pulse(engine); 188 - if (err) 189 - return err; 190 - } 191 - 192 - WRITE_ONCE(engine->props.heartbeat_interval_ms, delay); 193 - 194 - if (intel_engine_pm_get_if_awake(engine)) { 195 - if (delay) 196 - intel_engine_unpark_heartbeat(engine); 197 - else 198 - intel_engine_park_heartbeat(engine); 199 - intel_engine_pm_put(engine); 200 - } 201 - 202 - return 0; 203 - } 204 - 205 - int intel_engine_pulse(struct intel_engine_cs *engine) 180 + static int __intel_engine_pulse(struct intel_engine_cs *engine) 206 181 { 207 182 struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER }; 208 183 struct intel_context *ce = engine->kernel_context; 209 184 struct i915_request *rq; 185 + 186 + lockdep_assert_held(&ce->timeline->mutex); 187 + GEM_BUG_ON(!intel_engine_has_preemption(engine)); 188 + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); 189 + 190 + intel_context_enter(ce); 191 + rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); 192 + intel_context_exit(ce); 193 + if (IS_ERR(rq)) 194 + return PTR_ERR(rq); 195 + 196 + __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); 197 + idle_pulse(engine, rq); 198 + 199 + __i915_request_commit(rq); 200 + __i915_request_queue(rq, &attr); 201 + GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); 202 + 203 + return 0; 204 + } 205 + 206 + static unsigned long set_heartbeat(struct intel_engine_cs *engine, 207 + unsigned long delay) 208 + { 209 + unsigned long old; 210 + 211 + old = xchg(&engine->props.heartbeat_interval_ms, delay); 212 + if (delay) 213 + intel_engine_unpark_heartbeat(engine); 214 + else 215 + intel_engine_park_heartbeat(engine); 216 + 217 + return old; 218 + } 219 + 220 + int intel_engine_set_heartbeat(struct intel_engine_cs *engine, 221 + unsigned long delay) 222 + { 223 + struct intel_context *ce = engine->kernel_context; 224 + int err = 0; 225 + 226 + if (!delay && !intel_engine_has_preempt_reset(engine)) 227 + return -ENODEV; 228 + 229 + intel_engine_pm_get(engine); 230 + 231 + err = mutex_lock_interruptible(&ce->timeline->mutex); 232 + if (err) 233 + goto out_rpm; 234 + 235 + if (delay != engine->props.heartbeat_interval_ms) { 236 + unsigned long saved = set_heartbeat(engine, delay); 237 + 238 + /* recheck current execution */ 239 + if (intel_engine_has_preemption(engine)) { 240 + err = __intel_engine_pulse(engine); 241 + if (err) 242 + set_heartbeat(engine, saved); 243 + } 244 + } 245 + 246 + mutex_unlock(&ce->timeline->mutex); 247 + 248 + out_rpm: 249 + intel_engine_pm_put(engine); 250 + return err; 251 + } 252 + 253 + int intel_engine_pulse(struct intel_engine_cs *engine) 254 + { 255 + struct intel_context *ce = engine->kernel_context; 210 256 int err; 211 257 212 258 if (!intel_engine_has_preemption(engine)) ··· 261 215 if (!intel_engine_pm_get_if_awake(engine)) 262 216 return 0; 263 217 264 - if (mutex_lock_interruptible(&ce->timeline->mutex)) { 265 - err = -EINTR; 266 - goto out_rpm; 218 + err = -EINTR; 219 + if (!mutex_lock_interruptible(&ce->timeline->mutex)) { 220 + err = __intel_engine_pulse(engine); 221 + mutex_unlock(&ce->timeline->mutex); 267 222 } 268 223 269 - intel_context_enter(ce); 270 - rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); 271 - intel_context_exit(ce); 272 - if (IS_ERR(rq)) { 273 - err = PTR_ERR(rq); 274 - goto out_unlock; 275 - } 276 - 277 - __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); 278 - idle_pulse(engine, rq); 279 - 280 - __i915_request_commit(rq); 281 - __i915_request_queue(rq, &attr); 282 - GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); 283 - err = 0; 284 - 285 - out_unlock: 286 - mutex_unlock(&ce->timeline->mutex); 287 - out_rpm: 288 224 intel_engine_pm_put(engine); 289 225 return err; 290 226 }