/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_

#include <linux/lockdep.h>

#include "i915_active_types.h"
#include "i915_request.h"

/*
 * We treat requests as fences. These are not to be confused with our
 * "fence registers"; they are pipeline synchronisation objects a la
 * GL_ARB_sync. We use the fences to synchronize access from the CPU with
 * activity on the GPU; for example, we should not rewrite an object's PTEs
 * whilst the GPU is reading them. We also track fences at a higher level to
 * provide implicit synchronisation around GEM objects, e.g. set-domain will
 * wait for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_active_request to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_active_request is updated with i915_active_request_set() to
 * track the most recent fence request; typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_active_request completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_active_request.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */

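/*
 * A minimal usage sketch, assuming a hypothetical owner structure, retire
 * handler and guarding mutex (none of which are part of this header; only
 * the i915_active_request_*() calls are). The tracker is initialised once,
 * updated as new requests touch the owner, and drained before the owner is
 * freed:
 *
 *      struct hypothetical_owner {
 *              struct i915_active_request active;
 *      };
 *
 *      static void hypothetical_retire(struct i915_active_request *active,
 *                                      struct i915_request *rq)
 *      {
 *              ... the last tracked request has been retired, owner is idle ...
 *      }
 *
 *      i915_active_request_init(&owner->active, NULL, hypothetical_retire);
 *
 *      err = i915_active_request_set(&owner->active, rq);
 *
 *      err = i915_active_request_retire(&owner->active, &owner_mutex);
 */
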
void i915_active_retire_noop(struct i915_active_request *active,
                             struct i915_request *request);

/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active - the active tracker
 * @rq - initial request to track, can be NULL
 * @retire - a callback invoked when the tracker is retired (becomes idle),
 *           can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle (is retired after
 * completion), the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
                         struct i915_request *rq,
                         i915_active_retire_fn retire)
{
        RCU_INIT_POINTER(active->request, rq);
        INIT_LIST_HEAD(&active->link);
        active->retire = retire ?: i915_active_retire_noop;
}

#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)

/**
 * __i915_active_request_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * __i915_active_request_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
static inline void
__i915_active_request_set(struct i915_active_request *active,
                          struct i915_request *request)
{
        list_move(&active->link, &request->active_list);
        rcu_assign_pointer(active->request, request);
}

int __must_check
i915_active_request_set(struct i915_active_request *active,
                        struct i915_request *rq);

/**
 * i915_active_request_set_retire_fn - updates the retirement callback
 * @active - the active tracker
 * @fn - the routine called when the request is retired
 * @mutex - struct_mutex used to guard retirements
 *
 * i915_active_request_set_retire_fn() updates the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired.
 */
static inline void
i915_active_request_set_retire_fn(struct i915_active_request *active,
                                  i915_active_retire_fn fn,
                                  struct mutex *mutex)
{
        lockdep_assert_held(mutex);
        active->retire = fn ?: i915_active_retire_noop;
}

/**
 * i915_active_request_raw - return the active request
 * @active - the active tracker
 * @mutex - the lock guarding the tracker, held by the caller
 *
 * i915_active_request_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold @mutex.
 */
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
                        struct mutex *mutex)
{
        return rcu_dereference_protected(active->request,
                                         lockdep_is_held(mutex));
}

/**
 * i915_active_request_peek - report the active request being monitored
 * @active - the active tracker
 * @mutex - the lock guarding the tracker, held by the caller
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold @mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
                         struct mutex *mutex)
{
        struct i915_request *request;

        request = i915_active_request_raw(active, mutex);
        if (!request || i915_request_completed(request))
                return NULL;

        return request;
}

/**
 * i915_active_request_get - return a reference to the active request
 * @active - the active tracker
 * @mutex - the lock guarding the tracker, held by the caller
 *
 * i915_active_request_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold @mutex, and should
 * release the returned reference with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
                        struct mutex *mutex)
{
        return i915_request_get(i915_active_request_peek(active, mutex));
}

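/*
 * A minimal caller-side sketch (assumed code, not part of this header):
 * the guarding mutex here is assumed to be the device struct_mutex, and
 * "owner" is a hypothetical structure embedding the tracker.
 *
 *      struct i915_request *rq;
 *
 *      mutex_lock(&i915->drm.struct_mutex);
 *      rq = i915_active_request_get(&owner->active, &i915->drm.struct_mutex);
 *      mutex_unlock(&i915->drm.struct_mutex);
 *      if (rq) {
 *              ... use rq outside the lock ...
 *              i915_request_put(rq);
 *      }
 */
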
/**
 * __i915_active_request_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_active_request_get_rcu() returns a reference to the active request,
 * or NULL if the active tracker is idle. The caller must hold the RCU read
 * lock, but the returned pointer is safe to use outside of RCU.
 */
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
        /*
         * Performing a lockless retrieval of the active request is super
         * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
         * slab of request objects will not be freed whilst we hold the
         * RCU read lock. It does not guarantee that the request itself
         * will not be freed and then *reused*. Viz,
         *
         *      Thread A                        Thread B
         *
         *      rq = active.request
         *                                      retire(rq) -> free(rq);
         *                                      (rq is now first on the slab freelist)
         *                                      active.request = NULL
         *
         *                                      rq = new submission on a new object
         *      ref(rq)
         *
         * To prevent the request from being reused whilst the caller
         * uses it, we take a reference like normal. Whilst acquiring
         * the reference we check that it is not in a destroyed state
         * (refcnt == 0). That prevents the request being reallocated
         * whilst the caller holds on to it. To check that the request
         * was not reallocated as we acquired the reference we have to
         * check that our request remains the active request across
         * the lookup, in the same manner as a seqlock. The visibility
         * of the pointer versus the reference counting is controlled
         * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
         *
         * In the middle of all that, we inspect whether the request is
         * complete. Retiring is lazy so the request may be completed long
         * before the active tracker is updated. Querying whether the
         * request is complete is far cheaper (as it involves no locked
         * instructions setting cachelines to exclusive) than acquiring
         * the reference, so we do it first. The RCU read lock ensures the
         * pointer dereference is valid, but does not ensure that the
         * seqno or the HWS is the right one! However, if the request was
         * reallocated, that means the active tracker's request was complete.
         * If the new request is also complete, then both are and we can
         * just report the active tracker is idle. If the new request is
         * incomplete, then we acquire a reference on it and check that
         * it remained the active request.
         *
         * It is then imperative that we do not zero the request on
         * reallocation, so that we can chase the dangling pointers!
         * See i915_request_alloc().
         */
        do {
                struct i915_request *request;

                request = rcu_dereference(active->request);
                if (!request || i915_request_completed(request))
                        return NULL;

                /*
                 * An especially silly compiler could decide to recompute the
                 * result of i915_request_completed, more specifically
                 * re-emit the load for request->fence.seqno. A race would catch
                 * a later seqno value, which could flip the result from true to
                 * false. Which means part of the instructions below might not
                 * be executed, while later on instructions are executed. Due to
                 * barriers within the refcounting the inconsistency can't reach
                 * past the call to i915_request_get_rcu, but not executing
                 * that while still executing i915_request_put() creates
                 * havoc enough. Prevent this with a compiler barrier.
                 */
                barrier();

                request = i915_request_get_rcu(request);

                /*
                 * What stops the following rcu_access_pointer() from occurring
                 * before the above i915_request_get_rcu()? If we were
                 * to read the value before pausing to get the reference to
                 * the request, we may not notice a change in the active
                 * tracker.
                 *
                 * The rcu_access_pointer() is a mere compiler barrier, which
                 * means both the CPU and compiler are free to perform the
                 * memory read without constraint. The compiler only has to
                 * ensure that any operations after the rcu_access_pointer()
                 * occur afterwards in program order. This means the read may
                 * be performed earlier by an out-of-order CPU, or adventurous
                 * compiler.
                 *
                 * The atomic operation at the heart of
                 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
                 * atomic_inc_not_zero() which is only a full memory barrier
                 * when successful. That is, if i915_request_get_rcu()
                 * returns the request (and so with the reference counted
                 * incremented) then the following read for rcu_access_pointer()
                 * must occur after the atomic operation and so confirm
                 * that this request is the one currently being tracked.
                 *
                 * The corresponding write barrier is part of
                 * rcu_assign_pointer().
                 */
                if (!request || request == rcu_access_pointer(active->request))
                        return rcu_pointer_handoff(request);

                i915_request_put(request);
        } while (1);
}

/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active - the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
        struct i915_request *request;

        rcu_read_lock();
        request = __i915_active_request_get_rcu(active);
        rcu_read_unlock();

        return request;
}

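/*
 * A minimal caller-side sketch (assumed code, not part of this header):
 * "owner" is a hypothetical structure embedding the tracker; no lock is
 * required around the lookup, but the reference must still be released.
 *
 *      struct i915_request *rq;
 *
 *      rq = i915_active_request_get_unlocked(&owner->active);
 *      if (rq) {
 *              ... inspect or wait on rq without holding any locks ...
 *              i915_request_put(rq);
 *      }
 */
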
/**
 * i915_active_request_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_active_request_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
        return rcu_access_pointer(active->request);
}

/**
 * i915_active_request_retire - waits until the request is retired
 * @active - the active tracker on which to wait
 * @mutex - the lock guarding the tracker, held by the caller
 *
 * i915_active_request_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
                           struct mutex *mutex)
{
        struct i915_request *request;
        long ret;

        request = i915_active_request_raw(active, mutex);
        if (!request)
                return 0;

        ret = i915_request_wait(request,
                                I915_WAIT_INTERRUPTIBLE,
                                MAX_SCHEDULE_TIMEOUT);
        if (ret < 0)
                return ret;

        list_del_init(&active->link);
        RCU_INIT_POINTER(active->request, NULL);

        active->retire(active, request);

        return 0;
}

/*
 * GPU activity tracking
 *
 * Each set of commands submitted to the GPU comprises a single request that
 * signals a fence upon completion. struct i915_request combines the
 * command submission, scheduling and fence signaling roles. If we want to see
 * if a particular task is complete, we need to grab the fence (struct
 * i915_request) for that task and check or wait for it to be signaled. More
 * often though we want to track the status of a bunch of tasks, for example
 * to wait for the GPU to finish accessing some memory across a variety of
 * different command pipelines from different clients. We could choose to
 * track every single request associated with the task, but knowing that
 * each request belongs to an ordered timeline (later requests within a
 * timeline must wait for earlier requests), we need only track the
 * latest request in each timeline to determine the overall status of the
 * task.
 *
 * struct i915_active provides this tracking across timelines. It builds a
 * composite shared-fence, and is updated as new work is submitted to the task,
 * forming a snapshot of the current status. It should be embedded into the
 * different resources that need to track their associated GPU activity to
 * provide a callback when that GPU activity has ceased, or otherwise to
 * provide a serialisation point either for request submission or for CPU
 * synchronisation.
 */

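/*
 * A minimal usage sketch, assuming a hypothetical resource structure, retire
 * handler and timeline id; only the i915_active_*() calls declared below are
 * real. The tracker is initialised with a callback, fed the requests that
 * touch the resource, and can then be waited upon or used as a barrier for
 * new work:
 *
 *      struct hypothetical_resource {
 *              struct i915_active active;
 *      };
 *
 *      static void hypothetical_retire(struct i915_active *ref)
 *      {
 *              ... all tracked requests have retired, resource is idle ...
 *      }
 *
 *      i915_active_init(i915, &resource->active, hypothetical_retire);
 *
 *      err = i915_active_ref(&resource->active, rq->fence.context, rq);
 *
 *      err = i915_active_wait(&resource->active);
 *
 *      err = i915_request_await_active(new_rq, &resource->active);
 */
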
void i915_active_init(struct drm_i915_private *i915,
                      struct i915_active *ref,
                      void (*retire)(struct i915_active *ref));

int i915_active_ref(struct i915_active *ref,
                    u64 timeline,
                    struct i915_request *rq);

int i915_active_wait(struct i915_active *ref);

int i915_request_await_active(struct i915_request *rq,
                              struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
                                      struct i915_active_request *active);

bool i915_active_acquire(struct i915_active *ref);

static inline void i915_active_cancel(struct i915_active *ref)
{
        GEM_BUG_ON(ref->count != 1);
        ref->count = 0;
}

void i915_active_release(struct i915_active *ref);

static inline bool
i915_active_is_idle(const struct i915_active *ref)
{
        return !ref->count;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
                                            struct intel_engine_cs *engine);
void i915_active_acquire_barrier(struct i915_active *ref);
void i915_request_add_barriers(struct i915_request *rq);

#endif /* _I915_ACTIVE_H_ */