drivers/gpu/drm/v3d/v3d_sched.c at v5.1-rc4

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / drivers / gpu / drm / v3d / v3d_sched.c
at v5.1-rc4 357 lines 9.2 kB view raw
wrap content
  1// SPDX-License-Identifier: GPL-2.0+
  2/* Copyright (C) 2018 Broadcom */
  3
  4/**
  5 * DOC: Broadcom V3D scheduling
  6 *
  7 * The shared DRM GPU scheduler is used to coordinate submitting jobs
  8 * to the hardware.  Each DRM fd (roughly a client process) gets its
  9 * own scheduler entity, which will process jobs in order.  The GPU
 10 * scheduler will round-robin between clients to submit the next job.
 11 *
 12 * For simplicity, and in order to keep latency low for interactive
 13 * jobs when bulk background jobs are queued up, we submit a new job
 14 * to the HW only when it has completed the last one, instead of
 15 * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
 16 * v3d_job_dependency() to manage the dependency between bin and
 17 * render, instead of having the clients submit jobs using the HW's
 18 * semaphores to interlock between them.
 19 */
 20
 21#include <linux/kthread.h>
 22
 23#include "v3d_drv.h"
 24#include "v3d_regs.h"
 25#include "v3d_trace.h"
 26
 27static struct v3d_job *
 28to_v3d_job(struct drm_sched_job *sched_job)
 29{
 30	return container_of(sched_job, struct v3d_job, base);
 31}
 32
 33static struct v3d_tfu_job *
 34to_tfu_job(struct drm_sched_job *sched_job)
 35{
 36	return container_of(sched_job, struct v3d_tfu_job, base);
 37}
 38
 39static void
 40v3d_job_free(struct drm_sched_job *sched_job)
 41{
 42	struct v3d_job *job = to_v3d_job(sched_job);
 43
 44	drm_sched_job_cleanup(sched_job);
 45
 46	v3d_exec_put(job->exec);
 47}
 48
 49static void
 50v3d_tfu_job_free(struct drm_sched_job *sched_job)
 51{
 52	struct v3d_tfu_job *job = to_tfu_job(sched_job);
 53
 54	drm_sched_job_cleanup(sched_job);
 55
 56	v3d_tfu_job_put(job);
 57}
 58
 59/**
 60 * Returns the fences that the bin or render job depends on, one by one.
 61 * v3d_job_run() won't be called until all of them have been signaled.
 62 */
 63static struct dma_fence *
 64v3d_job_dependency(struct drm_sched_job *sched_job,
 65		   struct drm_sched_entity *s_entity)
 66{
 67	struct v3d_job *job = to_v3d_job(sched_job);
 68	struct v3d_exec_info *exec = job->exec;
 69	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
 70	struct dma_fence *fence;
 71
 72	fence = job->in_fence;
 73	if (fence) {
 74		job->in_fence = NULL;
 75		return fence;
 76	}
 77
 78	if (q == V3D_RENDER) {
 79		/* If we had a bin job, the render job definitely depends on
 80		 * it. We first have to wait for bin to be scheduled, so that
 81		 * its done_fence is created.
 82		 */
 83		fence = exec->bin_done_fence;
 84		if (fence) {
 85			exec->bin_done_fence = NULL;
 86			return fence;
 87		}
 88	}
 89
 90	/* XXX: Wait on a fence for switching the GMP if necessary,
 91	 * and then do so.
 92	 */
 93
 94	return fence;
 95}
 96
 97/**
 98 * Returns the fences that the TFU job depends on, one by one.
 99 * v3d_tfu_job_run() won't be called until all of them have been
100 * signaled.
101 */
102static struct dma_fence *
103v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
104		       struct drm_sched_entity *s_entity)
105{
106	struct v3d_tfu_job *job = to_tfu_job(sched_job);
107	struct dma_fence *fence;
108
109	fence = job->in_fence;
110	if (fence) {
111		job->in_fence = NULL;
112		return fence;
113	}
114
115	return NULL;
116}
117
/*
 * run_job hook for bin and render jobs.
 *
 * Records the job's exec as the current bin or render job, invalidates
 * the caches, creates a fence for the queue and programs the control
 * list registers that start the job.
 *
 * Returns the newly created fence, or NULL when the job's scheduler
 * fence already carries an error or when the fence could not be created.
 */
static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_job *job = to_v3d_job(sched_job);
	struct v3d_exec_info *exec = job->exec;
	/* A job is a bin job exactly when it is the exec's embedded ->bin. */
	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
	struct v3d_dev *v3d = exec->v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;
	unsigned long irqflags;

	/* Don't touch the hardware for a job whose scheduler fence has
	 * already been marked with an error.
	 */
	if (unlikely(job->base.s_fence->finished.error))
		return NULL;

	/* Lock required around bin_job update vs
	 * v3d_overflow_mem_work().
	 */
	spin_lock_irqsave(&v3d->job_lock, irqflags);
	if (q == V3D_BIN) {
		v3d->bin_job = job->exec;

		/* Clear out the overflow allocation, so we don't
		 * reuse the overflow attached to a previous job.
		 */
		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
	} else {
		v3d->render_job = job->exec;
	}
	spin_unlock_irqrestore(&v3d->job_lock, irqflags);

	/* Can we avoid this flush when q==RENDER?  We need to be
	 * careful of scheduling, though -- imagine job0 rendering to
	 * texture and job1 reading, and them being executed as bin0,
	 * bin1, render0, render1, so that render1's flush at bin time
	 * wasn't enough.
	 */
	v3d_invalidate_caches(v3d);

	/* NOTE(review): on failure we return without starting the job, but
	 * bin_job/render_job has already been updated above -- confirm that
	 * is intended.
	 */
	fence = v3d_fence_create(v3d, q);
	if (IS_ERR(fence))
		return NULL;

	/* Replace whatever done_fence the job held with a reference to the
	 * new fence.
	 */
	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
			    job->start, job->end);

	if (q == V3D_BIN) {
		/* The QMA/QMS and QTS registers are only written when the
		 * exec supplies non-zero values for them.
		 */
		if (exec->qma) {
			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
		}
		if (exec->qts) {
			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
				       V3D_CLE_CT0QTS_ENABLE |
				       exec->qts);
		}
	} else {
		/* XXX: Set the QCFG */
	}

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);

	return fence;
}
188
/*
 * run_job hook for TFU jobs.
 *
 * Creates a fence for the TFU queue, records the job as the device's
 * current TFU job and writes the job's arguments to the TFU registers.
 *
 * Returns the newly created fence, or NULL if the fence could not be
 * created (in which case no register is written).
 */
static struct dma_fence *
v3d_tfu_job_run(struct drm_sched_job *sched_job)
{
	struct v3d_tfu_job *job = to_tfu_job(sched_job);
	struct v3d_dev *v3d = job->v3d;
	struct drm_device *dev = &v3d->drm;
	struct dma_fence *fence;

	fence = v3d_fence_create(v3d, V3D_TFU);
	if (IS_ERR(fence))
		return NULL;

	v3d->tfu_job = job;
	/* Replace whatever done_fence the job held with a reference to the
	 * new fence.
	 */
	if (job->done_fence)
		dma_fence_put(job->done_fence);
	job->done_fence = dma_fence_get(fence);

	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);

	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
	V3D_WRITE(V3D_TFU_IUA, job->args.iua);
	V3D_WRITE(V3D_TFU_IOA, job->args.ioa);
	V3D_WRITE(V3D_TFU_IOS, job->args.ios);
	V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]);
	/* COEF1..3 are only written when COEF0 has the USECOEF bit set. */
	if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) {
		V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]);
		V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]);
		V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]);
	}
	/* ICFG kicks off the job. */
	V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC);

	return fence;
}
225
226static void
227v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
228{
229	enum v3d_queue q;
230
231	mutex_lock(&v3d->reset_lock);
232
233	/* block scheduler */
234	for (q = 0; q < V3D_MAX_QUEUES; q++) {
235		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
236
237		drm_sched_stop(sched);
238
239		if(sched_job)
240			drm_sched_increase_karma(sched_job);
241	}
242
243	/* get the GPU back into the init state */
244	v3d_reset(v3d);
245
246	for (q = 0; q < V3D_MAX_QUEUES; q++)
247		drm_sched_resubmit_jobs(sched_job->sched);
248
249	/* Unblock schedulers and restart their jobs. */
250	for (q = 0; q < V3D_MAX_QUEUES; q++) {
251		drm_sched_start(&v3d->queue[q].sched, true);
252	}
253
254	mutex_unlock(&v3d->reset_lock);
255}
256
257static void
258v3d_job_timedout(struct drm_sched_job *sched_job)
259{
260	struct v3d_job *job = to_v3d_job(sched_job);
261	struct v3d_exec_info *exec = job->exec;
262	struct v3d_dev *v3d = exec->v3d;
263	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
264	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
265	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
266
267	/* If the current address or return address have changed, then
268	 * the GPU has probably made progress and we should delay the
269	 * reset.  This could fail if the GPU got in an infinite loop
270	 * in the CL, but that is pretty unlikely outside of an i-g-t
271	 * testcase.
272	 */
273	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
274		job->timedout_ctca = ctca;
275		job->timedout_ctra = ctra;
276		return;
277	}
278
279	v3d_gpu_reset_for_timeout(v3d, sched_job);
280}
281
282static void
283v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
284{
285	struct v3d_tfu_job *job = to_tfu_job(sched_job);
286
287	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
288}
289
290static const struct drm_sched_backend_ops v3d_sched_ops = {
291	.dependency = v3d_job_dependency,
292	.run_job = v3d_job_run,
293	.timedout_job = v3d_job_timedout,
294	.free_job = v3d_job_free
295};
296
297static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
298	.dependency = v3d_tfu_job_dependency,
299	.run_job = v3d_tfu_job_run,
300	.timedout_job = v3d_tfu_job_timedout,
301	.free_job = v3d_tfu_job_free
302};
303
304int
305v3d_sched_init(struct v3d_dev *v3d)
306{
307	int hw_jobs_limit = 1;
308	int job_hang_limit = 0;
309	int hang_limit_ms = 500;
310	int ret;
311
312	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
313			     &v3d_sched_ops,
314			     hw_jobs_limit, job_hang_limit,
315			     msecs_to_jiffies(hang_limit_ms),
316			     "v3d_bin");
317	if (ret) {
318		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
319		return ret;
320	}
321
322	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
323			     &v3d_sched_ops,
324			     hw_jobs_limit, job_hang_limit,
325			     msecs_to_jiffies(hang_limit_ms),
326			     "v3d_render");
327	if (ret) {
328		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
329			ret);
330		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
331		return ret;
332	}
333
334	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
335			     &v3d_tfu_sched_ops,
336			     hw_jobs_limit, job_hang_limit,
337			     msecs_to_jiffies(hang_limit_ms),
338			     "v3d_tfu");
339	if (ret) {
340		dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
341			ret);
342		drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
343		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
344		return ret;
345	}
346
347	return 0;
348}
349
350void
351v3d_sched_fini(struct v3d_dev *v3d)
352{
353	enum v3d_queue q;
354
355	for (q = 0; q < V3D_MAX_QUEUES; q++)
356		drm_sched_fini(&v3d->queue[q].sched);
357}
Configure Feed

Configure Feed