Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <drm/drm_print.h>
26
27#include "i915_drv.h"
28#include "i915_reset.h"
29#include "intel_ringbuffer.h"
30#include "intel_lrc.h"
31
/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)

/*
 * Logical-ring context image sizes for the render engine, per gen.
 * DEFAULT_LR_CONTEXT_RENDER_SIZE is the conservative fallback returned by
 * __intel_engine_context_size() for gens with no explicit case below.
 */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

/* Context image size for all non-render engine classes on gen8+ */
#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
48
/*
 * Static, per-engine-class description: the printable name prefix, the
 * submission-mode specific init hook, and the class id exposed to userspace.
 */
struct engine_class_info {
	const char *name;	/* prefix for engine->name, e.g. "rcs" */
	int (*init_legacy)(struct intel_engine_cs *engine);	/* ringbuffer mode */
	int (*init_execlists)(struct intel_engine_cs *engine);	/* execlists mode */

	u8 uabi_class;	/* I915_ENGINE_CLASS_* value reported via uabi */
};
56
/*
 * One entry per hardware engine class; indexed by the *_CLASS enums.
 * Consulted by intel_engine_setup() (name, uabi_class) and
 * intel_engines_init() (submission-mode init hook).
 */
static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.init_execlists = logical_render_ring_init,
		.init_legacy = intel_init_render_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_RENDER,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_blt_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_COPY,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_bsd_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.init_execlists = logical_xcs_ring_init,
		.init_legacy = intel_init_vebox_ring_buffer,
		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
	},
};
83
#define MAX_MMIO_BASES 3
/*
 * Static, per-engine-instance description consumed by intel_engine_setup().
 */
struct engine_info {
	unsigned int hw_id;	/* hardware id, also used as the GuC id */
	unsigned int uabi_id;	/* I915_EXEC_* ring id used by execbuf */
	u8 class;		/* index into intel_engine_classes[] */
	u8 instance;		/* instance within the class */
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;	/* first gen this base applies to */
		u32 base : 24;	/* mmio offset of the ring registers */
	} mmio_bases[MAX_MMIO_BASES];
};
96
/*
 * Master table of all engines the driver knows about, indexed by
 * intel_engine_id. Which entries are actually instantiated for a given
 * device is decided by HAS_ENGINE() in intel_engines_init_mmio().
 * Each mmio_bases[] list is sorted newest-gen first (see engine_info).
 */
static const struct engine_info intel_engines[] = {
	[RCS] = {
		.hw_id = RCS_HW,
		.uabi_id = I915_EXEC_RENDER,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS] = {
		.hw_id = BCS_HW,
		.uabi_id = I915_EXEC_BLT,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS] = {
		.hw_id = VCS_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS4] = {
		.hw_id = VCS4_HW,
		.uabi_id = I915_EXEC_BSD,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS] = {
		.hw_id = VECS_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS2] = {
		.hw_id = VECS2_HW,
		.uabi_id = I915_EXEC_VEBOX,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};
175
176/**
177 * ___intel_engine_context_size() - return the size of the context for an engine
178 * @dev_priv: i915 device private
179 * @class: engine class
180 *
181 * Each engine class may require a different amount of space for a context
182 * image.
183 *
184 * Return: size (in bytes) of an engine class specific context image
185 *
186 * Note: this size includes the HWSP, which is part of the context image
187 * in LRC mode, but does not include the "shared data page" used with
188 * GuC submission. The caller should account for this if using the GuC.
189 */
190static u32
191__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
192{
193 u32 cxt_size;
194
195 BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
196
197 switch (class) {
198 case RENDER_CLASS:
199 switch (INTEL_GEN(dev_priv)) {
200 default:
201 MISSING_CASE(INTEL_GEN(dev_priv));
202 return DEFAULT_LR_CONTEXT_RENDER_SIZE;
203 case 11:
204 return GEN11_LR_CONTEXT_RENDER_SIZE;
205 case 10:
206 return GEN10_LR_CONTEXT_RENDER_SIZE;
207 case 9:
208 return GEN9_LR_CONTEXT_RENDER_SIZE;
209 case 8:
210 return GEN8_LR_CONTEXT_RENDER_SIZE;
211 case 7:
212 if (IS_HASWELL(dev_priv))
213 return HSW_CXT_TOTAL_SIZE;
214
215 cxt_size = I915_READ(GEN7_CXT_SIZE);
216 return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
217 PAGE_SIZE);
218 case 6:
219 cxt_size = I915_READ(CXT_SIZE);
220 return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
221 PAGE_SIZE);
222 case 5:
223 case 4:
224 case 3:
225 case 2:
226 /* For the special day when i810 gets merged. */
227 case 1:
228 return 0;
229 }
230 break;
231 default:
232 MISSING_CASE(class);
233 /* fall through */
234 case VIDEO_DECODE_CLASS:
235 case VIDEO_ENHANCEMENT_CLASS:
236 case COPY_ENGINE_CLASS:
237 if (INTEL_GEN(dev_priv) < 8)
238 return 0;
239 return GEN8_LR_CONTEXT_OTHER_SIZE;
240 }
241}
242
243static u32 __engine_mmio_base(struct drm_i915_private *i915,
244 const struct engine_mmio_base *bases)
245{
246 int i;
247
248 for (i = 0; i < MAX_MMIO_BASES; i++)
249 if (INTEL_GEN(i915) >= bases[i].gen)
250 break;
251
252 GEM_BUG_ON(i == MAX_MMIO_BASES);
253 GEM_BUG_ON(!bases[i].base);
254
255 return bases[i].base;
256}
257
258static void __sprint_engine_name(char *name, const struct engine_info *info)
259{
260 WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
261 intel_engine_classes[info->class].name,
262 info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
263}
264
/*
 * intel_engine_set_hwsp_writemask - program the engine's HWSTAM register
 * @engine: engine whose hardware status mask to set
 * @mask: new HWSTAM value (~0u masks off all writes into the HWSP,
 *        see intel_engine_sanitize_mmio())
 */
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	struct drm_i915_private *dev_priv = engine->i915;
	i915_reg_t hwstam;

	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (INTEL_GEN(dev_priv) < 6 && engine->class != RENDER_CLASS)
		return;

	hwstam = RING_HWSTAM(engine->mmio_base);
	/* The register is only 16 bits wide before gen3 */
	if (INTEL_GEN(dev_priv) >= 3)
		I915_WRITE(hwstam, mask);
	else
		I915_WRITE16(hwstam, mask);
}
283
/*
 * Scrub inherited mmio state: we do not yet own the HWSP, so prevent the
 * hardware from writing into whatever page a previous owner left behind.
 */
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}
289
/*
 * intel_engine_setup - allocate and minimally initialise one engine
 * @dev_priv: i915 device private
 * @id: which entry of intel_engines[] to instantiate
 *
 * Copies the static description into a freshly allocated intel_engine_cs
 * and registers it in dev_priv->engine[] / engine_class[][]. No hardware
 * access beyond scrubbing the inherited HWSTAM state.
 *
 * Return: 0 on success, -EINVAL on inconsistent static tables, -ENOMEM.
 */
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));

	/* Class/instance must be representable in the gen11 hw descriptors */
	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	/* Refuse to register the same class/instance slot twice */
	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
		return -EINVAL;

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	engine->id = id;
	engine->i915 = dev_priv;
	__sprint_engine_name(engine->name, info);
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
	engine->class = info->class;
	engine->instance = info->instance;

	engine->uabi_id = info->uabi_id;
	engine->uabi_class = intel_engine_classes[info->class].uabi_class;

	engine->context_size = __intel_engine_context_size(dev_priv,
							   engine->class);
	/* Sanity limit: no context image should need more than 1MiB */
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	dev_priv->engine_class[info->class][info->instance] = engine;
	dev_priv->engine[id] = engine;
	return 0;
}
348
349/**
350 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
351 * @dev_priv: i915 device private
352 *
353 * Return: non-zero if the initialization failed.
354 */
355int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
356{
357 struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
358 const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
359 struct intel_engine_cs *engine;
360 enum intel_engine_id id;
361 unsigned int mask = 0;
362 unsigned int i;
363 int err;
364
365 WARN_ON(ring_mask == 0);
366 WARN_ON(ring_mask &
367 GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
368
369 if (i915_inject_load_failure())
370 return -ENODEV;
371
372 for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
373 if (!HAS_ENGINE(dev_priv, i))
374 continue;
375
376 err = intel_engine_setup(dev_priv, i);
377 if (err)
378 goto cleanup;
379
380 mask |= ENGINE_MASK(i);
381 }
382
383 /*
384 * Catch failures to update intel_engines table when the new engines
385 * are added to the driver by a warning and disabling the forgotten
386 * engines.
387 */
388 if (WARN_ON(mask != ring_mask))
389 device_info->ring_mask = mask;
390
391 /* We always presume we have at least RCS available for later probing */
392 if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
393 err = -ENODEV;
394 goto cleanup;
395 }
396
397 RUNTIME_INFO(dev_priv)->num_rings = hweight32(mask);
398
399 i915_check_and_clear_faults(dev_priv);
400
401 return 0;
402
403cleanup:
404 for_each_engine(engine, dev_priv, id)
405 kfree(engine);
406 return err;
407}
408
409/**
410 * intel_engines_init() - init the Engine Command Streamers
411 * @dev_priv: i915 device private
412 *
413 * Return: non-zero if the initialization failed.
414 */
415int intel_engines_init(struct drm_i915_private *dev_priv)
416{
417 struct intel_engine_cs *engine;
418 enum intel_engine_id id, err_id;
419 int err;
420
421 for_each_engine(engine, dev_priv, id) {
422 const struct engine_class_info *class_info =
423 &intel_engine_classes[engine->class];
424 int (*init)(struct intel_engine_cs *engine);
425
426 if (HAS_EXECLISTS(dev_priv))
427 init = class_info->init_execlists;
428 else
429 init = class_info->init_legacy;
430
431 err = -EINVAL;
432 err_id = id;
433
434 if (GEM_DEBUG_WARN_ON(!init))
435 goto cleanup;
436
437 err = init(engine);
438 if (err)
439 goto cleanup;
440
441 GEM_BUG_ON(!engine->submit_request);
442 }
443
444 return 0;
445
446cleanup:
447 for_each_engine(engine, dev_priv, id) {
448 if (id >= err_id) {
449 kfree(engine);
450 dev_priv->engine[id] = NULL;
451 } else {
452 dev_priv->gt.cleanup_engine(engine);
453 }
454 }
455 return err;
456}
457
/*
 * Force the engine's breadcrumb seqno in the HWSP to @seqno, then verify
 * the write landed by reading it back through the normal accessor.
 */
void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}
463
/* Set up the per-engine pool of reusable batch buffer objects. */
static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
{
	i915_gem_batch_pool_init(&engine->batch_pool, engine);
}
468
/*
 * Initialise the software side of the execlists state: a two-port ELSP
 * (port_mask = ports - 1), an empty priority queue, and the lowest
 * possible queue priority hint so any new request will kick submission.
 */
static void intel_engine_init_execlist(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}
480
/*
 * Release the hardware status page: quiesce hw writes first, then unpin
 * and drop the backing object. Safe to call if no HWSP was ever set up
 * (engine->status_page.vma == NULL).
 */
static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	/* The GGTT pin only exists for the non-physical-HWS path */
	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	__i915_gem_object_release_unless_active(vma->obj);
}
498
499static int pin_ggtt_status_page(struct intel_engine_cs *engine,
500 struct i915_vma *vma)
501{
502 unsigned int flags;
503
504 flags = PIN_GLOBAL;
505 if (!HAS_LLC(engine->i915))
506 /*
507 * On g33, we cannot place HWS above 256MiB, so
508 * restrict its pinning to the low mappable arena.
509 * Though this restriction is not documented for
510 * gen4, gen5, or byt, they also behave similarly
511 * and hang if the HWS is placed at the top of the
512 * GTT. To generalise, it appears that all !llc
513 * platforms have issues with us placing the HWS
514 * above the mappable region (even though we never
515 * actually map it).
516 */
517 flags |= PIN_MAPPABLE;
518 else
519 flags |= PIN_HIGH;
520
521 return i915_vma_pin(vma, 0, 0, flags);
522}
523
/*
 * Allocate, map and (if the platform uses a GTT-based HWS) pin the
 * hardware status page for @engine. On success engine->status_page
 * holds the zeroed CPU mapping and its vma.
 *
 * Return: 0 on success, negative errno otherwise; all partially
 * acquired resources are released on the error paths.
 */
static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
	if (ret)
		goto err;

	vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	/* Publish a zeroed mapping before any hardware writes can occur */
	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}
577
578/**
579 * intel_engines_setup_common - setup engine state not requiring hw access
580 * @engine: Engine to setup.
581 *
582 * Initializes @engine@ structure members shared between legacy and execlists
583 * submission modes which do not require hardware access.
584 *
585 * Typically done early in the submission mode specific engine setup stage.
586 */
587int intel_engine_setup_common(struct intel_engine_cs *engine)
588{
589 int err;
590
591 err = init_status_page(engine);
592 if (err)
593 return err;
594
595 err = i915_timeline_init(engine->i915,
596 &engine->timeline,
597 engine->name,
598 engine->status_page.vma);
599 if (err)
600 goto err_hwsp;
601
602 i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
603
604 intel_engine_init_breadcrumbs(engine);
605 intel_engine_init_execlist(engine);
606 intel_engine_init_hangcheck(engine);
607 intel_engine_init_batch_pool(engine);
608 intel_engine_init_cmd_parser(engine);
609
610 return 0;
611
612err_hwsp:
613 cleanup_status_page(engine);
614 return err;
615}
616
/* Unpin the (ctx, engine) intel_context previously pinned via
 * intel_context_pin(). */
static void __intel_context_unpin(struct i915_gem_context *ctx,
				  struct intel_engine_cs *engine)
{
	intel_context_unpin(to_intel_context(ctx, engine));
}
622
/*
 * Scratch fixture for measure_breadcrumb_dw(): a fake request, timeline
 * and ring whose backing store is the local cs[] buffer, so the
 * breadcrumb emitter can run without touching real hardware state.
 */
struct measure_breadcrumb {
	struct i915_request rq;
	struct i915_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];	/* command stream scratch the breadcrumb is emitted into */
};
629
/*
 * Emit the engine's fini-breadcrumb into a throwaway ring and count how
 * many dwords it occupies, so request emission can reserve exactly the
 * right amount of ring space later.
 *
 * Return: the dword count on success, or a negative errno.
 */
static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->i915->gt.scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	if (i915_timeline_init(engine->i915,
			       &frame->timeline, "measure",
			       engine->status_page.vma))
		goto out_frame;

	/* Point the fake ring at our local scratch buffer */
	INIT_LIST_HEAD(&frame->ring.request_list);
	frame->ring.timeline = &frame->timeline;
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	frame->rq.timeline = &frame->timeline;

	dw = i915_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	/* Pointer difference gives the breadcrumb length in dwords */
	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	i915_timeline_unpin(&frame->timeline);

out_timeline:
	i915_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}
672
673/**
674 * intel_engines_init_common - initialize cengine state which might require hw access
675 * @engine: Engine to initialize.
676 *
677 * Initializes @engine@ structure members shared between legacy and execlists
678 * submission modes which do require hardware access.
679 *
680 * Typcally done at later stages of submission mode specific engine setup.
681 *
682 * Returns zero on success or an error code on failure.
683 */
684int intel_engine_init_common(struct intel_engine_cs *engine)
685{
686 struct drm_i915_private *i915 = engine->i915;
687 struct intel_context *ce;
688 int ret;
689
690 engine->set_default_submission(engine);
691
692 /* We may need to do things with the shrinker which
693 * require us to immediately switch back to the default
694 * context. This can cause a problem as pinning the
695 * default context also requires GTT space which may not
696 * be available. To avoid this we always pin the default
697 * context.
698 */
699 ce = intel_context_pin(i915->kernel_context, engine);
700 if (IS_ERR(ce))
701 return PTR_ERR(ce);
702
703 /*
704 * Similarly the preempt context must always be available so that
705 * we can interrupt the engine at any time.
706 */
707 if (i915->preempt_context) {
708 ce = intel_context_pin(i915->preempt_context, engine);
709 if (IS_ERR(ce)) {
710 ret = PTR_ERR(ce);
711 goto err_unpin_kernel;
712 }
713 }
714
715 ret = measure_breadcrumb_dw(engine);
716 if (ret < 0)
717 goto err_unpin_preempt;
718
719 engine->emit_fini_breadcrumb_dw = ret;
720
721 return 0;
722
723err_unpin_preempt:
724 if (i915->preempt_context)
725 __intel_context_unpin(i915->preempt_context, engine);
726
727err_unpin_kernel:
728 __intel_context_unpin(i915->kernel_context, engine);
729 return ret;
730}
731
732/**
733 * intel_engines_cleanup_common - cleans up the engine state created by
734 * the common initiailizers.
735 * @engine: Engine to cleanup.
736 *
737 * This cleans up everything created by the common helpers.
738 */
739void intel_engine_cleanup_common(struct intel_engine_cs *engine)
740{
741 struct drm_i915_private *i915 = engine->i915;
742
743 cleanup_status_page(engine);
744
745 intel_engine_fini_breadcrumbs(engine);
746 intel_engine_cleanup_cmd_parser(engine);
747 i915_gem_batch_pool_fini(&engine->batch_pool);
748
749 if (engine->default_state)
750 i915_gem_object_put(engine->default_state);
751
752 if (i915->preempt_context)
753 __intel_context_unpin(i915->preempt_context, engine);
754 __intel_context_unpin(i915->kernel_context, engine);
755
756 i915_timeline_fini(&engine->timeline);
757
758 intel_wa_list_free(&engine->ctx_wa_list);
759 intel_wa_list_free(&engine->wa_list);
760 intel_wa_list_free(&engine->whitelist);
761}
762
/*
 * Read the engine's current ACTHD (active head) pointer, selecting the
 * register layout for the running gen: 64-bit split registers on gen8+,
 * per-engine 32-bit register on gen4+, and the single legacy ACTHD
 * register before that.
 */
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 acthd;

	if (INTEL_GEN(dev_priv) >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
					 RING_ACTHD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}
778
/*
 * Read the engine's BBADDR (batch buffer head) pointer: 64-bit split
 * registers on gen8+, a single 32-bit register otherwise.
 */
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u64 bbaddr;

	if (INTEL_GEN(dev_priv) >= 8)
		bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
					  RING_BBADDR_UDW(engine->mmio_base));
	else
		bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));

	return bbaddr;
}
792
/*
 * intel_engine_stop_cs - ask the command streamer to stop and wait for idle
 * @engine: engine to stop
 *
 * Sets STOP_RING in RING_MI_MODE and polls (up to 1ms) for MODE_IDLE.
 *
 * Return: 0 on success, -ENODEV before gen3 (no MI_MODE register),
 * -ETIMEDOUT if the CS did not report idle in time.
 */
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(dev_priv) < 3)
		return -ENODEV;

	GEM_TRACE("%s\n", engine->name);

	I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(dev_priv,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, 0,
					 NULL)) {
		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	POSTING_READ_FW(mode);

	return err;
}
821
/* Undo intel_engine_stop_cs(): clear STOP_RING so the CS may run again. */
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	GEM_TRACE("%s\n", engine->name);

	I915_WRITE_FW(RING_MI_MODE(engine->mmio_base),
		      _MASKED_BIT_DISABLE(STOP_RING));
}
831
832const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
833{
834 switch (type) {
835 case I915_CACHE_NONE: return " uncached";
836 case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
837 case I915_CACHE_L3_LLC: return " L3+LLC";
838 case I915_CACHE_WT: return " WT";
839 default: return "";
840 }
841}
842
/*
 * Compute the default MCR slice/subslice steering selector for the
 * device, based on the highest enabled slice and subslice.
 */
u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
	u32 mcr_s_ss_select;
	u32 slice = fls(sseu->slice_mask);
	/*
	 * NOTE(review): fls() is 1-based, so @slice here is one past the
	 * highest set bit's 0-based index; confirm subslice_mask[] is
	 * meant to be indexed this way rather than with fls() - 1.
	 */
	u32 subslice = fls(sseu->subslice_mask[slice]);

	if (IS_GEN(dev_priv, 10))
		mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
				  GEN8_MCR_SUBSLICE(subslice);
	else if (INTEL_GEN(dev_priv) >= 11)
		mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
				  GEN11_MCR_SUBSLICE(subslice);
	else
		/* Before gen10 no steering is applied by default */
		mcr_s_ss_select = 0;

	return mcr_s_ss_select;
}
861
/*
 * Read a register steered to a specific slice/subslice: program the MCR
 * selector, read the register, then restore the default steering. The
 * whole sequence runs with forcewake held and under uncore.lock so no
 * other mmio access can observe the temporary steering.
 */
static inline u32
read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
		  int subslice, i915_reg_t reg)
{
	u32 mcr_slice_subslice_mask;
	u32 mcr_slice_subslice_select;
	u32 default_mcr_s_ss_select;
	u32 mcr;
	u32 ret;
	enum forcewake_domains fw_domains;

	/* MCR field layout differs between gen11+ and gen8..10 */
	if (INTEL_GEN(dev_priv) >= 11) {
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
					    GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
					    GEN8_MCR_SUBSLICE(subslice);
	}

	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);

	/* Need forcewake for both the target register and the selector */
	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&dev_priv->uncore.lock);
	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);

	/* The selector is expected to hold the default steering on entry */
	WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
		     default_mcr_s_ss_select);

	mcr &= ~mcr_slice_subslice_mask;
	mcr |= mcr_slice_subslice_select;
	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	ret = I915_READ_FW(reg);

	/* Restore default steering before anyone else reads through it */
	mcr &= ~mcr_slice_subslice_mask;
	mcr |= default_mcr_s_ss_select;

	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
	spin_unlock_irq(&dev_priv->uncore.lock);

	return ret;
}
917
/* NB: please notice the memset */
/*
 * intel_engine_get_instdone - capture the engine's INSTDONE state
 * @engine: engine to sample
 * @instdone: output, fully zeroed before any fields are filled
 *
 * Which fields get populated depends on the gen and on whether the
 * engine is the render engine (only RCS has slice/sampler/row state).
 */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *dev_priv = engine->i915;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(dev_priv)) {
	default:
		/* gen8+: per-slice/subslice registers need MCR steering */
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(dev_priv, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id != RCS)
			break;

		/* gen7 has a single slice; no steering required */
		instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
		instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));

		if (engine->id == RCS)
			/* HACK: Using the wrong struct member */
			instdone->slice_common = I915_READ(GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = I915_READ(GEN2_INSTDONE);
		break;
	}
}
972
/*
 * Check whether the engine's ring looks idle from its registers:
 * HEAD == TAIL (no commands left) and, on gen3+, MODE_IDLE set in
 * RING_MI_MODE. Requires the device awake; if it is already asleep we
 * may safely report idle without touching hardware.
 */
static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	intel_wakeref_t wakeref;
	bool idle = true;

	/* Mock engines in selftests have no mmio to inspect */
	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	/* If the whole device is asleep, the engine must be idle */
	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
	if (!wakeref)
		return true;

	/* First check that no commands are left in the ring */
	if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
	    (I915_READ_TAIL(engine) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(dev_priv, wakeref);

	return idle;
}
1000
1001/**
1002 * intel_engine_is_idle() - Report if the engine has finished process all work
1003 * @engine: the intel_engine_cs
1004 *
1005 * Return true if there are no requests pending, nothing left to be submitted
1006 * to hardware, and that the engine is idle.
1007 */
1008bool intel_engine_is_idle(struct intel_engine_cs *engine)
1009{
1010 struct drm_i915_private *dev_priv = engine->i915;
1011
1012 /* More white lies, if wedged, hw state is inconsistent */
1013 if (i915_terminally_wedged(&dev_priv->gpu_error))
1014 return true;
1015
1016 /* Any inflight/incomplete requests? */
1017 if (!intel_engine_signaled(engine, intel_engine_last_submit(engine)))
1018 return false;
1019
1020 /* Waiting to drain ELSP? */
1021 if (READ_ONCE(engine->execlists.active)) {
1022 struct tasklet_struct *t = &engine->execlists.tasklet;
1023
1024 local_bh_disable();
1025 if (tasklet_trylock(t)) {
1026 /* Must wait for any GPU reset in progress. */
1027 if (__tasklet_is_enabled(t))
1028 t->func(t->data);
1029 tasklet_unlock(t);
1030 }
1031 local_bh_enable();
1032
1033 /* Otherwise flush the tasklet if it was on another cpu */
1034 tasklet_unlock_wait(t);
1035
1036 if (READ_ONCE(engine->execlists.active))
1037 return false;
1038 }
1039
1040 /* ELSP is empty, but there are ready requests? E.g. after reset */
1041 if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
1042 return false;
1043
1044 /* Ring stopped? */
1045 return ring_is_idle(engine);
1046}
1047
/*
 * Report whether every engine on the device is idle
 * (see intel_engine_is_idle() for the per-engine definition).
 */
bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
	 */
	if (i915_terminally_wedged(&dev_priv->gpu_error))
		return true;

	for_each_engine(engine, dev_priv, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}
1067
1068/**
1069 * intel_engine_has_kernel_context:
1070 * @engine: the engine
1071 *
1072 * Returns true if the last context to be executed on this engine, or has been
1073 * executed if the engine is already idle, is the kernel context
1074 * (#i915.kernel_context).
1075 */
1076bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
1077{
1078 const struct intel_context *kernel_context =
1079 to_intel_context(engine->i915->kernel_context, engine);
1080 struct i915_request *rq;
1081
1082 lockdep_assert_held(&engine->i915->drm.struct_mutex);
1083
1084 /*
1085 * Check the last context seen by the engine. If active, it will be
1086 * the last request that remains in the timeline. When idle, it is
1087 * the last executed context as tracked by retirement.
1088 */
1089 rq = __i915_active_request_peek(&engine->timeline.last_request);
1090 if (rq)
1091 return rq->hw_context == kernel_context;
1092 else
1093 return engine->last_retired_context == kernel_context;
1094}
1095
/* Restore every engine's submit_request path to its default backend. */
void intel_engines_reset_default_submission(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		engine->set_default_submission(engine);
}
1104
1105static bool reset_engines(struct drm_i915_private *i915)
1106{
1107 if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
1108 return false;
1109
1110 return intel_gpu_reset(i915, ALL_ENGINES) == 0;
1111}
1112
1113/**
1114 * intel_engines_sanitize: called after the GPU has lost power
1115 * @i915: the i915 device
1116 * @force: ignore a failed reset and sanitize engine state anyway
1117 *
1118 * Anytime we reset the GPU, either with an explicit GPU reset or through a
1119 * PCI power cycle, the GPU loses state and we must reset our state tracking
1120 * to match. Note that calling intel_engines_sanitize() if the GPU has not
1121 * been reset results in much confusion!
1122 */
1123void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
1124{
1125 struct intel_engine_cs *engine;
1126 enum intel_engine_id id;
1127
1128 GEM_TRACE("\n");
1129
1130 if (!reset_engines(i915) && !force)
1131 return;
1132
1133 for_each_engine(engine, i915, id)
1134 intel_engine_reset(engine, false);
1135}
1136
/**
 * intel_engines_park: called when the GT is transitioning from busy->idle
 * @i915: the i915 device
 *
 * The GT is now idle and about to go to sleep (maybe never to wake again?).
 * Time for us to tidy and put away our toys (release resources back to the
 * system).
 */
void intel_engines_park(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		/* Flush the residual irq tasklets first. */
		intel_engine_disarm_breadcrumbs(engine);
		tasklet_kill(&engine->execlists.tasklet);

		/*
		 * We are committed now to parking the engines, make sure there
		 * will be no more interrupts arriving later and the engines
		 * are truly idle.
		 */
		if (wait_for(intel_engine_is_idle(engine), 10)) {
			struct drm_printer p = drm_debug_printer(__func__);

			/* Not fatal: dump state for diagnosis and carry on. */
			dev_err(i915->drm.dev,
				"%s is not idle before parking\n",
				engine->name);
			intel_engine_dump(engine, &p, NULL);
		}

		/* Must be reset upon idling, or we may miss the busy wakeup. */
		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

		/* Backend-specific park hook, if one is provided. */
		if (engine->park)
			engine->park(engine);

		/* Release the kernel mapping of the default context image. */
		if (engine->pinned_default_state) {
			i915_gem_object_unpin_map(engine->default_state);
			engine->pinned_default_state = NULL;
		}

		i915_gem_batch_pool_fini(&engine->batch_pool);
		engine->execlists.no_priolist = false;
	}
}
1184
1185/**
1186 * intel_engines_unpark: called when the GT is transitioning from idle->busy
1187 * @i915: the i915 device
1188 *
1189 * The GT was idle and now about to fire up with some new user requests.
1190 */
1191void intel_engines_unpark(struct drm_i915_private *i915)
1192{
1193 struct intel_engine_cs *engine;
1194 enum intel_engine_id id;
1195
1196 for_each_engine(engine, i915, id) {
1197 void *map;
1198
1199 /* Pin the default state for fast resets from atomic context. */
1200 map = NULL;
1201 if (engine->default_state)
1202 map = i915_gem_object_pin_map(engine->default_state,
1203 I915_MAP_WB);
1204 if (!IS_ERR_OR_NULL(map))
1205 engine->pinned_default_state = map;
1206
1207 if (engine->unpark)
1208 engine->unpark(engine);
1209
1210 intel_engine_init_hangcheck(engine);
1211 }
1212}
1213
1214/**
1215 * intel_engine_lost_context: called when the GPU is reset into unknown state
1216 * @engine: the engine
1217 *
1218 * We have either reset the GPU or otherwise about to lose state tracking of
1219 * the current GPU logical state (e.g. suspend). On next use, it is therefore
1220 * imperative that we make no presumptions about the current state and load
1221 * from scratch.
1222 */
1223void intel_engine_lost_context(struct intel_engine_cs *engine)
1224{
1225 struct intel_context *ce;
1226
1227 lockdep_assert_held(&engine->i915->drm.struct_mutex);
1228
1229 ce = fetch_and_zero(&engine->last_retired_context);
1230 if (ce)
1231 intel_context_unpin(ce);
1232}
1233
1234bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1235{
1236 switch (INTEL_GEN(engine->i915)) {
1237 case 2:
1238 return false; /* uses physical not virtual addresses */
1239 case 3:
1240 /* maybe only uses physical not virtual addresses */
1241 return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1242 case 6:
1243 return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1244 default:
1245 return true;
1246 }
1247}
1248
1249unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
1250{
1251 struct intel_engine_cs *engine;
1252 enum intel_engine_id id;
1253 unsigned int which;
1254
1255 which = 0;
1256 for_each_engine(engine, i915, id)
1257 if (engine->default_state)
1258 which |= BIT(engine->uabi_class);
1259
1260 return which;
1261}
1262
1263static int print_sched_attr(struct drm_i915_private *i915,
1264 const struct i915_sched_attr *attr,
1265 char *buf, int x, int len)
1266{
1267 if (attr->priority == I915_PRIORITY_INVALID)
1268 return x;
1269
1270 x += snprintf(buf + x, len - x,
1271 " prio=%d", attr->priority);
1272
1273 return x;
1274}
1275
1276static void print_request(struct drm_printer *m,
1277 struct i915_request *rq,
1278 const char *prefix)
1279{
1280 const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
1281 char buf[80] = "";
1282 int x = 0;
1283
1284 x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
1285
1286 drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
1287 prefix,
1288 rq->global_seqno,
1289 i915_request_completed(rq) ? "!" :
1290 i915_request_started(rq) ? "*" :
1291 "",
1292 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1293 &rq->fence.flags) ? "+" : "",
1294 rq->fence.context, rq->fence.seqno,
1295 buf,
1296 jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1297 name);
1298}
1299
1300static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1301{
1302 const size_t rowsize = 8 * sizeof(u32);
1303 const void *prev = NULL;
1304 bool skip = false;
1305 size_t pos;
1306
1307 for (pos = 0; pos < len; pos += rowsize) {
1308 char line[128];
1309
1310 if (prev && !memcmp(prev, buf + pos, rowsize)) {
1311 if (!skip) {
1312 drm_printf(m, "*\n");
1313 skip = true;
1314 }
1315 continue;
1316 }
1317
1318 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1319 rowsize, sizeof(u32),
1320 line, sizeof(line),
1321 false) >= sizeof(line));
1322 drm_printf(m, "[%04zx] %s\n", pos, line);
1323
1324 prev = buf + pos;
1325 skip = false;
1326 }
1327}
1328
/*
 * Dump the engine's MMIO register state to @m. NOTE(review): reads hardware
 * registers, so the caller is expected to hold a wakeref (see the
 * intel_runtime_pm_get_if_in_use() guard in intel_engine_dump()).
 */
static void intel_engine_print_registers(const struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const struct intel_engine_execlists * const execlists =
		&engine->execlists;
	u64 addr;

	/* CCID is only read for the render engine on gen4-7 (see guard). */
	if (engine->id == RCS && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID));
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   I915_READ(RING_START(engine->mmio_base)));
	drm_printf(m, "\tRING_HEAD: 0x%08x\n",
		   I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL: 0x%08x\n",
		   I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
		   I915_READ(RING_CTL(engine->mmio_base)),
		   I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
			   I915_READ(RING_MI_MODE(engine->mmio_base)),
			   I915_READ(RING_MI_MODE(engine->mmio_base)) & (MODE_IDLE) ? " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
	}

	/* Where the HW reports it is currently executing. */
	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	/* DMA_FADD register layout differs by gen: 2x32, single 32b, i8xx. */
	if (INTEL_GEN(dev_priv) >= 8)
		addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base),
					RING_DMA_FADD_UDW(engine->mmio_base));
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = I915_READ(RING_DMA_FADD(engine->mmio_base));
	else
		addr = I915_READ(DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   I915_READ(RING_IPEIR(engine->mmio_base)));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   I915_READ(RING_IPEHR(engine->mmio_base)));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv)) {
		/* CSB entries live in the HWSP starting at CSB_BUF0. */
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist status: 0x%08x %08x\n",
			   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
			   I915_READ(RING_EXECLIST_STATUS_HI(engine)));

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist CSB read %d, write %d [mmio:%d], tasklet queued? %s (%s)\n",
			   read, write,
			   GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine))),
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
		/* Normalise the ring indices, then walk the pending events. */
		if (read >= GEN8_CSB_ENTRIES)
			read = 0;
		if (write >= GEN8_CSB_ENTRIES)
			write = 0;
		if (read > write)
			write += GEN8_CSB_ENTRIES;
		while (read < write) {
			idx = ++read % GEN8_CSB_ENTRIES;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [mmio:0x%08x], context: %d [mmio:%d]\n",
				   idx,
				   hws[idx * 2],
				   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
				   hws[idx * 2 + 1],
				   I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)));
		}

		/* Show what is in-flight on each ELSP port. */
		rcu_read_lock();
		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
			struct i915_request *rq;
			unsigned int count;

			rq = port_unpack(&execlists->port[idx], &count);
			if (rq) {
				char hdr[80];

				snprintf(hdr, sizeof(hdr),
					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x}, rq: ",
					 idx, count,
					 i915_ggtt_offset(rq->ring->vma),
					 rq->timeline->hwsp_offset);
				print_request(m, rq, hdr);
			} else {
				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
			}
		}
		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
		rcu_read_unlock();
	} else if (INTEL_GEN(dev_priv) > 6) {
		/* Legacy ppgtt registers (no execlists). */
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE(engine)));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   I915_READ(RING_PP_DIR_BASE_READ(engine)));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   I915_READ(RING_PP_DIR_DCLV(engine)));
	}
}
1448
1449static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
1450{
1451 void *ring;
1452 int size;
1453
1454 drm_printf(m,
1455 "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
1456 rq->head, rq->postfix, rq->tail,
1457 rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1458 rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1459
1460 size = rq->tail - rq->head;
1461 if (rq->tail < rq->head)
1462 size += rq->ring->size;
1463
1464 ring = kmalloc(size, GFP_ATOMIC);
1465 if (ring) {
1466 const void *vaddr = rq->ring->vaddr;
1467 unsigned int head = rq->head;
1468 unsigned int len = 0;
1469
1470 if (rq->tail < head) {
1471 len = rq->ring->size - head;
1472 memcpy(ring, vaddr + head, len);
1473 head = 0;
1474 }
1475 memcpy(ring + len, vaddr + head, size - len);
1476
1477 hexdump(m, ring, size);
1478 kfree(ring);
1479 }
1480}
1481
/*
 * intel_engine_dump - pretty-print the full software/hardware state of an
 * engine for debugging.
 * @engine: the engine to report on
 * @m: the drm_printer sink
 * @header: optional printf-style header line, followed by its varargs
 *
 * Register state is only dumped if the device is already awake; otherwise
 * a placeholder line is printed instead.
 */
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (i915_terminally_wedged(&engine->i915->gpu_error))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
		   intel_engine_get_seqno(engine),
		   intel_engine_last_submit(engine),
		   engine->hangcheck.seqno,
		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	rcu_read_lock();

	drm_printf(m, "\tRequests:\n");

	/* list_*_entry() on an empty list yields the head; check for that. */
	rq = list_first_entry(&engine->timeline.requests,
			      struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tfirst ");

	rq = list_last_entry(&engine->timeline.requests,
			     struct i915_request, link);
	if (&rq->link != &engine->timeline.requests)
		print_request(m, rq, "\t\tlast ");

	rq = i915_gem_find_active_request(engine);
	if (rq) {
		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start: 0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head: 0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail: 0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit: 0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space: 0x%08x\n",
			   rq->ring->space);
		drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
			   rq->timeline->hwsp_offset);

		print_request_ring(m, rq);
	}

	rcu_read_unlock();

	/* Only touch MMIO if the device is already awake. */
	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(engine->i915, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}
1563
/*
 * Map from the uabi engine class (I915_ENGINE_CLASS_*) to our internal
 * hardware engine class; used by intel_engine_lookup_user().
 */
static u8 user_class_map[] = {
	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
};
1570
1571struct intel_engine_cs *
1572intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
1573{
1574 if (class >= ARRAY_SIZE(user_class_map))
1575 return NULL;
1576
1577 class = user_class_map[class];
1578
1579 GEM_BUG_ON(class > MAX_ENGINE_CLASS);
1580
1581 if (instance > MAX_ENGINE_INSTANCE)
1582 return NULL;
1583
1584 return i915->engine_class[class][instance];
1585}
1586
/**
 * intel_enable_engine_stats() - Enable engine busy tracking on engine
 * @engine: engine to enable stats collection
 *
 * Start collecting the engine busyness data for @engine.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned long flags;
	int err = 0;

	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

	/*
	 * Take the timeline lock first so the set of requests in-flight on
	 * the ELSP ports cannot change while we sample it below; then enter
	 * the stats seqlock write section.
	 */
	spin_lock_irqsave(&engine->timeline.lock, flags);
	write_seqlock(&engine->stats.lock);

	/* The enable refcount is saturated; cannot take another reference. */
	if (unlikely(engine->stats.enabled == ~0)) {
		err = -EBUSY;
		goto unlock;
	}

	/* First enabler initialises the accounting state. */
	if (engine->stats.enabled++ == 0) {
		const struct execlist_port *port = execlists->port;
		unsigned int num_ports = execlists_num_ports(execlists);

		engine->stats.enabled_at = ktime_get();

		/* XXX submission method oblivious? */
		while (num_ports-- && port_isset(port)) {
			engine->stats.active++;
			port++;
		}

		/* Engine is already busy: the current busy period starts now. */
		if (engine->stats.active)
			engine->stats.start = engine->stats.enabled_at;
	}

unlock:
	write_sequnlock(&engine->stats.lock);
	spin_unlock_irqrestore(&engine->timeline.lock, flags);

	return err;
}
1634
1635static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
1636{
1637 ktime_t total = engine->stats.total;
1638
1639 /*
1640 * If the engine is executing something at the moment
1641 * add it to the total.
1642 */
1643 if (engine->stats.active)
1644 total = ktime_add(total,
1645 ktime_sub(ktime_get(), engine->stats.start));
1646
1647 return total;
1648}
1649
/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	/*
	 * Lockless reader: retry until we observe a snapshot uninterrupted
	 * by a concurrent writer of engine->stats.lock (seqlock pattern).
	 */
	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}
1668
/**
 * intel_disable_engine_stats() - Disable engine busy tracking on engine
 * @engine: engine to disable stats collection
 *
 * Stops collecting the engine busyness data for @engine.
 */
void intel_disable_engine_stats(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (!intel_engine_supports_stats(engine))
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);
	/* Disable without a matching enable indicates refcount imbalance. */
	WARN_ON_ONCE(engine->stats.enabled == 0);
	/* Last disabler folds any in-progress busy period into the total. */
	if (--engine->stats.enabled == 0) {
		engine->stats.total = __intel_engine_get_busy_time(engine);
		engine->stats.active = 0;
	}
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
1690
1691#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1692#include "selftests/mock_engine.c"
1693#include "selftests/intel_engine_cs.c"
1694#endif