/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

struct intel_hw_status_page {
        struct i915_vma *vma;
        u32 *addr;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW
 * to do the writes, and that command must have qword-aligned offsets, simply
 * pretend it's 8 bytes.
 */
enum intel_engine_hangcheck_action {
        ENGINE_IDLE = 0,
        ENGINE_WAIT,
        ENGINE_ACTIVE_SEQNO,
        ENGINE_ACTIVE_HEAD,
        ENGINE_ACTIVE_SUBUNITS,
        ENGINE_WAIT_KICK,
        ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
        switch (a) {
        case ENGINE_IDLE:
                return "idle";
        case ENGINE_WAIT:
                return "wait";
        case ENGINE_ACTIVE_SEQNO:
                return "active seqno";
        case ENGINE_ACTIVE_HEAD:
                return "active head";
        case ENGINE_ACTIVE_SUBUNITS:
                return "active subunits";
        case ENGINE_WAIT_KICK:
                return "wait kick";
        case ENGINE_DEAD:
                return "dead";
        }

        return "unknown";
}

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
        for ((slice__) = 0, (subslice__) = 0; \
             (slice__) < I915_MAX_SLICES; \
             (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
               (slice__) += ((subslice__) == 0)) \
                for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
                            (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
        u32 instdone;
        /* The following exist only in the RCS engine */
        u32 slice_common;
        u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
        u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
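
/*
 * Illustrative sketch (not part of the original header): walking every
 * populated slice/subslice pair with the iterator above to snapshot the
 * per-subslice INSTDONE registers. read_subslice_reg() stands in for the
 * driver's real MMIO accessor and is assumed here purely for illustration.
 *
 *      u32 slice, subslice;
 *
 *      for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
 *              instdone->sampler[slice][subslice] =
 *                      read_subslice_reg(dev_priv, slice, subslice,
 *                                        GEN7_SAMPLER_INSTDONE);
 *              instdone->row[slice][subslice] =
 *                      read_subslice_reg(dev_priv, slice, subslice,
 *                                        GEN7_ROW_INSTDONE);
 *      }
 */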

struct intel_engine_hangcheck {
        u64 acthd;
        u32 seqno;
        unsigned long action_timestamp;
        struct intel_instdone instdone;
};

struct intel_ring {
        struct i915_vma *vma;
        void *vaddr;

        struct i915_timeline *timeline;
        struct list_head request_list;
        struct list_head active_link;

        u32 head;
        u32 tail;
        u32 emit;

        u32 space;
        u32 size;
        u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, which is also helpful
 *  in case we want to have multiple batches at different offsets based
 *  on some criteria. It is not a requirement at the moment but provides
 *  an option for future use.
 *  size: size of the batch in dwords
 */
struct i915_ctx_workarounds {
        struct i915_wa_ctx_bb {
                u32 offset;
                u32 size;
        } indirect_ctx, per_ctx;
        struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS 4
#define I915_MAX_VECS 2

/*
 * Engine IDs definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
        RCS = 0,
        BCS,
        VCS,
        VCS2,
        VCS3,
        VCS4,
#define _VCS(n) (VCS + (n))
        VECS,
        VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
        struct list_head requests[I915_PRIORITY_COUNT];
        struct rb_node node;
        unsigned long used;
        int priority;
};

#define priolist_for_each_request(it, plist, idx) \
        for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
                list_for_each_entry(it, &(plist)->requests[idx], sched.link)

#define priolist_for_each_request_consume(it, n, plist, idx) \
        for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
                list_for_each_entry_safe(it, n, \
                                         &(plist)->requests[idx - 1], \
                                         sched.link)
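
/*
 * Illustrative sketch (assumed caller, not from the original header): a
 * consuming dequeue loop over one priolist, in the style of the execlists
 * submission path. __i915_request_submit() is the real submission helper;
 * the surrounding locking is elided.
 *
 *      struct i915_request *rq, *rn;
 *      int idx;
 *
 *      priolist_for_each_request_consume(rq, rn, p, idx) {
 *              list_del_init(&rq->sched.link);
 *              __i915_request_submit(rq);
 *      }
 */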

struct st_preempt_hang {
        struct completion completion;
        unsigned int count;
        bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
        /**
         * @tasklet: softirq tasklet for bottom handler
         */
        struct tasklet_struct tasklet;

        /**
         * @default_priolist: priority list for I915_PRIORITY_NORMAL
         */
        struct i915_priolist default_priolist;

        /**
         * @no_priolist: priority lists disabled
         */
        bool no_priolist;

        /**
         * @submit_reg: gen-specific execlist submission register
         * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
         * the ExecList Submission Queue Contents register array for Gen11+
         */
        u32 __iomem *submit_reg;

        /**
         * @ctrl_reg: the enhanced execlists control register, used to load the
         * submit queue on the HW and to request preemptions to idle
         */
        u32 __iomem *ctrl_reg;

        /**
         * @port: execlist port states
         *
         * For each hardware ELSP (ExecList Submission Port) we keep
         * track of the last request and the number of times we submitted
         * that port to hw. We then count the number of times the hw reports
         * a context completion or preemption. As only one context can
         * be active on hw, we limit resubmission of a context to port[0].
         * This is called a lite restore of the context.
         */
        struct execlist_port {
                /**
                 * @request_count: combined request and submission count
                 */
                struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

                /**
                 * @context_id: context ID for port
                 */
                GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
        } port[EXECLIST_MAX_PORTS];

        /**
         * @active: is the HW active? We consider the HW as active after
         * submitting any context for execution and until we have seen the
         * last context completion event. After that, we do not expect any
         * more events until we submit, and so can park the HW.
         *
         * As we have a small number of different sources from which we feed
         * the HW, we track the state of each inside a single bitfield.
         */
        unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

        /**
         * @port_mask: number of execlist ports - 1
         */
        unsigned int port_mask;

        /**
         * @queue_priority_hint: Highest pending priority.
         *
         * When we add requests into the queue, or adjust the priority of
         * executing requests, we compute the maximum priority of those
         * pending requests. We can then use this value to determine if
         * we need to preempt the executing requests to service the queue.
         * However, since we may have recorded the priority of an inflight
         * request that we wanted to preempt but which has since completed,
         * at the time of dequeuing the priority hint may no longer match
         * the highest available request priority.
         */
        int queue_priority_hint;

        /**
         * @queue: queue of requests, in priority lists
         */
        struct rb_root_cached queue;

        /**
         * @csb_write: control register for Context Switch buffer
         *
         * Note this register may be either mmio or HWSP shadow.
         */
        u32 *csb_write;

        /**
         * @csb_status: status array for Context Switch buffer
         *
         * Note these registers may be either mmio or HWSP shadow.
         */
        u32 *csb_status;

        /**
         * @preempt_complete_status: expected CSB upon completing preemption
         */
        u32 preempt_complete_status;

        /**
         * @csb_head: context status buffer head
         */
        u8 csb_head;

        I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
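
/*
 * Illustrative sketch (assumed caller, not from the original header) of
 * the port_pack()/port_unpack() protocol defined inside the struct above:
 * a request pointer is stored with its submission count packed into the
 * low pointer bits, and a completion event either decrements that count
 * (a lite restore) or releases the port.
 *
 *      struct execlist_port *port = execlists->port;
 *      struct i915_request *rq;
 *      unsigned int count;
 *
 *      rq = port_unpack(port, &count);
 *      if (count > 1)
 *              port_set(port, port_pack(rq, count - 1));
 *      else
 *              execlists_port_complete(execlists, port);
 */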

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
        struct drm_i915_private *i915;
        char name[INTEL_ENGINE_CS_MAX_NAME];

        enum intel_engine_id id;
        unsigned int hw_id;
        unsigned int guc_id;

        u8 uabi_id;
        u8 uabi_class;

        u8 class;
        u8 instance;
        u32 context_size;
        u32 mmio_base;

        struct intel_ring *buffer;

        struct i915_timeline timeline;

        struct drm_i915_gem_object *default_state;
        void *pinned_default_state;

        /* Rather than have every client wait upon all user interrupts,
         * with the herd waking after every interrupt and each doing the
         * heavyweight seqno dance, we delegate the task (of being the
         * bottom-half of the user interrupt) to the first client. After
         * every interrupt, we wake up one client, who does the heavyweight
         * coherent seqno read and either goes back to sleep (if incomplete),
         * or wakes up all the completed clients in parallel, before then
         * transferring the bottom-half status to the next client in the queue.
         *
         * Compared to walking the entire list of waiters in a single dedicated
         * bottom-half, we reduce the latency of the first waiter by avoiding
         * a context switch, but incur additional coherent seqno reads when
         * following the chain of request breadcrumbs. Since it is most likely
         * that we have a single client waiting on each seqno, reducing
         * the overhead of waking that client is much preferred.
         */
        struct intel_breadcrumbs {
                spinlock_t irq_lock;
                struct list_head signalers;

                struct irq_work irq_work; /* for use from inside irq_lock */

                unsigned int irq_enabled;

                bool irq_armed;
        } breadcrumbs;

        struct {
                /**
                 * @enable: Bitmask of enabled sample events on this engine.
                 *
                 * Bits correspond to sample event types, for instance
                 * I915_SAMPLE_QUEUED is bit 0 etc.
                 */
                u32 enable;
                /**
                 * @enable_count: Reference count for the enabled samplers.
                 *
                 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
                unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
                /**
                 * @sample: Counter values for sampling events.
                 *
                 * Our internal timer stores the current counters in this field.
                 *
                 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
                struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
        } pmu;

        /*
         * A pool of objects to use as shadow copies of client batch buffers
         * when the command parser is enabled. Prevents the client from
         * modifying the batch contents after software parsing.
         */
        struct i915_gem_batch_pool batch_pool;

        struct intel_hw_status_page status_page;
        struct i915_ctx_workarounds wa_ctx;
        struct i915_wa_list ctx_wa_list;
        struct i915_wa_list wa_list;
        struct i915_wa_list whitelist;

        u32 irq_keep_mask; /* always keep these interrupts */
        u32 irq_enable_mask; /* bitmask to enable ring interrupt */
        void (*irq_enable)(struct intel_engine_cs *engine);
        void (*irq_disable)(struct intel_engine_cs *engine);

        int (*init_hw)(struct intel_engine_cs *engine);

        struct {
                void (*prepare)(struct intel_engine_cs *engine);
                void (*reset)(struct intel_engine_cs *engine, bool stalled);
                void (*finish)(struct intel_engine_cs *engine);
        } reset;

        void (*park)(struct intel_engine_cs *engine);
        void (*unpark)(struct intel_engine_cs *engine);

        void (*set_default_submission)(struct intel_engine_cs *engine);

        struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
                                             struct i915_gem_context *ctx);

        int (*request_alloc)(struct i915_request *rq);
        int (*init_context)(struct i915_request *rq);

        int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
        int (*emit_bb_start)(struct i915_request *rq,
                             u64 offset, u32 length,
                             unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
        int (*emit_init_breadcrumb)(struct i915_request *rq);
        u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
                                     u32 *cs);
        unsigned int emit_fini_breadcrumb_dw;

        /* Pass the request to the hardware queue (e.g. directly into
         * the legacy ringbuffer or to the end of an execlist).
         *
         * This is called from an atomic context with irqs disabled; must
         * be irq safe.
         */
        void (*submit_request)(struct i915_request *rq);

        /*
         * Call when the priority on a request has changed and it and its
         * dependencies may need rescheduling. Note the request itself may
         * not be ready to run!
         */
        void (*schedule)(struct i915_request *request,
                         const struct i915_sched_attr *attr);

        /*
         * Cancel all requests on the hardware, or queued for execution.
         * This should only cancel the ready requests that have been
         * submitted to the engine (via the engine->submit_request callback).
         * This is called when marking the device as wedged.
         */
        void (*cancel_requests)(struct intel_engine_cs *engine);

        void (*cleanup)(struct intel_engine_cs *engine);

        struct intel_engine_execlists execlists;

        /* Contexts are pinned whilst they are active on the GPU. The last
         * context executed remains active whilst the GPU is idle - the
         * switch away and write to the context object only occurs on the
         * next execution. Contexts are only unpinned on retirement of the
         * following request ensuring that we can always write to the object
         * on the context switch even after idling. Across suspend, we switch
         * to the kernel context and trash it as the save may not happen
         * before the hardware is powered down.
         */
        struct intel_context *last_retired_context;

        /* status_notifier: list of callbacks for context-switch changes */
        struct atomic_notifier_head context_status_notifier;

        struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
        unsigned int flags;

        /*
         * Table of commands the command parser needs to know about
         * for this engine.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const struct drm_i915_reg_table *reg_tables;
        int reg_table_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Return 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the engine's
         * cmd_tables, it gets the command's length based on the table entry.
         * If not, it calls this function to determine the per-engine length
         * field encoding for the command (i.e. different opcode ranges use
         * certain bits to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);

        struct {
                /**
                 * @lock: Lock protecting the below fields.
                 */
                seqlock_t lock;
                /**
                 * @enabled: Reference count indicating number of listeners.
                 */
                unsigned int enabled;
                /**
                 * @active: Number of contexts currently scheduled in.
                 */
                unsigned int active;
                /**
                 * @enabled_at: Timestamp when busy stats were enabled.
                 */
                ktime_t enabled_at;
                /**
                 * @start: Timestamp of the last idle to active transition.
                 *
                 * Idle is defined as active == 0, active as active > 0.
                 */
                ktime_t start;
                /**
                 * @total: Total time this engine was busy.
                 *
                 * Accumulated time not counting the most recent block in cases
                 * where engine is currently busy (active > 0).
                 */
                ktime_t total;
        } stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
        /*
         * Allow preemption of low -> normal -> high, but we do
         * not allow low priority tasks to preempt other low priority
         * tasks under the impression that latency for low priority
         * tasks does not matter (as much as background throughput),
         * so kiss.
         *
         * More naturally we would write
         *      prio >= max(0, last);
         * except that we wish to prevent triggering preemption at the same
         * priority level: the task that is running should remain running
         * to preserve FIFO ordering of dependencies.
         */
        return prio > max(I915_PRIORITY_NORMAL - 1, last);
}
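
/*
 * Illustrative sketch (assumed caller, not from the original header):
 * the dequeue path compares the queue's priority hint against the
 * priority of the request occupying the last submitted port to decide
 * whether to inject a preemption. rq_prio() and inject_preempt_context()
 * are hypothetical stand-ins for the submission backend's helpers.
 *
 *      if (__execlists_need_preempt(execlists->queue_priority_hint,
 *                                   rq_prio(port_request(execlists->port))))
 *              inject_preempt_context(engine);
 */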

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
                     unsigned int bit)
{
        __set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
                          unsigned int bit)
{
        return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
                       unsigned int bit)
{
        __clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
        execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
                    unsigned int bit)
{
        return test_bit(bit, (unsigned long *)&execlists->active);
}
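
/*
 * Illustrative sketch (assumed usage, not from the original header):
 * the helpers above give an edge-triggered pattern, so work tied to a
 * state transition runs only on the first set and the final clear of a
 * bit. on_first_submission() is a hypothetical hook.
 *
 *      if (execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER))
 *              on_first_submission(engine);
 *      ...
 *      execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 */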

void execlists_user_begin(struct intel_engine_execlists *execlists,
                          const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
        return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
                        struct execlist_port * const port)
{
        const unsigned int m = execlists->port_mask;

        GEM_BUG_ON(port_index(port, execlists) != 0);
        GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

        memmove(port, port + 1, m * sizeof(struct execlist_port));
        memset(port + m, 0, sizeof(struct execlist_port));

        return port;
}

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
        return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
        /* Writing into the status page should be done sparingly. Since we
         * only do so when we are uncertain of the device state, we take a
         * bit of extra paranoia to try and ensure that the HWS takes the
         * value we give and that it doesn't end up trapped inside the CPU!
         */
        if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                mb();
                clflush(&engine->status_page.addr[reg]);
                engine->status_page.addr[reg] = value;
                clflush(&engine->status_page.addr[reg]);
                mb();
        } else {
                WRITE_ONCE(engine->status_page.addr[reg], value);
        }
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32))
#define I915_GEM_HWS_PREEMPT 0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX 0x10
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
                         struct i915_timeline *timeline,
                         int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
        /* Dummy function.
         *
         * This serves as a placeholder in the code so that the reader
         * can compare against the preceding intel_ring_begin() and
         * check that the number of dwords emitted matches the space
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
        GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
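
/*
 * Illustrative sketch (assumed caller, not from the original header) of
 * the intel_ring_begin()/intel_ring_advance() contract: reserve space,
 * emit exactly that many dwords, then close the packet.
 *
 *      u32 *cs;
 *
 *      cs = intel_ring_begin(rq, 4);
 *      if (IS_ERR(cs))
 *              return PTR_ERR(cs);
 *
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *
 *      intel_ring_advance(rq, cs);
 */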

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
        return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
                        unsigned int pos)
{
        if (pos & -ring->size) /* must be strictly within the ring */
                return false;

        if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
                return false;

        return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
        /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
        u32 offset = addr - rq->ring->vaddr;

        GEM_BUG_ON(offset > rq->ring->size);
        return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
        GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

        /*
         * "Ring Buffer Use"
         *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
         *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
         *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         *
         * We use ring->head as the last known location of the actual
         * RING_HEAD: it may have advanced since, but in the worst case it
         * still equals ring->head, so we must never program RING_TAIL to
         * advance into the same cacheline as ring->head.
         */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
        GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
                   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
        /* Whilst writes to the tail are strictly ordered, there is no
         * serialisation between readers and the writers. The tail may be
         * read by i915_request_retire() just as it is being updated
         * by execlists, as although the breadcrumb is complete, the context
         * switch hasn't been seen.
         */
        assert_ring_tail_valid(ring, tail);
        ring->tail = tail;
        return tail;
}

static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
        /*
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         */
        GEM_BUG_ON(!is_power_of_2(size));
        return (head - tail - CACHELINE_BYTES) & (size - 1);
}

void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);

int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
        /*
         * We are only peeking at the tail of the submit queue (and not the
         * queue itself) in order to gain a hint as to the current active
         * state of the engine. Callers are not expected to be taking
         * engine->timeline->lock, nor are they expected to be concerned
         * with serialising this hint with anything, so document it as
         * a hint and nothing more.
         */
        return READ_ONCE(engine->timeline.seqno);
}

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
        return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
                                         u32 seqno)
{
        return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
}

static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
                                              u32 seqno)
{
        GEM_BUG_ON(!seqno);
        return intel_engine_signaled(engine, seqno);
}

static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
                                            u32 seqno)
{
        GEM_BUG_ON(!seqno);
        return intel_engine_signaled(engine, seqno - 1);
}
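
/*
 * Illustrative sketch (hypothetical helper, not from the original
 * header): combining the two predicates above identifies a request that
 * the engine has started executing but not yet completed.
 *
 *      static inline bool
 *      engine_is_executing(struct intel_engine_cs *engine, u32 seqno)
 *      {
 *              return intel_engine_has_started(engine, seqno) &&
 *                     !intel_engine_has_completed(engine, seqno);
 *      }
 */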

void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
        irq_work_queue(&engine->breadcrumbs.irq_work);
}

bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
                                    struct drm_printer *p);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
        memset(batch, 0, 6 * sizeof(u32));

        batch[0] = GFX_OP_PIPE_CONTROL(6);
        batch[1] = flags;
        batch[2] = offset;

        return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
        /* We're using qword write, offset should be aligned to 8 bytes. */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        /* As a w/a for post-sync ops following a GPGPU operation, we
         * need a prior CS_STALL, which is emitted by the flush
         * following the batch.
         */
        *cs++ = GFX_OP_PIPE_CONTROL(6);
        *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
        *cs++ = gtt_offset;
        *cs++ = 0;
        *cs++ = value;
        /* We're thrashing one dword of HWS. */
        *cs++ = 0;

        return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        GEM_BUG_ON(gtt_offset & (1 << 5));
        /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
        *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
        *cs++ = 0;
        *cs++ = value;

        return cs;
}
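
/*
 * Illustrative sketch (assumed caller, not from the original header):
 * a minimal fini-breadcrumb emitter could use the helper above to post
 * the request's seqno into the HWSP and then raise a user interrupt.
 * The I915_GEM_HWS_SEQNO_ADDR slot is the one defined earlier; real
 * backends write to the request's own timeline slot instead.
 *
 *      cs = gen8_emit_ggtt_write(cs, rq->fence.seqno,
 *                                I915_GEM_HWS_SEQNO_ADDR);
 *      *cs++ = MI_USER_INTERRUPT;
 *      *cs++ = MI_NOOP;
 */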

static inline void intel_engine_reset(struct intel_engine_cs *engine,
                                      bool stalled)
{
        if (engine->reset.reset)
                engine->reset.reset(engine, stalled);
}

void intel_engines_sanitize(struct drm_i915_private *i915, bool force);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
                       struct drm_printer *m,
                       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                if (engine->stats.active++ == 0)
                        engine->stats.start = ktime_get();
                GEM_BUG_ON(engine->stats.active == 0);
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                ktime_t last;

                if (engine->stats.active && --engine->stats.active == 0) {
                        /*
                         * Decrement the active context count and, now that
                         * the GPU is idle, add the elapsed busy period to
                         * the running total.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.start);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                } else if (engine->stats.active == 0) {
                        /*
                         * After turning on engine stats, context out might be
                         * the first event in which case we account from the
                         * time stats gathering was turned on.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.enabled_at);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                }
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        if (!execlists->preempt_hang.inject_hang)
                return false;

        complete(&execlists->preempt_hang.completion);
        return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */