/* SPDX-License-Identifier: MIT */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <drm/drm_util.h>

#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))

struct intel_hw_status_page {
        struct i915_vma *vma;
        u32 *addr;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW
 * to do the writes, and that command must have qword-aligned offsets, simply
 * pretend it's 8 bytes.
 */
enum intel_engine_hangcheck_action {
        ENGINE_IDLE = 0,
        ENGINE_WAIT,
        ENGINE_ACTIVE_SEQNO,
        ENGINE_ACTIVE_HEAD,
        ENGINE_ACTIVE_SUBUNITS,
        ENGINE_WAIT_KICK,
        ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
        switch (a) {
        case ENGINE_IDLE:
                return "idle";
        case ENGINE_WAIT:
                return "wait";
        case ENGINE_ACTIVE_SEQNO:
                return "active seqno";
        case ENGINE_ACTIVE_HEAD:
                return "active head";
        case ENGINE_ACTIVE_SUBUNITS:
                return "active subunits";
        case ENGINE_WAIT_KICK:
                return "wait kick";
        case ENGINE_DEAD:
                return "dead";
        }

        return "unknown";
}

#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
        (IS_GEN(dev_priv__, 7) ? \
         1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
        for ((slice__) = 0, (subslice__) = 0; \
             (slice__) < I915_MAX_SLICES; \
             (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
               (slice__) += ((subslice__) == 0)) \
                for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
                            (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
        u32 instdone;
        /* The following exist only in the RCS engine */
        u32 slice_common;
        u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
        u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
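
/*
 * Illustrative sketch (not part of the original header): walking every
 * populated slice/subslice pair with the iterator above to snapshot the
 * per-subslice INSTDONE registers. read_subslice_reg() stands in for the
 * driver's real MMIO accessor and is assumed here purely for illustration.
 *
 *      u32 slice, subslice;
 *
 *      for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
 *              instdone->sampler[slice][subslice] =
 *                      read_subslice_reg(dev_priv, slice, subslice,
 *                                        GEN7_SAMPLER_INSTDONE);
 *              instdone->row[slice][subslice] =
 *                      read_subslice_reg(dev_priv, slice, subslice,
 *                                        GEN7_ROW_INSTDONE);
 *      }
 */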

struct intel_engine_hangcheck {
        u64 acthd;
        u32 seqno;
        unsigned long action_timestamp;
        struct intel_instdone instdone;
};

struct intel_ring {
        struct i915_vma *vma;
        void *vaddr;

        struct i915_timeline *timeline;
        struct list_head request_list;
        struct list_head active_link;

        u32 head;
        u32 tail;
        u32 emit;

        u32 space;
        u32 size;
        u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position, which is also helpful
 *  in case we want to have multiple batches at different offsets based
 *  on some criteria. It is not a requirement at the moment but provides
 *  an option for future use.
 *  size: size of the batch in dwords
 */
struct i915_ctx_workarounds {
        struct i915_wa_ctx_bb {
                u32 offset;
                u32 size;
        } indirect_ctx, per_ctx;
        struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS 4
#define I915_MAX_VECS 2

/*
 * Engine IDs definitions.
 * Keep instances of the same engine type together.
 */
enum intel_engine_id {
        RCS = 0,
        BCS,
        VCS,
        VCS2,
        VCS3,
        VCS4,
#define _VCS(n) (VCS + (n))
        VECS,
        VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
        struct list_head requests[I915_PRIORITY_COUNT];
        struct rb_node node;
        unsigned long used;
        int priority;
};

#define priolist_for_each_request(it, plist, idx) \
        for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
                list_for_each_entry(it, &(plist)->requests[idx], sched.link)

#define priolist_for_each_request_consume(it, n, plist, idx) \
        for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \
                list_for_each_entry_safe(it, n, \
                                         &(plist)->requests[idx - 1], \
                                         sched.link)
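
/*
 * Illustrative sketch (assumed caller, not from the original header): a
 * consuming dequeue loop over one priolist, in the style of the execlists
 * submission path. __i915_request_submit() is the real submission helper;
 * the surrounding locking is elided.
 *
 *      struct i915_request *rq, *rn;
 *      int idx;
 *
 *      priolist_for_each_request_consume(rq, rn, p, idx) {
 *              list_del_init(&rq->sched.link);
 *              __i915_request_submit(rq);
 *      }
 */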

struct st_preempt_hang {
        struct completion completion;
        unsigned int count;
        bool inject_hang;
};

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
        /**
         * @tasklet: softirq tasklet for bottom handler
         */
        struct tasklet_struct tasklet;

        /**
         * @default_priolist: priority list for I915_PRIORITY_NORMAL
         */
        struct i915_priolist default_priolist;

        /**
         * @no_priolist: priority lists disabled
         */
        bool no_priolist;

        /**
         * @submit_reg: gen-specific execlist submission register
         * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
         * the ExecList Submission Queue Contents register array for Gen11+
         */
        u32 __iomem *submit_reg;

        /**
         * @ctrl_reg: the enhanced execlists control register, used to load the
         * submit queue on the HW and to request preemptions to idle
         */
        u32 __iomem *ctrl_reg;

        /**
         * @port: execlist port states
         *
         * For each hardware ELSP (ExecList Submission Port) we keep
         * track of the last request and the number of times we submitted
         * that port to hw. We then count the number of times the hw reports
         * a context completion or preemption. As only one context can
         * be active on hw, we limit resubmission of a context to port[0].
         * This is called a lite restore of the context.
         */
        struct execlist_port {
                /**
                 * @request_count: combined request and submission count
                 */
                struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

                /**
                 * @context_id: context ID for port
                 */
                GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
        } port[EXECLIST_MAX_PORTS];

        /**
         * @active: is the HW active? We consider the HW as active after
         * submitting any context for execution and until we have seen the
         * last context completion event. After that, we do not expect any
         * more events until we submit, and so can park the HW.
         *
         * As we have a small number of different sources from which we feed
         * the HW, we track the state of each inside a single bitfield.
         */
        unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

        /**
         * @port_mask: number of execlist ports - 1
         */
        unsigned int port_mask;

        /**
         * @queue_priority_hint: Highest pending priority.
         *
         * When we add requests into the queue, or adjust the priority of
         * executing requests, we compute the maximum priority of those
         * pending requests. We can then use this value to determine if
         * we need to preempt the executing requests to service the queue.
         * However, since we may have recorded the priority of an inflight
         * request that we wanted to preempt but which has since completed,
         * at the time of dequeuing the priority hint may no longer match
         * the highest available request priority.
         */
        int queue_priority_hint;

        /**
         * @queue: queue of requests, in priority lists
         */
        struct rb_root_cached queue;

        /**
         * @csb_write: control register for Context Switch buffer
         *
         * Note this register may be either mmio or HWSP shadow.
         */
        u32 *csb_write;

        /**
         * @csb_status: status array for Context Switch buffer
         *
         * Note these registers may be either mmio or HWSP shadow.
         */
        u32 *csb_status;

        /**
         * @preempt_complete_status: expected CSB upon completing preemption
         */
        u32 preempt_complete_status;

        /**
         * @csb_head: context status buffer head
         */
        u8 csb_head;

        I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
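
/*
 * Illustrative sketch (assumed caller, not from the original header) of
 * the port_pack()/port_unpack() protocol defined inside the struct above:
 * a request pointer is stored with its submission count packed into the
 * low pointer bits, and a completion event either decrements that count
 * (a lite restore) or releases the port.
 *
 *      struct execlist_port *port = execlists->port;
 *      struct i915_request *rq;
 *      unsigned int count;
 *
 *      rq = port_unpack(port, &count);
 *      if (count > 1)
 *              port_set(port, port_pack(rq, count - 1));
 *      else
 *              execlists_port_complete(execlists, port);
 */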

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
        struct drm_i915_private *i915;
        char name[INTEL_ENGINE_CS_MAX_NAME];

        enum intel_engine_id id;
        unsigned int hw_id;
        unsigned int guc_id;

        u8 uabi_id;
        u8 uabi_class;

        u8 class;
        u8 instance;
        u32 context_size;
        u32 mmio_base;

        struct intel_ring *buffer;

        struct i915_timeline timeline;

        struct drm_i915_gem_object *default_state;
        void *pinned_default_state;

        /* Rather than have every client wait upon all user interrupts,
         * with the herd waking after every interrupt and each doing the
         * heavyweight seqno dance, we delegate the task (of being the
         * bottom-half of the user interrupt) to the first client. After
         * every interrupt, we wake up one client, who does the heavyweight
         * coherent seqno read and either goes back to sleep (if incomplete),
         * or wakes up all the completed clients in parallel, before then
         * transferring the bottom-half status to the next client in the queue.
         *
         * Compared to walking the entire list of waiters in a single dedicated
         * bottom-half, we reduce the latency of the first waiter by avoiding
         * a context switch, but incur additional coherent seqno reads when
         * following the chain of request breadcrumbs. Since it is most likely
         * that we have a single client waiting on each seqno, reducing
         * the overhead of waking that client is much preferred.
         */
        struct intel_breadcrumbs {
                spinlock_t irq_lock;
                struct list_head signalers;

                struct irq_work irq_work; /* for use from inside irq_lock */

                unsigned int irq_enabled;

                bool irq_armed;
        } breadcrumbs;

        struct {
                /**
                 * @enable: Bitmask of enabled sample events on this engine.
                 *
                 * Bits correspond to sample event types, for instance
                 * I915_SAMPLE_QUEUED is bit 0 etc.
                 */
                u32 enable;
                /**
                 * @enable_count: Reference count for the enabled samplers.
                 *
                 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
                unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT];
                /**
                 * @sample: Counter values for sampling events.
                 *
                 * Our internal timer stores the current counters in this field.
                 *
                 * Index number corresponds to @enum drm_i915_pmu_engine_sample.
                 */
                struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
        } pmu;

        /*
         * A pool of objects to use as shadow copies of client batch buffers
         * when the command parser is enabled. Prevents the client from
         * modifying the batch contents after software parsing.
         */
        struct i915_gem_batch_pool batch_pool;

        struct intel_hw_status_page status_page;
        struct i915_ctx_workarounds wa_ctx;
        struct i915_wa_list ctx_wa_list;
        struct i915_wa_list wa_list;
        struct i915_wa_list whitelist;

        u32 irq_keep_mask; /* always keep these interrupts */
        u32 irq_enable_mask; /* bitmask to enable ring interrupt */
        void (*irq_enable)(struct intel_engine_cs *engine);
        void (*irq_disable)(struct intel_engine_cs *engine);

        int (*init_hw)(struct intel_engine_cs *engine);

        struct {
                void (*prepare)(struct intel_engine_cs *engine);
                void (*reset)(struct intel_engine_cs *engine, bool stalled);
                void (*finish)(struct intel_engine_cs *engine);
        } reset;

        void (*park)(struct intel_engine_cs *engine);
        void (*unpark)(struct intel_engine_cs *engine);

        void (*set_default_submission)(struct intel_engine_cs *engine);

        struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
                                             struct i915_gem_context *ctx);

        int (*request_alloc)(struct i915_request *rq);
        int (*init_context)(struct i915_request *rq);

        int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
        int (*emit_bb_start)(struct i915_request *rq,
                             u64 offset, u32 length,
                             unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
        int (*emit_init_breadcrumb)(struct i915_request *rq);
        u32 *(*emit_fini_breadcrumb)(struct i915_request *rq,
                                     u32 *cs);
        unsigned int emit_fini_breadcrumb_dw;

        /* Pass the request to the hardware queue (e.g. directly into
         * the legacy ringbuffer or to the end of an execlist).
         *
         * This is called from an atomic context with irqs disabled; must
         * be irq safe.
         */
        void (*submit_request)(struct i915_request *rq);

        /*
         * Call when the priority on a request has changed and it and its
         * dependencies may need rescheduling. Note the request itself may
         * not be ready to run!
         */
        void (*schedule)(struct i915_request *request,
                         const struct i915_sched_attr *attr);

        /*
         * Cancel all requests on the hardware, or queued for execution.
         * This should only cancel the ready requests that have been
         * submitted to the engine (via the engine->submit_request callback).
         * This is called when marking the device as wedged.
         */
        void (*cancel_requests)(struct intel_engine_cs *engine);

        void (*cleanup)(struct intel_engine_cs *engine);

        struct intel_engine_execlists execlists;

        /* Contexts are pinned whilst they are active on the GPU. The last
         * context executed remains active whilst the GPU is idle - the
         * switch away and write to the context object only occurs on the
         * next execution. Contexts are only unpinned on retirement of the
         * following request ensuring that we can always write to the object
         * on the context switch even after idling. Across suspend, we switch
         * to the kernel context and trash it as the save may not happen
         * before the hardware is powered down.
         */
        struct intel_context *last_retired_context;

        /* status_notifier: list of callbacks for context-switch changes */
        struct atomic_notifier_head context_status_notifier;

        struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
        unsigned int flags;

        /*
         * Table of commands the command parser needs to know about
         * for this engine.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const struct drm_i915_reg_table *reg_tables;
        int reg_table_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Return 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the engine's
         * cmd_tables, it gets the command's length based on the table entry.
         * If not, it calls this function to determine the per-engine length
         * field encoding for the command (i.e. different opcode ranges use
         * certain bits to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);

        struct {
                /**
                 * @lock: Lock protecting the below fields.
                 */
                seqlock_t lock;
                /**
                 * @enabled: Reference count indicating number of listeners.
                 */
                unsigned int enabled;
                /**
                 * @active: Number of contexts currently scheduled in.
                 */
                unsigned int active;
                /**
                 * @enabled_at: Timestamp when busy stats were enabled.
                 */
                ktime_t enabled_at;
                /**
                 * @start: Timestamp of the last idle to active transition.
                 *
                 * Idle is defined as active == 0, active as active > 0.
                 */
                ktime_t start;
                /**
                 * @total: Total time this engine was busy.
                 *
                 * Accumulated time not counting the most recent block in cases
                 * where engine is currently busy (active > 0).
                 */
                ktime_t total;
        } stats;
};

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
        return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
        /*
         * Allow preemption of low -> normal -> high, but we do
         * not allow low priority tasks to preempt other low priority
         * tasks under the impression that latency for low priority
         * tasks does not matter (as much as background throughput),
         * so kiss.
         *
         * More naturally we would write
         *      prio >= max(0, last);
         * except that we wish to prevent triggering preemption at the same
         * priority level: the task that is running should remain running
         * to preserve FIFO ordering of dependencies.
         */
        return prio > max(I915_PRIORITY_NORMAL - 1, last);
}
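
/*
 * Illustrative sketch (assumed caller, not from the original header):
 * the dequeue path compares the queue's priority hint against the
 * priority of the request occupying the last submitted port to decide
 * whether to inject a preemption. rq_prio() and inject_preempt_context()
 * are hypothetical stand-ins for the submission backend's helpers.
 *
 *      if (__execlists_need_preempt(execlists->queue_priority_hint,
 *                                   rq_prio(port_request(execlists->port))))
 *              inject_preempt_context(engine);
 */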

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
                     unsigned int bit)
{
        __set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
                          unsigned int bit)
{
        return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
                       unsigned int bit)
{
        __clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
        execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
                    unsigned int bit)
{
        return test_bit(bit, (unsigned long *)&execlists->active);
}
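
/*
 * Illustrative sketch (assumed usage, not from the original header):
 * the helpers above give an edge-triggered pattern, so work tied to a
 * state transition runs only on the first set and the final clear of a
 * bit. on_first_submission() is a hypothetical hook.
 *
 *      if (execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER))
 *              on_first_submission(engine);
 *      ...
 *      execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 */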

void execlists_user_begin(struct intel_engine_execlists *execlists,
                          const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
        return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
                        struct execlist_port * const port)
{
        const unsigned int m = execlists->port_mask;

        GEM_BUG_ON(port_index(port, execlists) != 0);
        GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

        memmove(port, port + 1, m * sizeof(struct execlist_port));
        memset(port + m, 0, sizeof(struct execlist_port));

        return port;
}

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
        return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        return READ_ONCE(engine->status_page.addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
        /* Writing into the status page should be done sparingly. Since we
         * only do so when we are uncertain of the device state, we take a
         * bit of extra paranoia to try and ensure that the HWS takes the
         * value we give and that it doesn't end up trapped inside the CPU!
         */
        if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
                mb();
                clflush(&engine->status_page.addr[reg]);
                engine->status_page.addr[reg] = value;
                clflush(&engine->status_page.addr[reg]);
                mb();
        } else {
                WRITE_ONCE(engine->status_page.addr[reg], value);
        }
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32))
#define I915_GEM_HWS_PREEMPT 0x32
#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_SCRATCH 0x80
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32))

#define I915_HWS_CSB_BUF0_INDEX 0x10
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
                         struct i915_timeline *timeline,
                         int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
        /* Dummy function.
         *
         * This serves as a placeholder in the code so that the reader
         * can compare against the preceding intel_ring_begin() and
         * check that the number of dwords emitted matches the space
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
        GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
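
/*
 * Illustrative sketch (assumed caller, not from the original header) of
 * the intel_ring_begin()/intel_ring_advance() contract: reserve space,
 * emit exactly that many dwords, then close the packet.
 *
 *      u32 *cs;
 *
 *      cs = intel_ring_begin(rq, 4);
 *      if (IS_ERR(cs))
 *              return PTR_ERR(cs);
 *
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *      *cs++ = MI_NOOP;
 *
 *      intel_ring_advance(rq, cs);
 */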

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
        return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
                        unsigned int pos)
{
        if (pos & -ring->size) /* must be strictly within the ring */
                return false;

        if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
                return false;

        return true;
}

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
        /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
        u32 offset = addr - rq->ring->vaddr;

        GEM_BUG_ON(offset > rq->ring->size);
        return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
        GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

        /*
         * "Ring Buffer Use"
         *      Gen2 BSpec "1. Programming Environment" / 1.4.4.6
         *      Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
         *      Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         *
         * We use ring->head as the last known location of the actual
         * RING_HEAD: it may have advanced since, but in the worst case it
         * still equals ring->head, so we must never program RING_TAIL to
         * advance into the same cacheline as ring->head.
         */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
        GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
                   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
        /* Whilst writes to the tail are strictly ordered, there is no
         * serialisation between readers and the writers. The tail may be
         * read by i915_request_retire() just as it is being updated
         * by execlists, as although the breadcrumb is complete, the context
         * switch hasn't been seen.
         */
        assert_ring_tail_valid(ring, tail);
        ring->tail = tail;
        return tail;
}

static inline unsigned int
__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
        /*
         * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
         * same cacheline, the Head Pointer must not be greater than the Tail
         * Pointer."
         */
        GEM_BUG_ON(!is_power_of_2(size));
        return (head - tail - CACHELINE_BYTES) & (size - 1);
}

void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);

int intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
        /*
         * We are only peeking at the tail of the submit queue (and not the
         * queue itself) in order to gain a hint as to the current active
         * state of the engine. Callers are not expected to be taking
         * engine->timeline->lock, nor are they expected to be concerned
         * with serialising this hint with anything, so document it as
         * a hint and nothing more.
         */
        return READ_ONCE(engine->timeline.seqno);
}

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
        return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
                                         u32 seqno)
{
        return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
}

static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
                                              u32 seqno)
{
        GEM_BUG_ON(!seqno);
        return intel_engine_signaled(engine, seqno);
}

static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
                                            u32 seqno)
{
        GEM_BUG_ON(!seqno);
        return intel_engine_signaled(engine, seqno - 1);
}
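
/*
 * Illustrative sketch (hypothetical helper, not from the original
 * header): combining the two predicates above identifies a request that
 * the engine has started executing but not yet completed.
 *
 *      static inline bool
 *      engine_is_executing(struct intel_engine_cs *engine, u32 seqno)
 *      {
 *              return intel_engine_has_started(engine, seqno) &&
 *                     !intel_engine_has_completed(engine, seqno);
 *      }
 */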

void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

static inline void
intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
{
        irq_work_queue(&engine->breadcrumbs.irq_work);
}

bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
                                    struct drm_printer *p);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
        memset(batch, 0, 6 * sizeof(u32));

        batch[0] = GFX_OP_PIPE_CONTROL(6);
        batch[1] = flags;
        batch[2] = offset;

        return batch + 6;
}

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
        /* We're using qword write, offset should be aligned to 8 bytes. */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        /* As a w/a for post-sync ops following a GPGPU operation, we
         * need a prior CS_STALL, which is emitted by the flush
         * following the batch.
         */
        *cs++ = GFX_OP_PIPE_CONTROL(6);
        *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
        *cs++ = gtt_offset;
        *cs++ = 0;
        *cs++ = value;
        /* We're thrashing one dword of HWS. */
        *cs++ = 0;

        return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
        GEM_BUG_ON(gtt_offset & (1 << 5));
        /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
        GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

        *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
        *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
        *cs++ = 0;
        *cs++ = value;

        return cs;
}
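
/*
 * Illustrative sketch (assumed caller, not from the original header):
 * a minimal fini-breadcrumb emitter could use the helper above to post
 * the request's seqno into the HWSP and then raise a user interrupt.
 * The I915_GEM_HWS_SEQNO_ADDR slot is the one defined earlier; real
 * backends write to the request's own timeline slot instead.
 *
 *      cs = gen8_emit_ggtt_write(cs, rq->fence.seqno,
 *                                I915_GEM_HWS_SEQNO_ADDR);
 *      *cs++ = MI_USER_INTERRUPT;
 *      *cs++ = MI_NOOP;
 */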

static inline void intel_engine_reset(struct intel_engine_cs *engine,
                                      bool stalled)
{
        if (engine->reset.reset)
                engine->reset.reset(engine, stalled);
}

void intel_engines_sanitize(struct drm_i915_private *i915, bool force);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
                       struct drm_printer *m,
                       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                if (engine->stats.active++ == 0)
                        engine->stats.start = ktime_get();
                GEM_BUG_ON(engine->stats.active == 0);
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
        unsigned long flags;

        if (READ_ONCE(engine->stats.enabled) == 0)
                return;

        write_seqlock_irqsave(&engine->stats.lock, flags);

        if (engine->stats.enabled > 0) {
                ktime_t last;

                if (engine->stats.active && --engine->stats.active == 0) {
                        /*
                         * Decrement the active context count and, now that
                         * the GPU is idle, add the elapsed busy period to
                         * the running total.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.start);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                } else if (engine->stats.active == 0) {
                        /*
                         * After turning on engine stats, context out might be
                         * the first event in which case we account from the
                         * time stats gathering was turned on.
                         */
                        last = ktime_sub(ktime_get(), engine->stats.enabled_at);

                        engine->stats.total = ktime_add(engine->stats.total,
                                                        last);
                }
        }

        write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        if (!execlists->preempt_hang.inject_hang)
                return false;

        complete(&execlists->preempt_hang.completion);
        return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
        return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */