Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v4.19
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include <linux/seqlock.h>

#include "i915_gem_batch_pool.h"

#include "i915_reg.h"
#include "i915_pmu.h"
#include "i915_request.h"
#include "i915_selftest.h"
#include "i915_timeline.h"
#include "intel_gpu_commands.h"

struct drm_printer;
struct i915_sched_attr;

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 8

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
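
/*
 * Illustrative sketch (not part of this header): the iterator above visits
 * every slice/subslice pair present in the instdone masks, e.g. to snapshot
 * the per-subslice INSTDONE registers. read_subslice_reg() is a hypothetical
 * stand-in for the real MMIO accessor.
 *
 *	int slice, subslice;
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		instdone->sampler[slice][subslice] =
 *			read_subslice_reg(dev_priv, slice, subslice,
 *					  GEN7_SAMPLER_INSTDONE);
 */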

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct i915_request *active_request;
	bool stalled:1;
	bool wedged:1;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct i915_timeline *timeline;
	struct list_head request_list;
	struct list_head active_link;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are expressed in dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful if we want to
 *    have multiple batches at different offsets based on some criteria.
 *    It is not a requirement at the moment but provides an option for
 *    future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct i915_request;

#define I915_MAX_VCS	4
#define I915_MAX_VECS	2

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
	VCS3,
	VCS4,
#define _VCS(n) (VCS + (n))
	VECS,
	VECS2
#define _VECS(n) (VECS + (n))
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

struct st_preempt_hang {
	struct completion completion;
	bool inject_hang;
};
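
/*
 * Illustrative note: the _VCS()/_VECS() helpers index instances within an
 * engine class and rely on same-class IDs being contiguous above, e.g.
 * _VCS(0) == VCS, _VCS(1) == VCS2 and _VCS(3) == VCS4 (matching
 * I915_MAX_VCS == 4).
 */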

/**
 * struct intel_engine_execlists - execlist submission queue and port state
 *
 * The struct intel_engine_execlists represents the combined logical state of
 * the driver and the hardware state for execlist mode of submission.
 */
struct intel_engine_execlists {
	/**
	 * @tasklet: softirq tasklet for bottom handler
	 */
	struct tasklet_struct tasklet;

	/**
	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
	 */
	struct i915_priolist default_priolist;

	/**
	 * @no_priolist: priority lists disabled
	 */
	bool no_priolist;

	/**
	 * @submit_reg: gen-specific execlist submission register
	 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to
	 * the ExecList Submission Queue Contents register array for Gen11+
	 */
	u32 __iomem *submit_reg;

	/**
	 * @ctrl_reg: the enhanced execlists control register, used to load the
	 * submit queue on the HW and to request preemptions to idle
	 */
	u32 __iomem *ctrl_reg;

	/**
	 * @port: execlist port states
	 *
	 * For each hardware ELSP (ExecList Submission Port) we keep
	 * track of the last request and the number of times we submitted
	 * that port to hw. We then count the number of times the hw reports
	 * a context completion or preemption. As only one context can
	 * be active on hw, we limit resubmission of a context to port[0].
	 * This is called a lite restore of the context.
	 */
	struct execlist_port {
		/**
		 * @request_count: combined request and submission count
		 */
		struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

		/**
		 * @context_id: context ID for port
		 */
		GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
	} port[EXECLIST_MAX_PORTS];

	/**
	 * @active: is the HW active? We consider the HW as active after
	 * submitting any context for execution and until we have seen the
	 * last context completion event. After that, we do not expect any
	 * more events until we submit, and so can park the HW.
	 *
	 * As we have a small number of different sources from which we feed
	 * the HW, we track the state of each inside a single bitfield.
	 */
	unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2

	/**
	 * @port_mask: number of execlist ports - 1
	 */
	unsigned int port_mask;

	/**
	 * @queue_priority: Highest pending priority.
	 *
	 * When we add requests into the queue, or adjust the priority of
	 * executing requests, we compute the maximum priority of those
	 * pending requests. We can then use this value to determine if
	 * we need to preempt the executing requests to service the queue.
	 */
	int queue_priority;

	/**
	 * @queue: queue of requests, in priority lists
	 */
	struct rb_root_cached queue;

	/**
	 * @csb_read: control register for Context Switch buffer
	 *
	 * Note this register is always in mmio.
	 */
	u32 __iomem *csb_read;

	/**
	 * @csb_write: control register for Context Switch buffer
	 *
	 * Note this register may be either mmio or HWSP shadow.
	 */
	u32 *csb_write;

	/**
	 * @csb_status: status array for Context Switch buffer
	 *
	 * Note these registers may be either mmio or HWSP shadow.
	 */
	u32 *csb_status;

	/**
	 * @preempt_complete_status: expected CSB upon completing preemption
	 */
	u32 preempt_complete_status;

	/**
	 * @csb_write_reset: reset value for CSB write pointer
	 *
	 * As the CSB write pointer may be either in HWSP or a field
	 * inside an mmio register, we want to reprogram it slightly
	 * differently to avoid later confusion.
	 */
	u32 csb_write_reset;

	/**
	 * @csb_head: context status buffer head
	 */
	u8 csb_head;

	I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;)
};
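
/*
 * Illustrative sketch of the port_* helpers above (hypothetical usage, not
 * code from this file): a request pointer and its submission count share one
 * word, the count living in the low EXECLIST_COUNT_BITS.
 *
 *	struct execlist_port *port = execlists->port;
 *	struct i915_request *rq = ...;
 *	unsigned int count;
 *
 *	port_set(port, port_pack(rq, 1));	<- submit rq once
 *	rq = port_unpack(port, &count);		<- rq back, count == 1
 */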

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];

	enum intel_engine_id id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 uabi_id;
	u8 uabi_class;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;

	struct intel_ring *buffer;

	struct i915_timeline timeline;

	struct drm_i915_gem_object *default_state;
	void *pinned_default_state;

	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct list_head signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */

		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;
		unsigned int irq_enabled;
		unsigned int irq_count;

		bool irq_armed : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	struct {
		/**
		 * @enable: Bitmask of enabled sample events on this engine.
		 *
		 * Bits correspond to sample event types, for instance
		 * I915_SAMPLE_QUEUED is bit 0 etc.
		 */
		u32 enable;
		/**
		 * @enable_count: Reference count for the enabled samplers.
		 *
		 * Index number corresponds to the bit number from @enable.
		 */
		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
		/**
		 * @sample: Counter values for sampling events.
		 *
		 * Our internal timer stores the current counters in this field.
		 */
#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
	} pmu;
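
	/*
	 * Illustrative sketch (assumed semantics): @enable above is indexed
	 * by sample type, so a hypothetical check for whether busy sampling
	 * is currently requested on this engine would look like
	 *
	 *	if (engine->pmu.enable & BIT(I915_SAMPLE_BUSY))
	 *		...;
	 */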

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);

	struct {
		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
		void (*reset)(struct intel_engine_cs *engine,
			      struct i915_request *rq);
		void (*finish)(struct intel_engine_cs *engine);
	} reset;

	void (*park)(struct intel_engine_cs *engine);
	void (*unpark)(struct intel_engine_cs *engine);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_context *(*context_pin)(struct intel_engine_cs *engine,
					     struct i915_gem_context *ctx);

	int (*request_alloc)(struct i915_request *rq);
	int (*init_context)(struct i915_request *rq);

	int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct i915_request *rq,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS     BIT(2)
	void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct i915_request *rq);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct i915_request *request,
			 const struct i915_sched_attr *attr);

	/*
	 * Cancel all requests on the hardware, or queued for execution.
	 * This should only cancel the ready requests that have been
	 * submitted to the engine (via the engine->submit_request callback).
	 * This is called when marking the device as wedged.
	 */
	void (*cancel_requests)(struct intel_engine_cs *engine);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);
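
	/*
	 * Illustrative sketch: emit_flush() takes the EMIT_* flags above, so
	 * a hypothetical full barrier around a batch would be requested as
	 *
	 *	err = rq->engine->emit_flush(rq, EMIT_BARRIER);
	 *
	 * i.e. invalidate stale caches before the batch and flush its writes
	 * afterwards.
	 */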

	/* GEN8 signal/wait table - never trust comments!
	 *
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		  VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	  sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		  VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
		struct {
			/* our mbox written by others */
			u32 wait[GEN6_NUM_SEMAPHORES];
			/* mboxes this ring signals to */
			i915_reg_t signal[GEN6_NUM_SEMAPHORES];
		} mbox;

		/* AKA wait() */
		int (*sync_to)(struct i915_request *rq,
			       struct i915_request *signal);
		u32 *(*signal)(struct i915_request *rq, u32 *cs);
	} semaphore;

	struct intel_engine_execlists execlists;
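
	/*
	 * Worked example for the tables above (ids as ordered in the table:
	 * RCS=0, VCS=1, BCS=2, VECS=3, VCS2=4; NUM_RINGS=5, seqno_size=8):
	 * RCS signalling VCS2 writes at f(RCS, VCS2) = 0 * 5 * 8 + 8 * 4 =
	 * 0x20, and VCS2 syncing from RCS reads g(VCS2, RCS) =
	 * 0 * 5 * 8 + 8 * 4 = 0x20, i.e. the same mailbox slot, as the
	 * transposed tables show.
	 */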

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct intel_context *last_retired_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
	unsigned int flags;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);

	struct {
		/**
		 * @lock: Lock protecting the below fields.
		 */
		seqlock_t lock;
		/**
		 * @enabled: Reference count indicating number of listeners.
		 */
		unsigned int enabled;
		/**
		 * @active: Number of contexts currently scheduled in.
		 */
		unsigned int active;
		/**
		 * @enabled_at: Timestamp when busy stats were enabled.
		 */
		ktime_t enabled_at;
		/**
		 * @start: Timestamp of the last idle to active transition.
		 *
		 * Idle is defined as active == 0, busy as active > 0.
		 */
		ktime_t start;
		/**
		 * @total: Total time this engine was busy.
		 *
		 * Accumulated time not counting the most recent block in cases
		 * where engine is currently busy (active > 0).
		 */
		ktime_t total;
	} stats;
};
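
/*
 * Illustrative sketch of the stats bookkeeping above (assumed semantics,
 * mirroring what intel_engine_get_busy_time() below is expected to return):
 * the busy time is the accumulated total plus the currently open busy block.
 *
 *	ktime_t busy = engine->stats.total;
 *
 *	if (engine->stats.active)
 *		busy = ktime_add(busy,
 *				 ktime_sub(ktime_get(), engine->stats.start));
 */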

static inline bool
intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
}

static inline bool
intel_engine_supports_stats(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
}

static inline bool
intel_engine_has_preemption(const struct intel_engine_cs *engine)
{
	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
}

static inline bool __execlists_need_preempt(int prio, int last)
{
	return prio > max(0, last);
}

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
		     unsigned int bit)
{
	__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
			  unsigned int bit)
{
	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
		       unsigned int bit)
{
	__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
{
	execlists->active = 0;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
		    unsigned int bit)
{
	return test_bit(bit, (unsigned long *)&execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
			  const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

void
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
	return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
			struct execlist_port * const port)
{
	const unsigned int m = execlists->port_mask;

	GEM_BUG_ON(port_index(port, execlists) != 0);
	GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

	memmove(port, port + 1, m * sizeof(struct execlist_port));
	memset(port + m, 0, sizeof(struct execlist_port));

	return port;
}

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}
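
/*
 * Illustrative sketch: the status page is a plain array of dwords, so e.g.
 * the breadcrumb seqno comes back via
 *
 *	u32 seqno = intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 *
 * which is exactly what intel_engine_get_seqno() below does.
 */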

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR		(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_PREEMPT_INDEX	0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR	(I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define CNL_HWS_CSB_WRITE_INDEX		0x2f

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine,
			 struct i915_timeline *timeline,
			 int size);
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct i915_request *rq);

int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);

static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}

static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline bool
intel_ring_offset_valid(const struct intel_ring *ring,
			unsigned int pos)
{
	if (pos & -ring->size) /* must be strictly within the ring */
		return false;

	if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
		return false;

	return true;
}
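
/*
 * Illustrative sketch of the begin/advance contract (hypothetical emission,
 * not code from this file): reserve space, write exactly that many dwords,
 * then let intel_ring_advance() sanity-check the count.
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(rq, 2);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(rq, cs);
 */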

static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - rq->ring->vaddr;

	GEM_BUG_ON(offset > rq->ring->size);
	return intel_ring_wrap(rq->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual
	 * RING_HEAD; it may have advanced, but in the worst case it is still
	 * at ring->head, and so we should never program RING_TAIL to advance
	 * into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_engine_create_scratch(struct intel_engine_cs *engine,
				unsigned int size);
void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

int intel_engine_stop_cs(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline.seqno);
}

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);
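
/*
 * Worked example for assert_ring_tail_valid() above: with CACHELINE_BYTES
 * == 64, programming tail == 0x104 while ring->head == 0x130 puts both in
 * the cacheline at 0x100 with tail < head, which the bspec rule forbids;
 * tail == 0x140, in the next cacheline, would be fine.
 */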

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW, i.e.
 * 84 dwords == 336 bytes).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait)
{
	wait->tsk = current;
	wait->request = NULL;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct i915_request *rq)
{
	return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct i915_request *rq)
{
	return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
void intel_engine_cancel_signaling(struct i915_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}
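
/*
 * Illustrative sketch: gen8_emit_pipe_control() always emits a 6-dword
 * PIPE_CONTROL packet and returns the advanced pointer, e.g. a hypothetical
 * CS stall:
 *
 *	batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_CS_STALL, 0);
 */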

static inline u32 *
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* As a w/a for post-sync ops following a GPGPU operation, we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_QW_WRITE;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;

	return cs;
}

static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
{
	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
	GEM_BUG_ON(gtt_offset & (1 << 5));
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

void intel_engines_sanitize(struct drm_i915_private *i915);

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_park(struct drm_i915_private *i915);
void intel_engines_unpark(struct drm_i915_private *i915);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

bool intel_engine_can_store_dword(struct intel_engine_cs *engine);

__printf(3, 4)
void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...);

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
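
/*
 * Illustrative sketch: intel_engine_dump() is printf-style from its third
 * argument onwards, e.g. a hypothetical dump to the kernel log:
 *
 *	struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
 *
 *	intel_engine_dump(engine, &p, "%s\n", engine->name);
 */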

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		if (engine->stats.active++ == 0)
			engine->stats.start = ktime_get();
		GEM_BUG_ON(engine->stats.active == 0);
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (READ_ONCE(engine->stats.enabled) == 0)
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);

	if (engine->stats.enabled > 0) {
		ktime_t last;

		if (engine->stats.active && --engine->stats.active == 0) {
			/*
			 * Decrement the active context count; as the GPU is
			 * now idle, add the elapsed block to the running
			 * total.
			 */
			last = ktime_sub(ktime_get(), engine->stats.start);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		} else if (engine->stats.active == 0) {
			/*
			 * After turning on engine stats, context out might be
			 * the first event, in which case we account from the
			 * time stats gathering was turned on.
			 */
			last = ktime_sub(ktime_get(), engine->stats.enabled_at);

			engine->stats.total = ktime_add(engine->stats.total,
							last);
		}
	}

	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);

ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	if (!execlists->preempt_hang.inject_hang)
		return false;

	complete(&execlists->preempt_hang.completion);
	return true;
}

#else

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
{
	return false;
}

#endif

#endif /* _INTEL_RINGBUFFER_H_ */