intel_ringbuffer.h (drivers/gpu/drm/i915), Linux kernel at v4.14
Mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#include "i915_selftest.h"

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define gen8_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
	(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))

enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 3

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))

struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
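
/*
 * Editor's illustrative sketch (not part of the original header): walking
 * every populated slice/subslice pair to report the per-unit INSTDONE
 * values captured in struct intel_instdone. The function name and the
 * pr_info() reporting are invented for this example; the driver itself
 * feeds these values into hangcheck and the error state instead.
 */
static inline void
example_print_instdone(struct drm_i915_private *dev_priv,
		       const struct intel_instdone *instdone)
{
	int slice, subslice;

	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
		pr_info("sampler[%d][%d]: 0x%08x, row[%d][%d]: 0x%08x\n",
			slice, subslice,
			instdone->sampler[slice][subslice],
			slice, subslice,
			instdone->row[slice][subslice]);
}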

struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	struct drm_i915_gem_request *active_request;
	bool stalled;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct list_head request_list;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};
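
/*
 * Editor's illustrative sketch (not in the original header): free space in
 * the power-of-two ring. A cacheline of slack is kept so that the TAIL
 * never chases the HEAD into the same cacheline; see
 * assert_ring_tail_valid() below for the hardware rule this protects.
 * The helper name is invented, and the real driver bases its bookkeeping
 * on ring->emit rather than ring->tail in places, so treat this purely as
 * an illustration of the modular arithmetic.
 */
static inline u32 example_ring_space(const struct intel_ring *ring)
{
	return (ring->head - ring->tail - CACHELINE_BYTES) & (ring->size - 1);
}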

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * we use a single page to load ctx workarounds so all of these
 * values are referred to in terms of dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct drm_i915_gem_request;
struct intel_render_state;

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
#define _VCS(n) (VCS + (n))
	VECS
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];
	enum intel_engine_id id;
	unsigned int uabi_id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;
	unsigned int irq_shift;

	struct intel_ring *buffer;
	struct intel_timeline *timeline;

	struct intel_render_state *render_state;

	atomic_t irq_count;
	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
#define ENGINE_IRQ_EXECLIST 1

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct rb_root signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */
		struct drm_i915_gem_request __rcu *first_signal;
		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;

		bool irq_armed : 1;
		bool irq_enabled : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);
	void (*reset_hw)(struct intel_engine_cs *engine,
			 struct drm_i915_gem_request *req);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
					  struct i915_gem_context *ctx);
	void (*context_unpin)(struct intel_engine_cs *engine,
			      struct i915_gem_context *ctx);
	int (*request_alloc)(struct drm_i915_gem_request *req);
	int (*init_context)(struct drm_i915_gem_request *req);

	int (*emit_flush)(struct drm_i915_gem_request *request,
			  u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
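
	/*
	 * Editor's annotation (not in the original header): callers combine
	 * these mode bits, e.g.
	 *
	 *	err = engine->emit_flush(req, EMIT_BARRIER);
	 *
	 * to both flush the data written by earlier commands and invalidate
	 * stale caches on behalf of the commands that follow.
	 */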

	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS     BIT(2)
	void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
				u32 *cs);
	int emit_breadcrumb_sz;

	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct drm_i915_gem_request *req);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct drm_i915_gem_request *request,
			 int priority);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		  VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  i.e. the transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		  VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  i.e. the transpose of f(x, y)
	 */
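
	/*
	 * Editor's annotation (not in the original header), checking the
	 * generalization against the tables above: with NUM_RINGS = 5 and
	 * seqno_size = 8, RCS (id 0) signalling VCS2 (id 4) writes at
	 * f(RCS, VCS2) = (0 * 5 * 8) + (8 * 4) = 0x20, and VCS2 waiting
	 * upon RCS reads from g(VCS2, RCS) = (0 * 5 * 8) + (8 * 4) = 0x20,
	 * matching the 0x20 entries in both tables.
	 */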

	struct {
		union {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
			struct {
				/* our mbox written by others */
				u32 wait[GEN6_NUM_SEMAPHORES];
				/* mboxes this ring signals to */
				i915_reg_t signal[GEN6_NUM_SEMAPHORES];
			} mbox;
			u64 signal_ggtt[I915_NUM_ENGINES];
		};

		/* AKA wait() */
		int (*sync_to)(struct drm_i915_gem_request *req,
			       struct drm_i915_gem_request *signal);
		u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
	} semaphore;

	/* Execlists */
	struct tasklet_struct irq_tasklet;
	struct i915_priolist default_priolist;
	bool no_priolist;
	struct execlist_port {
		struct drm_i915_gem_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, e) ((p) - (e)->execlist_port)
		GEM_DEBUG_DECL(u32 context_id);
	} execlist_port[2];
	struct rb_root execlist_queue;
	struct rb_node *execlist_first;
	unsigned int fw_domains;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct i915_gem_context *last_retired_context;

	/* We track the current MI_SET_CONTEXT in order to eliminate
	 * redundant context switches. This presumes that requests are not
	 * reordered! Or, when they are, the tracking is updated along with
	 * the emission of individual requests into the legacy command
	 * stream (ring).
	 */
	struct i915_gem_context *legacy_active_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

	bool needs_cmd_parser;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
};
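
/*
 * Editor's illustrative sketch (not in the original header): the execlist
 * port macros above keep a small submission count packed into the low
 * bits of the request pointer. The helper name is invented and the real
 * bookkeeping (including the guard against the 2-bit count overflowing)
 * lives in intel_lrc.c; this only shows the pack/unpack round trip.
 */
static inline void example_port_bump(struct execlist_port *port)
{
	struct drm_i915_gem_request *rq;
	unsigned int count;

	rq = port_unpack(port, &count);	/* split pointer and count */
	port_set(port, port_pack(rq, count + 1)); /* repack with count + 1 */
}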

static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we do so when we are uncertain of the device state, we take a bit
	 * of extra paranoia to try and ensure that the HWS takes the value
	 * we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
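
/*
 * Editor's illustrative sketch (not in the original header): writing a
 * value into a driver-owned dword of the hardware status page and reading
 * it back through the paranoid accessors above. The helper name and the
 * use of the scratch slot for this purpose are invented for the example.
 */
static inline u32 example_hws_roundtrip(struct intel_engine_cs *engine)
{
	intel_write_status_page(engine, I915_GEM_HWS_SCRATCH_INDEX, 0xc0ffee00);
	return intel_read_status_page(engine, I915_GEM_HWS_SCRATCH_INDEX);
}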

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);

u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
				   unsigned int n);

static inline void
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}
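
/*
 * Editor's illustrative sketch (not in the original header): the canonical
 * emission pattern. intel_ring_begin() reserves space (or returns an
 * ERR_PTR) and hands back a pointer into the ring; the caller writes
 * exactly the reserved number of dwords and closes with
 * intel_ring_advance(), whose GEM_BUG_ON checks the accounting.
 * MI_NOOP comes from i915_reg.h; the function name is invented.
 */
static inline int example_emit_four_noops(struct drm_i915_gem_request *req)
{
	u32 *cs;

	cs = intel_ring_begin(req, 4);	/* reserve 4 dwords */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;
	*cs++ = MI_NOOP;

	intel_ring_advance(req, cs);	/* verify we wrote what we reserved */
	return 0;
}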

static inline u32
intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline u32
intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - req->ring->vaddr;
	GEM_BUG_ON(offset > req->ring->size);
	return intel_ring_wrap(req->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	/* We could combine these into a single tail operation, but keeping
	 * them as separate tests will help identify the cause should one
	 * ever fire.
	 */
	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
	GEM_BUG_ON(tail >= ring->size);

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual
	 * RING_HEAD; it may have advanced since, but in the worst case it is
	 * exactly ring->head, and so we should never program RING_TAIL to
	 * advance into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_gem_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}

static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline->seqno);
}

int init_workarounds_ring(struct intel_engine_cs *engine);
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
				   struct drm_i915_gem_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct drm_i915_gem_request *rq)
{
	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct drm_i915_gem_request *rq)
{
	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
				   bool wakeup);
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
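
/*
 * Editor's illustrative sketch (not in the original header): the bare
 * skeleton of a breadcrumb wait built from the helpers above. The real
 * waiter (i915_wait_request()) additionally handles signals, timeouts,
 * the bottom-half handover and the missed-interrupt fallback; this only
 * shows how a waiter registers, sleeps and unregisters. The function
 * name is invented.
 */
static inline void example_wait_for_request(struct intel_engine_cs *engine,
					    struct drm_i915_gem_request *rq)
{
	struct intel_wait wait;

	intel_wait_init(&wait, rq);
	if (!intel_wait_update_request(&wait, rq))
		return; /* no global seqno yet, not submitted to hw */

	intel_engine_add_wait(engine, &wait); /* may make us the bottom-half */

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (i915_gem_request_completed(rq))
			break;
		schedule(); /* woken by the interrupt bottom-half */
	}
	__set_current_state(TASK_RUNNING);

	intel_engine_remove_wait(engine, &wait);
}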

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engines_mark_idle(struct drm_i915_private *i915);
void intel_engines_reset_default_submission(struct drm_i915_private *i915);

static inline bool
__intel_engine_can_store_dword(unsigned int gen, unsigned int class)
{
	if (gen <= 2)
		return false; /* uses physical not virtual addresses */

	if (gen == 6 && class == VIDEO_DECODE_CLASS)
		return false; /* b0rked */

	return true;
}

#endif /* _INTEL_RINGBUFFER_H_ */