#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#include "i915_selftest.h"

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

struct intel_hw_status_page {
	struct i915_vma *vma;
	u32 *page_addr;
	u32 ggtt_offset;
};

#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)

#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)

#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)

#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)

#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)

#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define gen8_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
	(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
	(dev_priv->semaphore->node.start + \
	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
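/* A quick illustration of the arithmetic (assuming I915_NUM_ENGINES == 5):
 * the semaphore object holds a dense I915_NUM_ENGINES x I915_NUM_ENGINES
 * array of 8 byte slots, indexed (signaller, waiter). The slot written by
 * engine 0 for engine 1 to wait upon then sits at
 *
 *	GEN8_SEMAPHORE_OFFSET(0, 1) = (0 * 5 + 1) * 8 = 0x08
 *
 * bytes into the semaphore page (dev_priv->semaphore).
 */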
enum intel_engine_hangcheck_action {
	ENGINE_IDLE = 0,
	ENGINE_WAIT,
	ENGINE_ACTIVE_SEQNO,
	ENGINE_ACTIVE_HEAD,
	ENGINE_ACTIVE_SUBUNITS,
	ENGINE_WAIT_KICK,
	ENGINE_DEAD,
};

static inline const char *
hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
{
	switch (a) {
	case ENGINE_IDLE:
		return "idle";
	case ENGINE_WAIT:
		return "wait";
	case ENGINE_ACTIVE_SEQNO:
		return "active seqno";
	case ENGINE_ACTIVE_HEAD:
		return "active head";
	case ENGINE_ACTIVE_SUBUNITS:
		return "active subunits";
	case ENGINE_WAIT_KICK:
		return "wait kick";
	case ENGINE_DEAD:
		return "dead";
	}

	return "unknown";
}

#define I915_MAX_SLICES	3
#define I915_MAX_SUBSLICES 3

#define instdone_slice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)

#define instdone_subslice_mask(dev_priv__) \
	(INTEL_GEN(dev_priv__) == 7 ? \
	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)

#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
	for ((slice__) = 0, (subslice__) = 0; \
	     (slice__) < I915_MAX_SLICES; \
	     (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
	       (slice__) += ((subslice__) == 0)) \
		for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
			    (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
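/* A rough sketch of how the iterator is meant to be used, with instdone
 * pointing at a struct intel_instdone previously filled in by
 * intel_engine_get_instdone() (declared further below):
 *
 *	for_each_instdone_slice_subslice(dev_priv, slice, subslice)
 *		pr_info("sampler[%d][%d]: 0x%08x\n", slice, subslice,
 *			instdone->sampler[slice][subslice]);
 */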
struct intel_instdone {
	u32 instdone;
	/* The following exist only in the RCS engine */
	u32 slice_common;
	u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
	u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};

struct intel_engine_hangcheck {
	u64 acthd;
	u32 seqno;
	enum intel_engine_hangcheck_action action;
	unsigned long action_timestamp;
	int deadlock;
	struct intel_instdone instdone;
	bool stalled;
};

struct intel_ring {
	struct i915_vma *vma;
	void *vaddr;

	struct list_head request_list;

	u32 head;
	u32 tail;
	u32 emit;

	u32 space;
	u32 size;
	u32 effective_size;
};

struct i915_gem_context;
struct drm_i915_reg_table;

/*
 * We use a single page to load ctx workarounds, so all of these
 * values are referred to in terms of dwords.
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies the batch starting position; also helpful in case
 *  we want to have multiple batches at different offsets based on
 *  some criteria. It is not a requirement at the moment but provides
 *  an option for future use.
 *  size: size of the batch in DWORDS
 */
struct i915_ctx_workarounds {
	struct i915_wa_ctx_bb {
		u32 offset;
		u32 size;
	} indirect_ctx, per_ctx;
	struct i915_vma *vma;
};

struct drm_i915_gem_request;
struct intel_render_state;

/*
 * Engine IDs definitions.
 * Keep instances of the same type engine together.
 */
enum intel_engine_id {
	RCS = 0,
	BCS,
	VCS,
	VCS2,
#define _VCS(n) (VCS + (n))
	VECS
};

struct i915_priolist {
	struct rb_node node;
	struct list_head requests;
	int priority;
};

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_cs {
	struct drm_i915_private *i915;
	char name[INTEL_ENGINE_CS_MAX_NAME];
	enum intel_engine_id id;
	unsigned int uabi_id;
	unsigned int hw_id;
	unsigned int guc_id;

	u8 class;
	u8 instance;
	u32 context_size;
	u32 mmio_base;
	unsigned int irq_shift;

	struct intel_ring *buffer;
	struct intel_timeline *timeline;

	struct intel_render_state *render_state;

	atomic_t irq_count;
	unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
#define ENGINE_IRQ_EXECLIST 1

	/* Rather than have every client wait upon all user interrupts,
	 * with the herd waking after every interrupt and each doing the
	 * heavyweight seqno dance, we delegate the task (of being the
	 * bottom-half of the user interrupt) to the first client. After
	 * every interrupt, we wake up one client, who does the heavyweight
	 * coherent seqno read and either goes back to sleep (if incomplete),
	 * or wakes up all the completed clients in parallel, before then
	 * transferring the bottom-half status to the next client in the queue.
	 *
	 * Compared to walking the entire list of waiters in a single dedicated
	 * bottom-half, we reduce the latency of the first waiter by avoiding
	 * a context switch, but incur additional coherent seqno reads when
	 * following the chain of request breadcrumbs. Since it is most likely
	 * that we have a single client waiting on each seqno, then reducing
	 * the overhead of waking that client is much preferred.
	 */
	struct intel_breadcrumbs {
		spinlock_t irq_lock; /* protects irq_*; irqsafe */
		struct intel_wait *irq_wait; /* oldest waiter by retirement */

		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
		struct rb_root waiters; /* sorted by retirement, priority */
		struct rb_root signals; /* sorted by retirement */
		struct task_struct *signaler; /* used for fence signalling */
		struct drm_i915_gem_request __rcu *first_signal;
		struct timer_list fake_irq; /* used after a missed interrupt */
		struct timer_list hangcheck; /* detect missed interrupts */

		unsigned int hangcheck_interrupts;

		bool irq_armed : 1;
		bool irq_enabled : 1;
		I915_SELFTEST_DECLARE(bool mock : 1);
	} breadcrumbs;

	/*
	 * A pool of objects to use as shadow copies of client batch buffers
	 * when the command parser is enabled. Prevents the client from
	 * modifying the batch contents after software parsing.
	 */
	struct i915_gem_batch_pool batch_pool;

	struct intel_hw_status_page status_page;
	struct i915_ctx_workarounds wa_ctx;
	struct i915_vma *scratch;

	u32 irq_keep_mask; /* always keep these interrupts */
	u32 irq_enable_mask; /* bitmask to enable ring interrupt */
	void (*irq_enable)(struct intel_engine_cs *engine);
	void (*irq_disable)(struct intel_engine_cs *engine);

	int (*init_hw)(struct intel_engine_cs *engine);
	void (*reset_hw)(struct intel_engine_cs *engine,
			 struct drm_i915_gem_request *req);

	void (*set_default_submission)(struct intel_engine_cs *engine);

	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
					  struct i915_gem_context *ctx);
	void (*context_unpin)(struct intel_engine_cs *engine,
			      struct i915_gem_context *ctx);
	int (*request_alloc)(struct drm_i915_gem_request *req);
	int (*init_context)(struct drm_i915_gem_request *req);

	int (*emit_flush)(struct drm_i915_gem_request *request,
			  u32 mode);
#define EMIT_INVALIDATE	BIT(0)
#define EMIT_FLUSH	BIT(1)
#define EMIT_BARRIER	(EMIT_INVALIDATE | EMIT_FLUSH)
	int (*emit_bb_start)(struct drm_i915_gem_request *req,
			     u64 offset, u32 length,
			     unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS     BIT(2)
	void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
				u32 *cs);
	int emit_breadcrumb_sz;
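	/* For reference, the emit_flush() hook above takes a mask of the
	 * EMIT_* bits; a full barrier is requested along the lines of
	 * (sketch only):
	 *
	 *	err = req->engine->emit_flush(req, EMIT_BARRIER);
	 */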
	/* Pass the request to the hardware queue (e.g. directly into
	 * the legacy ringbuffer or to the end of an execlist).
	 *
	 * This is called from an atomic context with irqs disabled; must
	 * be irq safe.
	 */
	void (*submit_request)(struct drm_i915_gem_request *req);

	/* Call when the priority on a request has changed and it and its
	 * dependencies may need rescheduling. Note the request itself may
	 * not be ready to run!
	 *
	 * Called under the struct_mutex.
	 */
	void (*schedule)(struct drm_i915_gem_request *request,
			 int priority);

	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
	void (*cleanup)(struct intel_engine_cs *engine);

	/* GEN8 signal/wait table - never trust comments!
	 *	  signal to	signal to	signal to	signal to	signal to
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      -------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *	 sync from	sync from	sync from	sync from	sync from
	 *	    RCS		   VCS		   BCS		  VECS		 VCS2
	 *      -------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *	|-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *	|-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *	|-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
	 *	|-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
	 *	|-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
		union {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
			struct {
				/* our mbox written by others */
				u32 wait[GEN6_NUM_SEMAPHORES];
				/* mboxes this ring signals to */
				i915_reg_t signal[GEN6_NUM_SEMAPHORES];
			} mbox;
			u64 signal_ggtt[I915_NUM_ENGINES];
		};

		/* AKA wait() */
		int (*sync_to)(struct drm_i915_gem_request *req,
			       struct drm_i915_gem_request *signal);
		u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
	} semaphore;

	/* Execlists */
	struct tasklet_struct irq_tasklet;
	struct i915_priolist default_priolist;
	bool no_priolist;
	struct execlist_port {
		struct drm_i915_gem_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, e) ((p) - (e)->execlist_port)
		GEM_DEBUG_DECL(u32 context_id);
	} execlist_port[2];
	struct rb_root execlist_queue;
	struct rb_node *execlist_first;
	unsigned int fw_domains;

	/* Contexts are pinned whilst they are active on the GPU. The last
	 * context executed remains active whilst the GPU is idle - the
	 * switch away and write to the context object only occurs on the
	 * next execution. Contexts are only unpinned on retirement of the
	 * following request ensuring that we can always write to the object
	 * on the context switch even after idling. Across suspend, we switch
	 * to the kernel context and trash it as the save may not happen
	 * before the hardware is powered down.
	 */
	struct i915_gem_context *last_retired_context;

	/* We track the current MI_SET_CONTEXT in order to eliminate
	 * redundant context switches. This presumes that requests are not
	 * reordered! Or, when they are, the tracking is updated along with
	 * the emission of individual requests into the legacy command
	 * stream (ring).
	 */
	struct i915_gem_context *legacy_active_context;

	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;

	struct intel_engine_hangcheck hangcheck;

	bool needs_cmd_parser;

	/*
	 * Table of commands the command parser needs to know about
	 * for this engine.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const struct drm_i915_reg_table *reg_tables;
	int reg_table_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the engine's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-engine length
	 * field encoding for the command (i.e. different opcode ranges use
	 * certain bits to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
};
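/* The execlist_port slots above pack the request pointer together with a
 * small submission count in the low bits of the pointer. A rough sketch of
 * how the helpers combine:
 *
 *	port_set(port, port_pack(rq, 1));
 *	...
 *	rq = port_unpack(port, &count);	// recover the request and count
 */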
static inline unsigned int
intel_engine_flag(const struct intel_engine_cs *engine)
{
	return BIT(engine->id);
}

static inline u32
intel_read_status_page(struct intel_engine_cs *engine, int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	return READ_ONCE(engine->status_page.page_addr[reg]);
}

static inline void
intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
{
	/* Writing into the status page should be done sparingly. Since
	 * we only do so when we are uncertain of the device state, we take
	 * a bit of extra paranoia to try and ensure that the HWS takes the
	 * value we give and that it doesn't end up trapped inside the CPU!
	 */
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		mb();
		clflush(&engine->status_page.page_addr[reg]);
		engine->status_page.page_addr[reg] = value;
		clflush(&engine->status_page.page_addr[reg]);
		mb();
	} else {
		WRITE_ONCE(engine->status_page.page_addr[reg], value);
	}
}

/*
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring,
		   struct drm_i915_private *i915,
		   unsigned int offset_bias);
void intel_ring_reset(struct intel_ring *ring, u32 tail);
unsigned int intel_ring_update_space(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);

void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);

void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);

int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);

u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
				   unsigned int n);

static inline void
intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
{
	/* Dummy function.
	 *
	 * This serves as a placeholder in the code so that the reader
	 * can compare against the preceding intel_ring_begin() and
	 * check that the number of dwords emitted matches the space
	 * reserved for the command packet (i.e. the value passed to
	 * intel_ring_begin()).
	 */
	GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
}
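/* A minimal sketch of how the two halves are paired by a caller emitting
 * a 4 dword packet (MI_NOOPs here purely for illustration):
 *
 *	u32 *cs;
 *
 *	cs = intel_ring_begin(req, 4);
 *	if (IS_ERR(cs))
 *		return PTR_ERR(cs);
 *
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	*cs++ = MI_NOOP;
 *	intel_ring_advance(req, cs);
 */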
static inline u32
intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

static inline u32
intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
{
	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
	u32 offset = addr - req->ring->vaddr;
	GEM_BUG_ON(offset > req->ring->size);
	return intel_ring_wrap(req->ring, offset);
}

static inline void
assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
{
	/* We could combine these into a single tail operation, but keeping
	 * them as separate tests will help identify the cause should one
	 * ever fire.
	 */
	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
	GEM_BUG_ON(tail >= ring->size);

	/*
	 * "Ring Buffer Use"
	 *	Gen2 BSpec "1. Programming Environment" / 1.4.4.6
	 *	Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
	 *	Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
	 * same cacheline, the Head Pointer must not be greater than the Tail
	 * Pointer."
	 *
	 * We use ring->head as the last known location of the actual
	 * RING_HEAD; it may have advanced beyond that, but in the worst case
	 * it is exactly ring->head, and so we should never program RING_TAIL
	 * to advance into the same cacheline as ring->head.
	 */
#define cacheline(a) round_down(a, CACHELINE_BYTES)
	GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
		   tail < ring->head);
#undef cacheline
}

static inline unsigned int
intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
{
	/* Whilst writes to the tail are strictly ordered, there is no
	 * serialisation between readers and the writers. The tail may be
	 * read by i915_gem_request_retire() just as it is being updated
	 * by execlists, as although the breadcrumb is complete, the context
	 * switch hasn't been seen.
	 */
	assert_ring_tail_valid(ring, tail);
	ring->tail = tail;
	return tail;
}

void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);

int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);

u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);

static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}
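/* The breadcrumb read back here is the engine's view of the last completed
 * seqno; a request with global seqno S can be treated as complete once,
 * roughly,
 *
 *	i915_seqno_passed(intel_engine_get_seqno(engine), S)
 *
 * where i915_seqno_passed() (from i915_gem_request.h) performs the
 * wrap-safe u32 comparison.
 */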
static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
{
	/* We are only peeking at the tail of the submit queue (and not the
	 * queue itself) in order to gain a hint as to the current active
	 * state of the engine. Callers are not expected to be taking
	 * engine->timeline->lock, nor are they expected to be concerned
	 * with serialising this hint with anything, so document it as
	 * a hint and nothing more.
	 */
	return READ_ONCE(engine->timeline->seqno);
}

int init_workarounds_ring(struct intel_engine_cs *engine);
int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);

void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone);

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
 * we need to allocate double the largest single packet within that emission
 * to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
 */
#define MIN_SPACE_FOR_ADD_REQUEST 336

static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}

/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);

static inline void intel_wait_init(struct intel_wait *wait,
				   struct drm_i915_gem_request *rq)
{
	wait->tsk = current;
	wait->request = rq;
}

static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->tsk = current;
	wait->seqno = seqno;
}

static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
{
	return wait->seqno;
}

static inline bool
intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
{
	wait->seqno = seqno;
	return intel_wait_has_seqno(wait);
}

static inline bool
intel_wait_update_request(struct intel_wait *wait,
			  const struct drm_i915_gem_request *rq)
{
	return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool
intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
{
	return wait->seqno == seqno;
}

static inline bool
intel_wait_check_request(const struct intel_wait *wait,
			 const struct drm_i915_gem_request *rq)
{
	return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
}

static inline bool intel_wait_complete(const struct intel_wait *wait)
{
	return RB_EMPTY_NODE(&wait->node);
}

bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
				   bool wakeup);
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);

static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->breadcrumbs.irq_wait);
}

unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)

void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
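/* In outline, a sleeper fills in a struct intel_wait for its request and
 * registers itself, along the lines of (sketch only, error handling and
 * the actual sleep elided):
 *
 *	struct intel_wait wait;
 *
 *	intel_wait_init(&wait, rq);
 *	intel_engine_add_wait(engine, &wait);
 *	... sleep until the seqno advances / intel_wait_complete(&wait) ...
 *	intel_engine_remove_wait(engine, &wait);
 */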
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
	memset(batch, 0, 6 * sizeof(u32));

	batch[0] = GFX_OP_PIPE_CONTROL(6);
	batch[1] = flags;
	batch[2] = offset;

	return batch + 6;
}

bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engines_mark_idle(struct drm_i915_private *i915);
void intel_engines_reset_default_submission(struct drm_i915_private *i915);

#endif /* _INTEL_RINGBUFFER_H_ */