/*
 * Linux kernel mirror (for testing)
 * git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 * Tags: kernel, os, linux
 * Snapshot at v5.1 (1089 lines, 33 kB)
 */
1/* SPDX-License-Identifier: MIT */ 2#ifndef _INTEL_RINGBUFFER_H_ 3#define _INTEL_RINGBUFFER_H_ 4 5#include <drm/drm_util.h> 6 7#include <linux/hashtable.h> 8#include <linux/irq_work.h> 9#include <linux/seqlock.h> 10 11#include "i915_gem_batch_pool.h" 12 13#include "i915_reg.h" 14#include "i915_pmu.h" 15#include "i915_request.h" 16#include "i915_selftest.h" 17#include "i915_timeline.h" 18#include "intel_gpu_commands.h" 19#include "intel_workarounds.h" 20 21struct drm_printer; 22struct i915_sched_attr; 23 24#define I915_CMD_HASH_ORDER 9 25 26/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, 27 * but keeps the logic simple. Indeed, the whole purpose of this macro is just 28 * to give some inclination as to some of the magic values used in the various 29 * workarounds! 30 */ 31#define CACHELINE_BYTES 64 32#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) 33 34struct intel_hw_status_page { 35 struct i915_vma *vma; 36 u32 *addr; 37}; 38 39#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) 40#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val) 41 42#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base)) 43#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val) 44 45#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base)) 46#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val) 47 48#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base)) 49#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val) 50 51#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base)) 52#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val) 53 54#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base)) 55#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val) 56 57/* seqno size is 
actually only a uint32, but since we plan to use MI_FLUSH_DW to 58 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. 59 */ 60enum intel_engine_hangcheck_action { 61 ENGINE_IDLE = 0, 62 ENGINE_WAIT, 63 ENGINE_ACTIVE_SEQNO, 64 ENGINE_ACTIVE_HEAD, 65 ENGINE_ACTIVE_SUBUNITS, 66 ENGINE_WAIT_KICK, 67 ENGINE_DEAD, 68}; 69 70static inline const char * 71hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) 72{ 73 switch (a) { 74 case ENGINE_IDLE: 75 return "idle"; 76 case ENGINE_WAIT: 77 return "wait"; 78 case ENGINE_ACTIVE_SEQNO: 79 return "active seqno"; 80 case ENGINE_ACTIVE_HEAD: 81 return "active head"; 82 case ENGINE_ACTIVE_SUBUNITS: 83 return "active subunits"; 84 case ENGINE_WAIT_KICK: 85 return "wait kick"; 86 case ENGINE_DEAD: 87 return "dead"; 88 } 89 90 return "unknown"; 91} 92 93#define I915_MAX_SLICES 3 94#define I915_MAX_SUBSLICES 8 95 96#define instdone_slice_mask(dev_priv__) \ 97 (IS_GEN(dev_priv__, 7) ? \ 98 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) 99 100#define instdone_subslice_mask(dev_priv__) \ 101 (IS_GEN(dev_priv__, 7) ? \ 102 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0]) 103 104#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ 105 for ((slice__) = 0, (subslice__) = 0; \ 106 (slice__) < I915_MAX_SLICES; \ 107 (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? 
(subslice__) + 1 : 0, \ 108 (slice__) += ((subslice__) == 0)) \ 109 for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \ 110 (BIT(subslice__) & instdone_subslice_mask(dev_priv__))) 111 112struct intel_instdone { 113 u32 instdone; 114 /* The following exist only in the RCS engine */ 115 u32 slice_common; 116 u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 117 u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; 118}; 119 120struct intel_engine_hangcheck { 121 u64 acthd; 122 u32 seqno; 123 unsigned long action_timestamp; 124 struct intel_instdone instdone; 125}; 126 127struct intel_ring { 128 struct i915_vma *vma; 129 void *vaddr; 130 131 struct i915_timeline *timeline; 132 struct list_head request_list; 133 struct list_head active_link; 134 135 u32 head; 136 u32 tail; 137 u32 emit; 138 139 u32 space; 140 u32 size; 141 u32 effective_size; 142}; 143 144struct i915_gem_context; 145struct drm_i915_reg_table; 146 147/* 148 * we use a single page to load ctx workarounds so all of these 149 * values are referred in terms of dwords 150 * 151 * struct i915_wa_ctx_bb: 152 * offset: specifies batch starting position, also helpful in case 153 * if we want to have multiple batches at different offsets based on 154 * some criteria. It is not a requirement at the moment but provides 155 * an option for future use. 156 * size: size of the batch in DWORDS 157 */ 158struct i915_ctx_workarounds { 159 struct i915_wa_ctx_bb { 160 u32 offset; 161 u32 size; 162 } indirect_ctx, per_ctx; 163 struct i915_vma *vma; 164}; 165 166struct i915_request; 167 168#define I915_MAX_VCS 4 169#define I915_MAX_VECS 2 170 171/* 172 * Engine IDs definitions. 173 * Keep instances of the same type engine together. 
174 */ 175enum intel_engine_id { 176 RCS = 0, 177 BCS, 178 VCS, 179 VCS2, 180 VCS3, 181 VCS4, 182#define _VCS(n) (VCS + (n)) 183 VECS, 184 VECS2 185#define _VECS(n) (VECS + (n)) 186}; 187 188struct i915_priolist { 189 struct list_head requests[I915_PRIORITY_COUNT]; 190 struct rb_node node; 191 unsigned long used; 192 int priority; 193}; 194 195#define priolist_for_each_request(it, plist, idx) \ 196 for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ 197 list_for_each_entry(it, &(plist)->requests[idx], sched.link) 198 199#define priolist_for_each_request_consume(it, n, plist, idx) \ 200 for (; (idx = ffs((plist)->used)); (plist)->used &= ~BIT(idx - 1)) \ 201 list_for_each_entry_safe(it, n, \ 202 &(plist)->requests[idx - 1], \ 203 sched.link) 204 205struct st_preempt_hang { 206 struct completion completion; 207 unsigned int count; 208 bool inject_hang; 209}; 210 211/** 212 * struct intel_engine_execlists - execlist submission queue and port state 213 * 214 * The struct intel_engine_execlists represents the combined logical state of 215 * driver and the hardware state for execlist mode of submission. 
216 */ 217struct intel_engine_execlists { 218 /** 219 * @tasklet: softirq tasklet for bottom handler 220 */ 221 struct tasklet_struct tasklet; 222 223 /** 224 * @default_priolist: priority list for I915_PRIORITY_NORMAL 225 */ 226 struct i915_priolist default_priolist; 227 228 /** 229 * @no_priolist: priority lists disabled 230 */ 231 bool no_priolist; 232 233 /** 234 * @submit_reg: gen-specific execlist submission register 235 * set to the ExecList Submission Port (elsp) register pre-Gen11 and to 236 * the ExecList Submission Queue Contents register array for Gen11+ 237 */ 238 u32 __iomem *submit_reg; 239 240 /** 241 * @ctrl_reg: the enhanced execlists control register, used to load the 242 * submit queue on the HW and to request preemptions to idle 243 */ 244 u32 __iomem *ctrl_reg; 245 246 /** 247 * @port: execlist port states 248 * 249 * For each hardware ELSP (ExecList Submission Port) we keep 250 * track of the last request and the number of times we submitted 251 * that port to hw. We then count the number of times the hw reports 252 * a context completion or preemption. As only one context can 253 * be active on hw, we limit resubmission of context to port[0]. This 254 * is called Lite Restore, of the context. 
255 */ 256 struct execlist_port { 257 /** 258 * @request_count: combined request and submission count 259 */ 260 struct i915_request *request_count; 261#define EXECLIST_COUNT_BITS 2 262#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS) 263#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS) 264#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS) 265#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS) 266#define port_set(p, packed) ((p)->request_count = (packed)) 267#define port_isset(p) ((p)->request_count) 268#define port_index(p, execlists) ((p) - (execlists)->port) 269 270 /** 271 * @context_id: context ID for port 272 */ 273 GEM_DEBUG_DECL(u32 context_id); 274 275#define EXECLIST_MAX_PORTS 2 276 } port[EXECLIST_MAX_PORTS]; 277 278 /** 279 * @active: is the HW active? We consider the HW as active after 280 * submitting any context for execution and until we have seen the 281 * last context completion event. After that, we do not expect any 282 * more events until we submit, and so can park the HW. 283 * 284 * As we have a small number of different sources from which we feed 285 * the HW, we track the state of each inside a single bitfield. 286 */ 287 unsigned int active; 288#define EXECLISTS_ACTIVE_USER 0 289#define EXECLISTS_ACTIVE_PREEMPT 1 290#define EXECLISTS_ACTIVE_HWACK 2 291 292 /** 293 * @port_mask: number of execlist ports - 1 294 */ 295 unsigned int port_mask; 296 297 /** 298 * @queue_priority_hint: Highest pending priority. 299 * 300 * When we add requests into the queue, or adjust the priority of 301 * executing requests, we compute the maximum priority of those 302 * pending requests. We can then use this value to determine if 303 * we need to preempt the executing requests to service the queue. 
304 * However, since the we may have recorded the priority of an inflight 305 * request we wanted to preempt but since completed, at the time of 306 * dequeuing the priority hint may no longer may match the highest 307 * available request priority. 308 */ 309 int queue_priority_hint; 310 311 /** 312 * @queue: queue of requests, in priority lists 313 */ 314 struct rb_root_cached queue; 315 316 /** 317 * @csb_write: control register for Context Switch buffer 318 * 319 * Note this register may be either mmio or HWSP shadow. 320 */ 321 u32 *csb_write; 322 323 /** 324 * @csb_status: status array for Context Switch buffer 325 * 326 * Note these register may be either mmio or HWSP shadow. 327 */ 328 u32 *csb_status; 329 330 /** 331 * @preempt_complete_status: expected CSB upon completing preemption 332 */ 333 u32 preempt_complete_status; 334 335 /** 336 * @csb_head: context status buffer head 337 */ 338 u8 csb_head; 339 340 I915_SELFTEST_DECLARE(struct st_preempt_hang preempt_hang;) 341}; 342 343#define INTEL_ENGINE_CS_MAX_NAME 8 344 345struct intel_engine_cs { 346 struct drm_i915_private *i915; 347 char name[INTEL_ENGINE_CS_MAX_NAME]; 348 349 enum intel_engine_id id; 350 unsigned int hw_id; 351 unsigned int guc_id; 352 353 u8 uabi_id; 354 u8 uabi_class; 355 356 u8 class; 357 u8 instance; 358 u32 context_size; 359 u32 mmio_base; 360 361 struct intel_ring *buffer; 362 363 struct i915_timeline timeline; 364 365 struct drm_i915_gem_object *default_state; 366 void *pinned_default_state; 367 368 /* Rather than have every client wait upon all user interrupts, 369 * with the herd waking after every interrupt and each doing the 370 * heavyweight seqno dance, we delegate the task (of being the 371 * bottom-half of the user interrupt) to the first client. 
After 372 * every interrupt, we wake up one client, who does the heavyweight 373 * coherent seqno read and either goes back to sleep (if incomplete), 374 * or wakes up all the completed clients in parallel, before then 375 * transferring the bottom-half status to the next client in the queue. 376 * 377 * Compared to walking the entire list of waiters in a single dedicated 378 * bottom-half, we reduce the latency of the first waiter by avoiding 379 * a context switch, but incur additional coherent seqno reads when 380 * following the chain of request breadcrumbs. Since it is most likely 381 * that we have a single client waiting on each seqno, then reducing 382 * the overhead of waking that client is much preferred. 383 */ 384 struct intel_breadcrumbs { 385 spinlock_t irq_lock; 386 struct list_head signalers; 387 388 struct irq_work irq_work; /* for use from inside irq_lock */ 389 390 unsigned int irq_enabled; 391 392 bool irq_armed; 393 } breadcrumbs; 394 395 struct { 396 /** 397 * @enable: Bitmask of enable sample events on this engine. 398 * 399 * Bits correspond to sample event types, for instance 400 * I915_SAMPLE_QUEUED is bit 0 etc. 401 */ 402 u32 enable; 403 /** 404 * @enable_count: Reference count for the enabled samplers. 405 * 406 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 407 */ 408 unsigned int enable_count[I915_ENGINE_SAMPLE_COUNT]; 409 /** 410 * @sample: Counter values for sampling events. 411 * 412 * Our internal timer stores the current counters in this field. 413 * 414 * Index number corresponds to @enum drm_i915_pmu_engine_sample. 415 */ 416 struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; 417 } pmu; 418 419 /* 420 * A pool of objects to use as shadow copies of client batch buffers 421 * when the command parser is enabled. Prevents the client from 422 * modifying the batch contents after software parsing. 
423 */ 424 struct i915_gem_batch_pool batch_pool; 425 426 struct intel_hw_status_page status_page; 427 struct i915_ctx_workarounds wa_ctx; 428 struct i915_wa_list ctx_wa_list; 429 struct i915_wa_list wa_list; 430 struct i915_wa_list whitelist; 431 432 u32 irq_keep_mask; /* always keep these interrupts */ 433 u32 irq_enable_mask; /* bitmask to enable ring interrupt */ 434 void (*irq_enable)(struct intel_engine_cs *engine); 435 void (*irq_disable)(struct intel_engine_cs *engine); 436 437 int (*init_hw)(struct intel_engine_cs *engine); 438 439 struct { 440 void (*prepare)(struct intel_engine_cs *engine); 441 void (*reset)(struct intel_engine_cs *engine, bool stalled); 442 void (*finish)(struct intel_engine_cs *engine); 443 } reset; 444 445 void (*park)(struct intel_engine_cs *engine); 446 void (*unpark)(struct intel_engine_cs *engine); 447 448 void (*set_default_submission)(struct intel_engine_cs *engine); 449 450 struct intel_context *(*context_pin)(struct intel_engine_cs *engine, 451 struct i915_gem_context *ctx); 452 453 int (*request_alloc)(struct i915_request *rq); 454 int (*init_context)(struct i915_request *rq); 455 456 int (*emit_flush)(struct i915_request *request, u32 mode); 457#define EMIT_INVALIDATE BIT(0) 458#define EMIT_FLUSH BIT(1) 459#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH) 460 int (*emit_bb_start)(struct i915_request *rq, 461 u64 offset, u32 length, 462 unsigned int dispatch_flags); 463#define I915_DISPATCH_SECURE BIT(0) 464#define I915_DISPATCH_PINNED BIT(1) 465 int (*emit_init_breadcrumb)(struct i915_request *rq); 466 u32 *(*emit_fini_breadcrumb)(struct i915_request *rq, 467 u32 *cs); 468 unsigned int emit_fini_breadcrumb_dw; 469 470 /* Pass the request to the hardware queue (e.g. directly into 471 * the legacy ringbuffer or to the end of an execlist). 472 * 473 * This is called from an atomic context with irqs disabled; must 474 * be irq safe. 
475 */ 476 void (*submit_request)(struct i915_request *rq); 477 478 /* 479 * Call when the priority on a request has changed and it and its 480 * dependencies may need rescheduling. Note the request itself may 481 * not be ready to run! 482 */ 483 void (*schedule)(struct i915_request *request, 484 const struct i915_sched_attr *attr); 485 486 /* 487 * Cancel all requests on the hardware, or queued for execution. 488 * This should only cancel the ready requests that have been 489 * submitted to the engine (via the engine->submit_request callback). 490 * This is called when marking the device as wedged. 491 */ 492 void (*cancel_requests)(struct intel_engine_cs *engine); 493 494 void (*cleanup)(struct intel_engine_cs *engine); 495 496 struct intel_engine_execlists execlists; 497 498 /* Contexts are pinned whilst they are active on the GPU. The last 499 * context executed remains active whilst the GPU is idle - the 500 * switch away and write to the context object only occurs on the 501 * next execution. Contexts are only unpinned on retirement of the 502 * following request ensuring that we can always write to the object 503 * on the context switch even after idling. Across suspend, we switch 504 * to the kernel context and trash it as the save may not happen 505 * before the hardware is powered down. 506 */ 507 struct intel_context *last_retired_context; 508 509 /* status_notifier: list of callbacks for context-switch changes */ 510 struct atomic_notifier_head context_status_notifier; 511 512 struct intel_engine_hangcheck hangcheck; 513 514#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) 515#define I915_ENGINE_SUPPORTS_STATS BIT(1) 516#define I915_ENGINE_HAS_PREEMPTION BIT(2) 517 unsigned int flags; 518 519 /* 520 * Table of commands the command parser needs to know about 521 * for this engine. 522 */ 523 DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER); 524 525 /* 526 * Table of registers allowed in commands that read/write registers. 
527 */ 528 const struct drm_i915_reg_table *reg_tables; 529 int reg_table_count; 530 531 /* 532 * Returns the bitmask for the length field of the specified command. 533 * Return 0 for an unrecognized/invalid command. 534 * 535 * If the command parser finds an entry for a command in the engine's 536 * cmd_tables, it gets the command's length based on the table entry. 537 * If not, it calls this function to determine the per-engine length 538 * field encoding for the command (i.e. different opcode ranges use 539 * certain bits to encode the command length in the header). 540 */ 541 u32 (*get_cmd_length_mask)(u32 cmd_header); 542 543 struct { 544 /** 545 * @lock: Lock protecting the below fields. 546 */ 547 seqlock_t lock; 548 /** 549 * @enabled: Reference count indicating number of listeners. 550 */ 551 unsigned int enabled; 552 /** 553 * @active: Number of contexts currently scheduled in. 554 */ 555 unsigned int active; 556 /** 557 * @enabled_at: Timestamp when busy stats were enabled. 558 */ 559 ktime_t enabled_at; 560 /** 561 * @start: Timestamp of the last idle to active transition. 562 * 563 * Idle is defined as active == 0, active is active > 0. 564 */ 565 ktime_t start; 566 /** 567 * @total: Total time this engine was busy. 568 * 569 * Accumulated time not counting the most recent block in cases 570 * where engine is currently busy (active > 0). 
571 */ 572 ktime_t total; 573 } stats; 574}; 575 576static inline bool 577intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) 578{ 579 return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; 580} 581 582static inline bool 583intel_engine_supports_stats(const struct intel_engine_cs *engine) 584{ 585 return engine->flags & I915_ENGINE_SUPPORTS_STATS; 586} 587 588static inline bool 589intel_engine_has_preemption(const struct intel_engine_cs *engine) 590{ 591 return engine->flags & I915_ENGINE_HAS_PREEMPTION; 592} 593 594static inline bool __execlists_need_preempt(int prio, int last) 595{ 596 /* 597 * Allow preemption of low -> normal -> high, but we do 598 * not allow low priority tasks to preempt other low priority 599 * tasks under the impression that latency for low priority 600 * tasks does not matter (as much as background throughput), 601 * so kiss. 602 * 603 * More naturally we would write 604 * prio >= max(0, last); 605 * except that we wish to prevent triggering preemption at the same 606 * priority level: the task that is running should remain running 607 * to preserve FIFO ordering of dependencies. 
608 */ 609 return prio > max(I915_PRIORITY_NORMAL - 1, last); 610} 611 612static inline void 613execlists_set_active(struct intel_engine_execlists *execlists, 614 unsigned int bit) 615{ 616 __set_bit(bit, (unsigned long *)&execlists->active); 617} 618 619static inline bool 620execlists_set_active_once(struct intel_engine_execlists *execlists, 621 unsigned int bit) 622{ 623 return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); 624} 625 626static inline void 627execlists_clear_active(struct intel_engine_execlists *execlists, 628 unsigned int bit) 629{ 630 __clear_bit(bit, (unsigned long *)&execlists->active); 631} 632 633static inline void 634execlists_clear_all_active(struct intel_engine_execlists *execlists) 635{ 636 execlists->active = 0; 637} 638 639static inline bool 640execlists_is_active(const struct intel_engine_execlists *execlists, 641 unsigned int bit) 642{ 643 return test_bit(bit, (unsigned long *)&execlists->active); 644} 645 646void execlists_user_begin(struct intel_engine_execlists *execlists, 647 const struct execlist_port *port); 648void execlists_user_end(struct intel_engine_execlists *execlists); 649 650void 651execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); 652 653void 654execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); 655 656static inline unsigned int 657execlists_num_ports(const struct intel_engine_execlists * const execlists) 658{ 659 return execlists->port_mask + 1; 660} 661 662static inline struct execlist_port * 663execlists_port_complete(struct intel_engine_execlists * const execlists, 664 struct execlist_port * const port) 665{ 666 const unsigned int m = execlists->port_mask; 667 668 GEM_BUG_ON(port_index(port, execlists) != 0); 669 GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); 670 671 memmove(port, port + 1, m * sizeof(struct execlist_port)); 672 memset(port + m, 0, sizeof(struct execlist_port)); 673 674 return port; 675} 676 677static 
inline unsigned int 678intel_engine_flag(const struct intel_engine_cs *engine) 679{ 680 return BIT(engine->id); 681} 682 683static inline u32 684intel_read_status_page(const struct intel_engine_cs *engine, int reg) 685{ 686 /* Ensure that the compiler doesn't optimize away the load. */ 687 return READ_ONCE(engine->status_page.addr[reg]); 688} 689 690static inline void 691intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) 692{ 693 /* Writing into the status page should be done sparingly. Since 694 * we do when we are uncertain of the device state, we take a bit 695 * of extra paranoia to try and ensure that the HWS takes the value 696 * we give and that it doesn't end up trapped inside the CPU! 697 */ 698 if (static_cpu_has(X86_FEATURE_CLFLUSH)) { 699 mb(); 700 clflush(&engine->status_page.addr[reg]); 701 engine->status_page.addr[reg] = value; 702 clflush(&engine->status_page.addr[reg]); 703 mb(); 704 } else { 705 WRITE_ONCE(engine->status_page.addr[reg], value); 706 } 707} 708 709/* 710 * Reads a dword out of the status page, which is written to from the command 711 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or 712 * MI_STORE_DATA_IMM. 713 * 714 * The following dwords have a reserved meaning: 715 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes. 716 * 0x04: ring 0 head pointer 717 * 0x05: ring 1 head pointer (915-class) 718 * 0x06: ring 2 head pointer (915-class) 719 * 0x10-0x1b: Context status DWords (GM45) 720 * 0x1f: Last written status offset. (GM45) 721 * 0x20-0x2f: Reserved (Gen6+) 722 * 723 * The area from dword 0x30 to 0x3ff is available for driver usage. 
724 */ 725#define I915_GEM_HWS_INDEX 0x30 726#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX * sizeof(u32)) 727#define I915_GEM_HWS_PREEMPT 0x32 728#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) 729#define I915_GEM_HWS_SEQNO 0x40 730#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) 731#define I915_GEM_HWS_SCRATCH 0x80 732#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH * sizeof(u32)) 733 734#define I915_HWS_CSB_BUF0_INDEX 0x10 735#define I915_HWS_CSB_WRITE_INDEX 0x1f 736#define CNL_HWS_CSB_WRITE_INDEX 0x2f 737 738struct intel_ring * 739intel_engine_create_ring(struct intel_engine_cs *engine, 740 struct i915_timeline *timeline, 741 int size); 742int intel_ring_pin(struct intel_ring *ring); 743void intel_ring_reset(struct intel_ring *ring, u32 tail); 744unsigned int intel_ring_update_space(struct intel_ring *ring); 745void intel_ring_unpin(struct intel_ring *ring); 746void intel_ring_free(struct intel_ring *ring); 747 748void intel_engine_stop(struct intel_engine_cs *engine); 749void intel_engine_cleanup(struct intel_engine_cs *engine); 750 751void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); 752 753int __must_check intel_ring_cacheline_align(struct i915_request *rq); 754 755u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); 756 757static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) 758{ 759 /* Dummy function. 760 * 761 * This serves as a placeholder in the code so that the reader 762 * can compare against the preceding intel_ring_begin() and 763 * check that the number of dwords emitted matches the space 764 * reserved for the command packet (i.e. the value passed to 765 * intel_ring_begin()). 
766 */ 767 GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); 768} 769 770static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) 771{ 772 return pos & (ring->size - 1); 773} 774 775static inline bool 776intel_ring_offset_valid(const struct intel_ring *ring, 777 unsigned int pos) 778{ 779 if (pos & -ring->size) /* must be strictly within the ring */ 780 return false; 781 782 if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ 783 return false; 784 785 return true; 786} 787 788static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) 789{ 790 /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ 791 u32 offset = addr - rq->ring->vaddr; 792 GEM_BUG_ON(offset > rq->ring->size); 793 return intel_ring_wrap(rq->ring, offset); 794} 795 796static inline void 797assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) 798{ 799 GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); 800 801 /* 802 * "Ring Buffer Use" 803 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 804 * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 805 * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 806 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the 807 * same cacheline, the Head Pointer must not be greater than the Tail 808 * Pointer." 809 * 810 * We use ring->head as the last known location of the actual RING_HEAD, 811 * it may have advanced but in the worst case it is equally the same 812 * as ring->head and so we should never program RING_TAIL to advance 813 * into the same cacheline as ring->head. 
814 */ 815#define cacheline(a) round_down(a, CACHELINE_BYTES) 816 GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && 817 tail < ring->head); 818#undef cacheline 819} 820 821static inline unsigned int 822intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) 823{ 824 /* Whilst writes to the tail are strictly order, there is no 825 * serialisation between readers and the writers. The tail may be 826 * read by i915_request_retire() just as it is being updated 827 * by execlists, as although the breadcrumb is complete, the context 828 * switch hasn't been seen. 829 */ 830 assert_ring_tail_valid(ring, tail); 831 ring->tail = tail; 832 return tail; 833} 834 835static inline unsigned int 836__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) 837{ 838 /* 839 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the 840 * same cacheline, the Head Pointer must not be greater than the Tail 841 * Pointer." 842 */ 843 GEM_BUG_ON(!is_power_of_2(size)); 844 return (head - tail - CACHELINE_BYTES) & (size - 1); 845} 846 847void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); 848 849int intel_engine_setup_common(struct intel_engine_cs *engine); 850int intel_engine_init_common(struct intel_engine_cs *engine); 851void intel_engine_cleanup_common(struct intel_engine_cs *engine); 852 853int intel_init_render_ring_buffer(struct intel_engine_cs *engine); 854int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); 855int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); 856int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); 857 858int intel_engine_stop_cs(struct intel_engine_cs *engine); 859void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); 860 861void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); 862 863u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); 864u64 intel_engine_get_last_batch_head(const struct intel_engine_cs 
*engine); 865 866static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) 867{ 868 /* 869 * We are only peeking at the tail of the submit queue (and not the 870 * queue itself) in order to gain a hint as to the current active 871 * state of the engine. Callers are not expected to be taking 872 * engine->timeline->lock, nor are they expected to be concerned 873 * wtih serialising this hint with anything, so document it as 874 * a hint and nothing more. 875 */ 876 return READ_ONCE(engine->timeline.seqno); 877} 878 879static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) 880{ 881 return intel_read_status_page(engine, I915_GEM_HWS_INDEX); 882} 883 884static inline bool intel_engine_signaled(struct intel_engine_cs *engine, 885 u32 seqno) 886{ 887 return i915_seqno_passed(intel_engine_get_seqno(engine), seqno); 888} 889 890static inline bool intel_engine_has_completed(struct intel_engine_cs *engine, 891 u32 seqno) 892{ 893 GEM_BUG_ON(!seqno); 894 return intel_engine_signaled(engine, seqno); 895} 896 897static inline bool intel_engine_has_started(struct intel_engine_cs *engine, 898 u32 seqno) 899{ 900 GEM_BUG_ON(!seqno); 901 return intel_engine_signaled(engine, seqno - 1); 902} 903 904void intel_engine_get_instdone(struct intel_engine_cs *engine, 905 struct intel_instdone *instdone); 906 907void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); 908void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); 909 910void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine); 911void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine); 912 913bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine); 914void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); 915 916static inline void 917intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine) 918{ 919 irq_work_queue(&engine->breadcrumbs.irq_work); 920} 921 922bool intel_engine_breadcrumbs_irq(struct 
intel_engine_cs *engine); 923 924void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); 925void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); 926 927void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, 928 struct drm_printer *p); 929 930static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) 931{ 932 memset(batch, 0, 6 * sizeof(u32)); 933 934 batch[0] = GFX_OP_PIPE_CONTROL(6); 935 batch[1] = flags; 936 batch[2] = offset; 937 938 return batch + 6; 939} 940 941static inline u32 * 942gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) 943{ 944 /* We're using qword write, offset should be aligned to 8 bytes. */ 945 GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); 946 947 /* w/a for post sync ops following a GPGPU operation we 948 * need a prior CS_STALL, which is emitted by the flush 949 * following the batch. 950 */ 951 *cs++ = GFX_OP_PIPE_CONTROL(6); 952 *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; 953 *cs++ = gtt_offset; 954 *cs++ = 0; 955 *cs++ = value; 956 /* We're thrashing one dword of HWS. */ 957 *cs++ = 0; 958 959 return cs; 960} 961 962static inline u32 * 963gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) 964{ 965 /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ 966 GEM_BUG_ON(gtt_offset & (1 << 5)); 967 /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ 968 GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); 969 970 *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; 971 *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; 972 *cs++ = 0; 973 *cs++ = value; 974 975 return cs; 976} 977 978static inline void intel_engine_reset(struct intel_engine_cs *engine, 979 bool stalled) 980{ 981 if (engine->reset.reset) 982 engine->reset.reset(engine, stalled); 983} 984 985void intel_engines_sanitize(struct drm_i915_private *i915, bool force); 986 987bool intel_engine_is_idle(struct intel_engine_cs *engine); 988bool intel_engines_are_idle(struct drm_i915_private *dev_priv); 989 990bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); 991void intel_engine_lost_context(struct intel_engine_cs *engine); 992 993void intel_engines_park(struct drm_i915_private *i915); 994void intel_engines_unpark(struct drm_i915_private *i915); 995 996void intel_engines_reset_default_submission(struct drm_i915_private *i915); 997unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); 998 999bool intel_engine_can_store_dword(struct intel_engine_cs *engine); 1000 1001__printf(3, 4) 1002void intel_engine_dump(struct intel_engine_cs *engine, 1003 struct drm_printer *m, 1004 const char *header, ...); 1005 1006struct intel_engine_cs * 1007intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); 1008 1009static inline void intel_engine_context_in(struct intel_engine_cs *engine) 1010{ 1011 unsigned long flags; 1012 1013 if (READ_ONCE(engine->stats.enabled) == 0) 1014 return; 1015 1016 write_seqlock_irqsave(&engine->stats.lock, flags); 1017 1018 if (engine->stats.enabled > 0) { 1019 if (engine->stats.active++ == 0) 1020 engine->stats.start = ktime_get(); 1021 GEM_BUG_ON(engine->stats.active == 0); 1022 } 1023 1024 write_sequnlock_irqrestore(&engine->stats.lock, flags); 1025} 1026 1027static inline 
void intel_engine_context_out(struct intel_engine_cs *engine) 1028{ 1029 unsigned long flags; 1030 1031 if (READ_ONCE(engine->stats.enabled) == 0) 1032 return; 1033 1034 write_seqlock_irqsave(&engine->stats.lock, flags); 1035 1036 if (engine->stats.enabled > 0) { 1037 ktime_t last; 1038 1039 if (engine->stats.active && --engine->stats.active == 0) { 1040 /* 1041 * Decrement the active context count and in case GPU 1042 * is now idle add up to the running total. 1043 */ 1044 last = ktime_sub(ktime_get(), engine->stats.start); 1045 1046 engine->stats.total = ktime_add(engine->stats.total, 1047 last); 1048 } else if (engine->stats.active == 0) { 1049 /* 1050 * After turning on engine stats, context out might be 1051 * the first event in which case we account from the 1052 * time stats gathering was turned on. 1053 */ 1054 last = ktime_sub(ktime_get(), engine->stats.enabled_at); 1055 1056 engine->stats.total = ktime_add(engine->stats.total, 1057 last); 1058 } 1059 } 1060 1061 write_sequnlock_irqrestore(&engine->stats.lock, flags); 1062} 1063 1064int intel_enable_engine_stats(struct intel_engine_cs *engine); 1065void intel_disable_engine_stats(struct intel_engine_cs *engine); 1066 1067ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine); 1068 1069#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1070 1071static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) 1072{ 1073 if (!execlists->preempt_hang.inject_hang) 1074 return false; 1075 1076 complete(&execlists->preempt_hang.completion); 1077 return true; 1078} 1079 1080#else 1081 1082static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) 1083{ 1084 return false; 1085} 1086 1087#endif 1088 1089#endif /* _INTEL_RINGBUFFER_H_ */