Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.2 1265 lines 36 kB view raw
1/* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2014-2018 Intel Corporation 5 */ 6 7#include "i915_drv.h" 8#include "intel_workarounds.h" 9 10/** 11 * DOC: Hardware workarounds 12 * 13 * This file is intended as a central place to implement most [1]_ of the 14 * required workarounds for hardware to work as originally intended. They fall 15 * in five basic categories depending on how/when they are applied: 16 * 17 * - Workarounds that touch registers that are saved/restored to/from the HW 18 * context image. The list is emitted (via Load Register Immediate commands) 19 * everytime a new context is created. 20 * - GT workarounds. The list of these WAs is applied whenever these registers 21 * revert to default values (on GPU reset, suspend/resume [2]_, etc..). 22 * - Display workarounds. The list is applied during display clock-gating 23 * initialization. 24 * - Workarounds that whitelist a privileged register, so that UMDs can manage 25 * them directly. This is just a special case of a MMMIO workaround (as we 26 * write the list of these to/be-whitelisted registers to some special HW 27 * registers). 28 * - Workaround batchbuffers, that get executed automatically by the hardware 29 * on every HW context restore. 30 * 31 * .. [1] Please notice that there are other WAs that, due to their nature, 32 * cannot be applied from a central place. Those are peppered around the rest 33 * of the code, as needed. 34 * 35 * .. [2] Technically, some registers are powercontext saved & restored, so they 36 * survive a suspend/resume. In practice, writing them again is not too 37 * costly and simplifies things. We can revisit this in the future. 38 * 39 * Layout 40 * ~~~~~~ 41 * 42 * Keep things in this file ordered by WA type, as per the above (context, GT, 43 * display, register whitelist, batchbuffer). 
Then, inside each type, keep the 44 * following order: 45 * 46 * - Infrastructure functions and macros 47 * - WAs per platform in standard gen/chrono order 48 * - Public functions to init or apply the given workaround type. 49 */ 50 51static void wa_init_start(struct i915_wa_list *wal, const char *name) 52{ 53 wal->name = name; 54} 55 56#define WA_LIST_CHUNK (1 << 4) 57 58static void wa_init_finish(struct i915_wa_list *wal) 59{ 60 /* Trim unused entries. */ 61 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) { 62 struct i915_wa *list = kmemdup(wal->list, 63 wal->count * sizeof(*list), 64 GFP_KERNEL); 65 66 if (list) { 67 kfree(wal->list); 68 wal->list = list; 69 } 70 } 71 72 if (!wal->count) 73 return; 74 75 DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n", 76 wal->wa_count, wal->name); 77} 78 79static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) 80{ 81 unsigned int addr = i915_mmio_reg_offset(wa->reg); 82 unsigned int start = 0, end = wal->count; 83 const unsigned int grow = WA_LIST_CHUNK; 84 struct i915_wa *wa_; 85 86 GEM_BUG_ON(!is_power_of_2(grow)); 87 88 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. 
*/ 89 struct i915_wa *list; 90 91 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa), 92 GFP_KERNEL); 93 if (!list) { 94 DRM_ERROR("No space for workaround init!\n"); 95 return; 96 } 97 98 if (wal->list) 99 memcpy(list, wal->list, sizeof(*wa) * wal->count); 100 101 wal->list = list; 102 } 103 104 while (start < end) { 105 unsigned int mid = start + (end - start) / 2; 106 107 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) { 108 start = mid + 1; 109 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) { 110 end = mid; 111 } else { 112 wa_ = &wal->list[mid]; 113 114 if ((wa->mask & ~wa_->mask) == 0) { 115 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n", 116 i915_mmio_reg_offset(wa_->reg), 117 wa_->mask, wa_->val); 118 119 wa_->val &= ~wa->mask; 120 } 121 122 wal->wa_count++; 123 wa_->val |= wa->val; 124 wa_->mask |= wa->mask; 125 return; 126 } 127 } 128 129 wal->wa_count++; 130 wa_ = &wal->list[wal->count++]; 131 *wa_ = *wa; 132 133 while (wa_-- > wal->list) { 134 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) == 135 i915_mmio_reg_offset(wa_[1].reg)); 136 if (i915_mmio_reg_offset(wa_[1].reg) > 137 i915_mmio_reg_offset(wa_[0].reg)) 138 break; 139 140 swap(wa_[1], wa_[0]); 141 } 142} 143 144static void 145wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, 146 u32 val) 147{ 148 struct i915_wa wa = { 149 .reg = reg, 150 .mask = mask, 151 .val = val 152 }; 153 154 _wa_add(wal, &wa); 155} 156 157static void 158wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 159{ 160 wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val)); 161} 162 163static void 164wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 165{ 166 wa_write_masked_or(wal, reg, ~0, val); 167} 168 169static void 170wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) 171{ 172 wa_write_masked_or(wal, reg, val, val); 173} 174 175#define WA_SET_BIT_MASKED(addr, mask) \ 176 wa_write_masked_or(wal, (addr), 
/* Convenience wrappers for masked-write registers; require @wal in scope. */
#define WA_SET_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

/* Context workarounds common to the gen8 platforms (bdw, chv). */
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

/* Broadwell-specific context workarounds, on top of the gen8 common ones. */
static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen8_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

/* Cherryview-specific context workarounds, on top of the gen8 common ones. */
static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen8_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

/* Context workarounds common to the gen9 platforms (skl, bxt, kbl, glk, cfl). */
static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}
/*
 * Tune IZ hashing for slices where exactly one subslice has 7 EUs fused
 * off; see intel_device_info_runtime_init() for the sseu data this reads.
 */
static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	/* Nothing to tune: leave the hardware defaults alone. */
	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

/* Skylake: gen9 common context WAs plus IZ hashing tuning. */
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	gen9_ctx_workarounds_init(engine);
	skl_tune_iz_hashing(engine);
}

/* Broxton-specific context workarounds, on top of the gen9 common ones. */
static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

/* Kabylake-specific context workarounds, on top of the gen9 common ones. */
static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

/* Geminilake-specific context workarounds, on top of the gen9 common ones. */
static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

/* Coffeelake-specific context workarounds, on top of the gen9 common ones. */
static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	gen9_ctx_workarounds_init(engine);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

/* Cannonlake context workarounds (no gen9 common list on this platform). */
static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

/* Icelake context workarounds. */
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	/* WaDisableBankHangMode:icl */
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
}
/*
 * Record the per-platform context workarounds for @engine into
 * engine->ctx_wa_list. The list is later emitted into each new context
 * image by intel_engine_emit_ctx_wa().
 */
void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *wal = &engine->ctx_wa_list;

	wa_init_start(wal, "context");

	if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

/*
 * Emit the engine's context workaround list into @rq as a single
 * MI_LOAD_REGISTER_IMM, bracketed by flush barriers. Returns 0 or a
 * negative error code from ring emission.
 */
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	/* One (reg, value) pair per WA, plus the LRI header and a NOOP. */
	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->val;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

/* GT workarounds common to the gen9 platforms. */
static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

/* Skylake GT workarounds, on top of the gen9 common ones. */
static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

/* Broxton GT workarounds, on top of the gen9 common ones. */
static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

/* Kabylake GT workarounds, on top of the gen9 common ones. */
static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

/* Geminilake GT workarounds: only the gen9 common ones. */
static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

/* Coffeelake GT workarounds, on top of the gen9 common ones. */
static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
/*
 * Program the multicast/steering selector (MCR) so that subsequent
 * slice/subslice-specific MMIO reads are routed to an enabled s/ss pair.
 */
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	u32 mcr_slice_subslice_mask;

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 * This might be incompatible with
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
	 * Fortunately, this should not happen in production hardware, so
	 * we only assert that this is the case (instead of implementing
	 * something more complex that requires checking the range of every
	 * MMIO read).
	 */
	if (INTEL_GEN(i915) >= 10 &&
	    is_power_of_2(sseu->slice_mask)) {
		/*
		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
		 * enabled subslice, no need to redirect MCR packet
		 */
		u32 slice = fls(sseu->slice_mask);
		u32 fuse3 =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
		u8 ss_mask = sseu->subslice_mask[slice];

		u8 enabled_mask = (ss_mask | ss_mask >>
				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

		/*
		 * Production silicon should have matched L3Bank and
		 * subslice enabled
		 */
		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
	}

	/* Gen11 uses wider slice/subslice fields in the selector. */
	if (INTEL_GEN(i915) >= 11)
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
	else
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to an
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. In the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	wa_write_masked_or(wal,
			   GEN8_MCR_SELECTOR,
			   mcr_slice_subslice_mask,
			   intel_calculate_mcr_s_ss_select(i915));
}

/* Cannonlake GT workarounds. */
static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

/* Icelake GT workarounds. */
static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

/* Dispatch to the per-platform GT workaround initializer. */
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

/* Build the device-wide GT workaround list into i915->gt_wa_list. */
void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

/* Collect the forcewake domains needed to rmw every register in @wal. */
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}
/*
 * Apply every workaround in @wal to the hardware via read-modify-write
 * cycles, under the uncore lock with the needed forcewake domains held.
 */
static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

/* Apply the device-wide GT workaround list to the hardware. */
void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

/* Check that @cur still carries @wa; log and return false if it was lost. */
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->mask) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg), cur,
			  cur & wa->mask, wa->val, wa->mask);

		return false;
	}

	return true;
}

/* Read back and verify every workaround in @wal; true if all still hold. */
static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
				 const char *from)
{
	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

/* Add @reg to the whitelist, bounded by the number of HW NONPRIV slots. */
static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	_wa_add(wal, &wa);
}

/* Registers whitelisted for UMD access on all gen9 platforms. */
static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct i915_wa_list *w)
{
	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct i915_wa_list *w)
{
	/* WaAllowUMDToModifyHalfSliceChicken7:icl */
	whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

	/* WaAllowUMDToModifySamplerMode:icl */
	whitelist_reg(w, GEN10_SAMPLER_MODE);

	/* WaEnableStateCacheRedirectToCS:icl */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

/*
 * Build the per-platform register whitelist for @engine (render engine
 * only, per the GEM_BUG_ON below).
 */
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	GEM_BUG_ON(engine->id != RCS0);

	wa_init_start(w, "whitelist");

	if (IS_GEN(i915, 11))
		icl_whitelist_build(w);
	else if (IS_CANNONLAKE(i915))
		cnl_whitelist_build(w);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(w);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(w);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(w);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(w);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(w);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(w);
}

void
intel_engine_apply_whitelist(struct intel_engine_cs *engine) 1092{ 1093 const struct i915_wa_list *wal = &engine->whitelist; 1094 struct intel_uncore *uncore = engine->uncore; 1095 const u32 base = engine->mmio_base; 1096 struct i915_wa *wa; 1097 unsigned int i; 1098 1099 if (!wal->count) 1100 return; 1101 1102 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) 1103 intel_uncore_write(uncore, 1104 RING_FORCE_TO_NONPRIV(base, i), 1105 i915_mmio_reg_offset(wa->reg)); 1106 1107 /* And clear the rest just in case of garbage */ 1108 for (; i < RING_MAX_NONPRIV_SLOTS; i++) 1109 intel_uncore_write(uncore, 1110 RING_FORCE_TO_NONPRIV(base, i), 1111 i915_mmio_reg_offset(RING_NOPID(base))); 1112} 1113 1114static void 1115rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1116{ 1117 struct drm_i915_private *i915 = engine->i915; 1118 1119 if (IS_GEN(i915, 11)) { 1120 /* This is not an Wa. Enable for better image quality */ 1121 wa_masked_en(wal, 1122 _3D_CHICKEN3, 1123 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); 1124 1125 /* WaPipelineFlushCoherentLines:icl */ 1126 wa_write_or(wal, 1127 GEN8_L3SQCREG4, 1128 GEN8_LQSC_FLUSH_COHERENT_LINES); 1129 1130 /* 1131 * Wa_1405543622:icl 1132 * Formerly known as WaGAPZPriorityScheme 1133 */ 1134 wa_write_or(wal, 1135 GEN8_GARBCNTL, 1136 GEN11_ARBITRATION_PRIO_ORDER_MASK); 1137 1138 /* 1139 * Wa_1604223664:icl 1140 * Formerly known as WaL3BankAddressHashing 1141 */ 1142 wa_write_masked_or(wal, 1143 GEN8_GARBCNTL, 1144 GEN11_HASH_CTRL_EXCL_MASK, 1145 GEN11_HASH_CTRL_EXCL_BIT0); 1146 wa_write_masked_or(wal, 1147 GEN11_GLBLINVL, 1148 GEN11_BANK_HASH_ADDR_EXCL_MASK, 1149 GEN11_BANK_HASH_ADDR_EXCL_BIT0); 1150 1151 /* 1152 * Wa_1405733216:icl 1153 * Formerly known as WaDisableCleanEvicts 1154 */ 1155 wa_write_or(wal, 1156 GEN8_L3SQCREG4, 1157 GEN11_LQSC_CLEAN_EVICT_DISABLE); 1158 1159 /* WaForwardProgressSoftReset:icl */ 1160 wa_write_or(wal, 1161 GEN10_SCRATCH_LNCF2, 1162 PMFLUSHDONE_LNICRSDROP | 1163 
PMFLUSH_GAPL3UNBLOCK | 1164 PMFLUSHDONE_LNEBLK); 1165 1166 /* Wa_1406609255:icl (pre-prod) */ 1167 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) 1168 wa_write_or(wal, 1169 GEN7_SARCHKMD, 1170 GEN7_DISABLE_DEMAND_PREFETCH | 1171 GEN7_DISABLE_SAMPLER_PREFETCH); 1172 } 1173 1174 if (IS_GEN_RANGE(i915, 9, 11)) { 1175 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */ 1176 wa_masked_en(wal, 1177 GEN7_FF_SLICE_CS_CHICKEN1, 1178 GEN9_FFSC_PERCTX_PREEMPT_CTRL); 1179 } 1180 1181 if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) { 1182 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ 1183 wa_write_or(wal, 1184 GEN8_GARBCNTL, 1185 GEN9_GAPS_TSV_CREDIT_DISABLE); 1186 } 1187 1188 if (IS_BROXTON(i915)) { 1189 /* WaDisablePooledEuLoadBalancingFix:bxt */ 1190 wa_masked_en(wal, 1191 FF_SLICE_CS_CHICKEN2, 1192 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 1193 } 1194 1195 if (IS_GEN(i915, 9)) { 1196 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ 1197 wa_masked_en(wal, 1198 GEN9_CSFE_CHICKEN1_RCS, 1199 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); 1200 1201 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ 1202 wa_write_or(wal, 1203 BDW_SCRATCH1, 1204 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 1205 1206 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ 1207 if (IS_GEN9_LP(i915)) 1208 wa_write_masked_or(wal, 1209 GEN8_L3SQCREG1, 1210 L3_PRIO_CREDITS_MASK, 1211 L3_GENERAL_PRIO_CREDITS(62) | 1212 L3_HIGH_PRIO_CREDITS(2)); 1213 1214 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ 1215 wa_write_or(wal, 1216 GEN8_L3SQCREG4, 1217 GEN8_LQSC_FLUSH_COHERENT_LINES); 1218 } 1219} 1220 1221static void 1222xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1223{ 1224 struct drm_i915_private *i915 = engine->i915; 1225 1226 /* WaKBLVECSSemaphoreWaitPoll:kbl */ 1227 if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) { 1228 wa_write(wal, 1229 RING_SEMA_WAIT_POLL(engine->mmio_base), 1230 1); 1231 } 1232} 1233 1234static 
void 1235engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal) 1236{ 1237 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8)) 1238 return; 1239 1240 if (engine->id == RCS0) 1241 rcs_engine_wa_init(engine, wal); 1242 else 1243 xcs_engine_wa_init(engine, wal); 1244} 1245 1246void intel_engine_init_workarounds(struct intel_engine_cs *engine) 1247{ 1248 struct i915_wa_list *wal = &engine->wa_list; 1249 1250 if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8)) 1251 return; 1252 1253 wa_init_start(wal, engine->name); 1254 engine_init_workarounds(engine, wal); 1255 wa_init_finish(wal); 1256} 1257 1258void intel_engine_apply_workarounds(struct intel_engine_cs *engine) 1259{ 1260 wa_list_apply(engine->uncore, &engine->wa_list); 1261} 1262 1263#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 1264#include "selftests/intel_workarounds.c" 1265#endif