
drm/i915: Move a bunch of workaround-related code to its own file

This has grown to be a sizable amount of code, so move it to
its own file before we try to refactor anything. For the moment,
we are leaving behind the workaround batch buffer (WA BB) code and the WAs
that get applied (incorrectly) in init_clock_gating, but we will deal with
those later.

v2: Use intel_ prefix for code that deals with the hardware (Chris)
v3: Rebased
v4:
- Rebased
- New license header
v5:
- Rebased
- Added some organisational notes to the file (Chris)
v6: Include DOC section in the documentation build (Jani)

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
[ickle: appease checkpatch, mostly]
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/1523376767-18480-1-git-send-email-oscar.mateo@intel.com

Authored by Oscar Mateo, committed by Chris Wilson
7d3c425f 15c83c43

8 files changed, +709 -638

Documentation/gpu/i915.rst (+6)

 .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
    :internal:

+Workarounds
+-----------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+   :doc: Hardware workarounds
+
 Display Hardware Handling
 =========================
drivers/gpu/drm/i915/Makefile (+2 -1)

 	intel_csr.o \
 	intel_device_info.o \
 	intel_pm.o \
-	intel_runtime_pm.o
+	intel_runtime_pm.o \
+	intel_workarounds.o

 i915-$(CONFIG_COMPAT) += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
drivers/gpu/drm/i915/intel_engine_cs.c (-634)

 	}
 }

-static int wa_add(struct drm_i915_private *dev_priv,
-		  i915_reg_t addr,
-		  const u32 mask, const u32 val)
-{
-	const u32 idx = dev_priv->workarounds.count;
-
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
-		return -ENOSPC;
-
-	dev_priv->workarounds.reg[idx].addr = addr;
-	dev_priv->workarounds.reg[idx].value = val;
-	dev_priv->workarounds.reg[idx].mask = mask;
-
-	dev_priv->workarounds.count++;
-
-	return 0;
-}
-
-#define WA_REG(addr, mask, val) do { \
-		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-		if (r) \
-			return r; \
-	} while (0)
-
-#define WA_SET_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
-	WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-
-static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
-				 i915_reg_t reg)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct i915_workarounds *wa = &dev_priv->workarounds;
-	const uint32_t index = wa->hw_whitelist_count[engine->id];
-
-	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
-		return -EINVAL;
-
-	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
-		   i915_mmio_reg_offset(reg));
-	wa->hw_whitelist_count[engine->id]++;
-
-	return 0;
-}
-
-static int gen8_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
-	/* WaDisableAsyncFlipPerfMode:bdw,chv */
-	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
-	/* WaDisablePartialInstShootdown:bdw,chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	/* WaForceEnableNonCoherent:bdw,chv */
-	/* WaHdcDisableFetchWhenMasked:bdw,chv */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-			  HDC_FORCE_NON_COHERENT);
-
-	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
-	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
-	 *  polygons in the same 8x4 pixel/sample area to be processed without
-	 *  stalling waiting for the earlier ones to write to Hierarchical Z
-	 *  buffer."
-	 *
-	 * This optimization is off by default for BDW and CHV; turn it on.
-	 */
-	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
-	/* Wa4x4STCOptimizationDisable:bdw,chv */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN6_WIZ_HASHING_MASK,
-			    GEN6_WIZ_HASHING_16x4);
-
-	return 0;
-}
-
-static int bdw_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* WaDisableDopClockGating:bdw
-	 *
-	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
-	 * to disable EUTC clock gating.
-	 */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-			  DOP_CLOCK_GATING_DISABLE);
-
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-			  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  /* WaForceContextSaveRestoreNonCoherent:bdw */
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
-	return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* Improve HiZ throughput on CHV. */
-	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
-	return 0;
-}
-
-static int gen9_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
-
-	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
-	/* WaDisableKillLogic:bxt,skl,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   ECOCHK_DIS_TLB);
-
-	if (HAS_LLC(dev_priv)) {
-		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
-		 *
-		 * Must match Display Engine. See
-		 * WaCompressedResourceDisplayNewHashMode.
-		 */
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
-		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
-
-		I915_WRITE(MMCD_MISC_CTRL,
-			   I915_READ(MMCD_MISC_CTRL) |
-			   MMCD_PCLA |
-			   MMCD_HOTSPOT_EN);
-	}
-
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
-	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  FLOW_CONTROL_ENABLE |
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
-	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-			  GEN9_ENABLE_YV12_BUGFIX |
-			  GEN9_ENABLE_GPGPU_PREEMPTION);
-
-	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
-	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
-					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
-	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-			  GEN9_CCS_TLB_PREFETCH_ENABLE);
-
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
-	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
-	 * both tied to WaForceContextSaveRestoreNonCoherent
-	 * in some hsds for skl. We keep the tie for all gen9. The
-	 * documentation is a bit hazy and so we want to get common behaviour,
-	 * even though there is no clear evidence we would need both on kbl/bxt.
-	 * This area has been source of system hangs so we play it safe
-	 * and mimic the skl regardless of what bspec says.
-	 *
-	 * Use Force Non-Coherent whenever executing a 3D context. This
-	 * is a workaround for a possible hang in the unlikely event
-	 * a TLB invalidation occurs during a PSD flush.
-	 */
-
-	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_NON_COHERENT);
-
-	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   BDW_DISABLE_HDC_INVALIDATION);
-
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
-	if (IS_SKYLAKE(dev_priv) ||
-	    IS_KABYLAKE(dev_priv) ||
-	    IS_COFFEELAKE(dev_priv))
-		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 val = I915_READ(GEN8_L3SQCREG1);
-
-		val &= ~L3_PRIO_CREDITS_MASK;
-		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-		I915_WRITE(GEN8_L3SQCREG1, val);
-	}
-
-	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
-				    GEN8_LQSC_FLUSH_COHERENT_LINES));
-
-	/*
-	 * Supporting preemption with fine-granularity requires changes in the
-	 * batch buffer programming. Since we can't break old userspace, we
-	 * need to set our default preemption level to safe value. Userspace is
-	 * still able to use more fine-grained preemption levels, since in
-	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
-	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
-	 * not real HW workarounds, but merely a way to start using preemption
-	 * while maintaining old contract with userspace.
-	 */
-
-	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
-	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
-	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
-	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
-	if (ret)
-		return ret;
-
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	u8 vals[3] = { 0, 0, 0 };
-	unsigned int i;
-
-	for (i = 0; i < 3; i++) {
-		u8 ss;
-
-		/*
-		 * Only consider slices where one, and only one, subslice has 7
-		 * EUs
-		 */
-		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
-			continue;
-
-		/*
-		 * subslice_7eu[i] != 0 (because of the check above) and
-		 * ss_max == 4 (maximum number of subslices possible per slice)
-		 *
-		 * ->    0 <= ss <= 3;
-		 */
-		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
-		vals[i] = 3 - ss;
-	}
-
-	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
-		return 0;
-
-	/* Tune IZ hashing. See intel_device_info_runtime_init() */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN9_IZ_HASHING_MASK(2) |
-			    GEN9_IZ_HASHING_MASK(1) |
-			    GEN9_IZ_HASHING_MASK(0),
-			    GEN9_IZ_HASHING(2, vals[2]) |
-			    GEN9_IZ_HASHING(1, vals[1]) |
-			    GEN9_IZ_HASHING(0, vals[0]));
-
-	return 0;
-}
-
-static int skl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:skl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableGafsUnitClkGating:skl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaInPlaceDecompressionHang:skl */
-	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaDisableLSQCROPERFforOCL:skl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return skl_tune_iz_hashing(engine);
-}
-
-static int bxt_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:bxt */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  STALL_DOP_GATING_DISABLE);
-
-	/* WaDisablePooledEuLoadBalancingFix:bxt */
-	I915_WRITE(FF_SLICE_CS_CHICKEN2,
-		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
-
-	/* WaToEnableHwFixForPushConstHWBug:bxt */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaInPlaceDecompressionHang:bxt */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	return 0;
-}
-
-static int cnl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
-
-	/* WaForceContextSaveRestoreNonCoherent:cnl */
-	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
-	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
-	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
-	/* WaInPlaceDecompressionHang:cnl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaPushConstantDereferenceHoldDisable:cnl */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
-
-	/* FtrEnableFastAnisoL1BankingFix: cnl */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
-
-	/* WaDisable3DMidCmdPreemption:cnl */
-	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-	/* WaDisableGPGPUMidCmdPreemption:cnl */
-	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-	/* WaEnablePreemptionGranularityControlByUMD:cnl */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaDisableEarlyEOT:cnl */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-
-	return 0;
-}
-
-static int kbl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:kbl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableDynamicCreditSharing:kbl */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
-
-	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FENCE_DEST_SLM_DISABLE);
-
-	/* WaToEnableHwFixForPushConstHWBug:kbl */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableGafsUnitClkGating:kbl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaDisableSbeCacheDispatchPortSharing:kbl */
-	WA_SET_BIT_MASKED(
-		GEN7_HALF_SLICE_CHICKEN1,
-		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-	/* WaInPlaceDecompressionHang:kbl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaDisableLSQCROPERFforOCL:kbl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int glk_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
-	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaToEnableHwFixForPushConstHWBug:glk */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	return 0;
-}
-
-static int cfl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:cfl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaToEnableHwFixForPushConstHWBug:cfl */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableGafsUnitClkGating:cfl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaDisableSbeCacheDispatchPortSharing:cfl */
-	WA_SET_BIT_MASKED(
-		GEN7_HALF_SLICE_CHICKEN1,
-		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-	/* WaInPlaceDecompressionHang:cfl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	return 0;
-}
-
-int init_workarounds_ring(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int err;
-
-	if (GEM_WARN_ON(engine->id != RCS))
-		return -EINVAL;
-
-	dev_priv->workarounds.count = 0;
-	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
-
-	if (IS_BROADWELL(dev_priv))
-		err = bdw_init_workarounds(engine);
-	else if (IS_CHERRYVIEW(dev_priv))
-		err = chv_init_workarounds(engine);
-	else if (IS_SKYLAKE(dev_priv))
-		err = skl_init_workarounds(engine);
-	else if (IS_BROXTON(dev_priv))
-		err = bxt_init_workarounds(engine);
-	else if (IS_KABYLAKE(dev_priv))
-		err = kbl_init_workarounds(engine);
-	else if (IS_GEMINILAKE(dev_priv))
-		err = glk_init_workarounds(engine);
-	else if (IS_COFFEELAKE(dev_priv))
-		err = cfl_init_workarounds(engine);
-	else if (IS_CANNONLAKE(dev_priv))
-		err = cnl_init_workarounds(engine);
-	else
-		err = 0;
-	if (err)
-		return err;
-
-	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
-			 engine->name, dev_priv->workarounds.count);
-	return 0;
-}
-
-int intel_ring_workarounds_emit(struct i915_request *rq)
-{
-	struct i915_workarounds *w = &rq->i915->workarounds;
-	u32 *cs;
-	int ret, i;
-
-	if (w->count == 0)
-		return 0;
-
-	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	cs = intel_ring_begin(rq, w->count * 2 + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
-	for (i = 0; i < w->count; i++) {
-		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
-		*cs++ = w->reg[i].value;
-	}
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static bool ring_is_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
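The WA_SET_BIT_MASKED()/WA_CLR_BIT_MASKED() helpers in the hunk above rely on
the masked-register convention used by many GT "chicken" registers: the upper
16 bits of the written value select which of the lower 16 bits the hardware
actually updates. A minimal sketch of that convention, modelled on the
_MASKED_* helpers in i915_reg.h (simplified here; the kernel versions add
type handling):

	/*
	 * Sketch only: the top half of the dword is a write-enable mask
	 * for the bottom half, so a single blind write can flip one bit
	 * without a read-modify-write of the register.
	 */
	#define _MASKED_FIELD(mask, value)	(((mask) << 16) | (value))
	#define _MASKED_BIT_ENABLE(a)		_MASKED_FIELD((a), (a))
	#define _MASKED_BIT_DISABLE(a)		_MASKED_FIELD((a), 0)

	/* e.g. enabling bit 3: (0x0008 << 16) | 0x0008 == 0x00080008 */

This is what makes the list built by wa_add() replayable from a ring: each
entry is a self-contained write, so it can be emitted via
MI_LOAD_REGISTER_IMM without reading the register back first.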
drivers/gpu/drm/i915/intel_lrc.c (+1)

 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"

 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
drivers/gpu/drm/i915/intel_ringbuffer.c (+1)

 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_workarounds.h"

 /* Rough estimate of the typical request size, performing a flush,
  * set-context and then emitting the batch.
drivers/gpu/drm/i915/intel_ringbuffer.h (-3)

 	return READ_ONCE(engine->timeline->seqno);
 }

-int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct i915_request *rq);
-
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
drivers/gpu/drm/i915/intel_workarounds.c (+686)

+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_workarounds.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * This file is intended as a central place to implement most [1]_ of the
+ * required workarounds for hardware to work as originally intended. They fall
+ * in five basic categories depending on how/when they are applied:
+ *
+ * - Workarounds that touch registers that are saved/restored to/from the HW
+ *   context image. The list is emitted (via Load Register Immediate commands)
+ *   every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ *   revert to default values (on GPU reset, suspend/resume [2]_, etc.).
+ * - Display workarounds. The list is applied during display clock-gating
+ *   initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ *   them directly. This is just a special case of an MMIO workaround (as we
+ *   write the list of these to-be-whitelisted registers to some special HW
+ *   registers).
+ * - Workaround batchbuffers, that get executed automatically by the hardware
+ *   on every HW context restore.
+ *
+ * .. [1] Please notice that there are other WAs that, due to their nature,
+ *    cannot be applied from a central place. Those are peppered around the rest
+ *    of the code, as needed.
+ *
+ * .. [2] Technically, some registers are powercontext saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+static int wa_add(struct drm_i915_private *dev_priv,
+		  i915_reg_t addr,
+		  const u32 mask, const u32 val)
+{
+	const unsigned int idx = dev_priv->workarounds.count;
+
+	if (WARN_ON(idx >= I915_MAX_WA_REGS))
+		return -ENOSPC;
+
+	dev_priv->workarounds.reg[idx].addr = addr;
+	dev_priv->workarounds.reg[idx].value = val;
+	dev_priv->workarounds.reg[idx].mask = mask;
+
+	dev_priv->workarounds.count++;
+
+	return 0;
+}
+
+#define WA_REG(addr, mask, val) do { \
+		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
+		if (r) \
+			return r; \
+	} while (0)
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
+
+#define WA_SET_FIELD_MASKED(addr, mask, value) \
+	WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
+
+static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
+				 i915_reg_t reg)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_workarounds *wa = &dev_priv->workarounds;
+	const unsigned int index = wa->hw_whitelist_count[engine->id];
+
+	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
+		return -EINVAL;
+
+	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
+		   i915_mmio_reg_offset(reg));
+	wa->hw_whitelist_count[engine->id]++;
+
+	return 0;
+}
+
+static int gen8_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+	/* WaDisableAsyncFlipPerfMode:bdw,chv */
+	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
+	/* WaDisablePartialInstShootdown:bdw,chv */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+	/* Use Force Non-Coherent whenever executing a 3D context. This is a
+	 * workaround for a possible hang in the unlikely event a TLB
+	 * invalidation occurs during a PSD flush.
+	 */
+	/* WaForceEnableNonCoherent:bdw,chv */
+	/* WaHdcDisableFetchWhenMasked:bdw,chv */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+			  HDC_FORCE_NON_COHERENT);
+
+	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
+	 *  polygons in the same 8x4 pixel/sample area to be processed without
+	 *  stalling waiting for the earlier ones to write to Hierarchical Z
+	 *  buffer."
+	 *
+	 * This optimization is off by default for BDW and CHV; turn it on.
+	 */
+	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+
+	/* Wa4x4STCOptimizationDisable:bdw,chv */
+	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN6_WIZ_HASHING_MASK,
+			    GEN6_WIZ_HASHING_16x4);
+
+	return 0;
+}
+
+static int bdw_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen8_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+	/* WaDisableDopClockGating:bdw
+	 *
+	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+	 * to disable EUTC clock gating.
+	 */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+			  DOP_CLOCK_GATING_DISABLE);
+
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  /* WaForceContextSaveRestoreNonCoherent:bdw */
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
+			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+
+	return 0;
+}
+
+static int chv_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen8_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:chv */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+	/* Improve HiZ throughput on CHV. */
+	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
+
+	return 0;
+}
+
+static int gen9_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
+	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
+		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
+	I915_WRITE(BDW_SCRATCH1,
+		   I915_READ(BDW_SCRATCH1) |
+		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+	/* WaDisableKillLogic:bxt,skl,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		I915_WRITE(GAM_ECOCHK,
+			   I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB);
+
+	if (HAS_LLC(dev_priv)) {
+		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+		 *
+		 * Must match Display Engine. See
+		 * WaCompressedResourceDisplayNewHashMode.
+		 */
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
+		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+
+		I915_WRITE(MMCD_MISC_CTRL,
+			   I915_READ(MMCD_MISC_CTRL) |
+			   MMCD_PCLA |
+			   MMCD_HOTSPOT_EN);
+	}
+
+	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
+	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  FLOW_CONTROL_ENABLE |
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
+	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+			  GEN9_ENABLE_YV12_BUGFIX |
+			  GEN9_ENABLE_GPGPU_PREEMPTION);
+
+	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
+	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(CACHE_MODE_1,
+			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
+	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+			  GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+	 * both tied to WaForceContextSaveRestoreNonCoherent
+	 * in some hsds for skl. We keep the tie for all gen9. The
+	 * documentation is a bit hazy and so we want to get common behaviour,
+	 * even though there is no clear evidence we would need both on kbl/bxt.
+	 * This area has been source of system hangs so we play it safe
+	 * and mimic the skl regardless of what bspec says.
+	 *
+	 * Use Force Non-Coherent whenever executing a 3D context. This
+	 * is a workaround for a possible hang in the unlikely event
+	 * a TLB invalidation occurs during a PSD flush.
+	 */
+
+	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT);
+
+	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+	I915_WRITE(GAM_ECOCHK,
+		   I915_READ(GAM_ECOCHK) | BDW_DISABLE_HDC_INVALIDATION);
+
+	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_COFFEELAKE(dev_priv))
+		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+				  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+
+	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+	if (IS_GEN9_LP(dev_priv)) {
+		u32 val = I915_READ(GEN8_L3SQCREG1);
+
+		val &= ~L3_PRIO_CREDITS_MASK;
+		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
+		I915_WRITE(GEN8_L3SQCREG1, val);
+	}
+
+	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
+	I915_WRITE(GEN8_L3SQCREG4,
+		   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+	/*
+	 * Supporting preemption with fine-granularity requires changes in the
+	 * batch buffer programming. Since we can't break old userspace, we
+	 * need to set our default preemption level to safe value. Userspace is
+	 * still able to use more fine-grained preemption levels, since in
+	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+	 * not real HW workarounds, but merely a way to start using preemption
+	 * while maintaining old contract with userspace.
+	 */
+
+	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
+	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
+	if (ret)
+		return ret;
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+	if (ret)
+		return ret;
+
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	u8 vals[3] = { 0, 0, 0 };
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		u8 ss;
+
+		/*
+		 * Only consider slices where one, and only one, subslice has 7
+		 * EUs
+		 */
+		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
+			continue;
+
+		/*
+		 * subslice_7eu[i] != 0 (because of the check above) and
+		 * ss_max == 4 (maximum number of subslices possible per slice)
+		 *
+		 * ->    0 <= ss <= 3;
+		 */
+		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
+		vals[i] = 3 - ss;
+	}
+
+	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+		return 0;
+
+	/* Tune IZ hashing. See intel_device_info_runtime_init() */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN9_IZ_HASHING_MASK(2) |
+			    GEN9_IZ_HASHING_MASK(1) |
+			    GEN9_IZ_HASHING_MASK(0),
+			    GEN9_IZ_HASHING(2, vals[2]) |
+			    GEN9_IZ_HASHING(1, vals[1]) |
+			    GEN9_IZ_HASHING(0, vals[0]));
+
+	return 0;
+}
+
+static int skl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:skl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaDisableGafsUnitClkGating:skl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaInPlaceDecompressionHang:skl */
+	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
+		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+			   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaDisableLSQCROPERFforOCL:skl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
+	return skl_tune_iz_hashing(engine);
+}
+
+static int bxt_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:bxt */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  STALL_DOP_GATING_DISABLE);
+
+	/* WaDisablePooledEuLoadBalancingFix:bxt */
+	I915_WRITE(FF_SLICE_CS_CHICKEN2,
+		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
+
+	/* WaToEnableHwFixForPushConstHWBug:bxt */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaInPlaceDecompressionHang:bxt */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
+}
+
+static int cnl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   I915_READ(GAMT_CHKN_BIT_REG) |
+			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+
+	/* WaForceContextSaveRestoreNonCoherent:cnl */
+	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
+
+	/* WaInPlaceDecompressionHang:cnl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaPushConstantDereferenceHoldDisable:cnl */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+	/* FtrEnableFastAnisoL1BankingFix: cnl */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+	/* WaDisable3DMidCmdPreemption:cnl */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:cnl */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+	/* WaEnablePreemptionGranularityControlByUMD:cnl */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
+	if (ret)
+		return ret;
+
+	/* WaDisableEarlyEOT:cnl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+
+	return 0;
+}
+
+static int kbl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:kbl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   I915_READ(GAMT_CHKN_BIT_REG) |
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE);
+
+	/* WaToEnableHwFixForPushConstHWBug:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableGafsUnitClkGating:kbl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSbeCacheDispatchPortSharing:kbl */
+	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	/* WaInPlaceDecompressionHang:kbl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaDisableLSQCROPERFforOCL:kbl */
+	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int glk_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+	if (ret)
+		return ret;
+
+	/* WaToEnableHwFixForPushConstHWBug:glk */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int cfl_init_workarounds(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = gen9_init_workarounds(engine);
+	if (ret)
+		return ret;
+
+	/* WaEnableGapsTsvCreditFix:cfl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaToEnableHwFixForPushConstHWBug:cfl */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableGafsUnitClkGating:cfl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaDisableSbeCacheDispatchPortSharing:cfl */
+	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	/* WaInPlaceDecompressionHang:cfl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
+}
+
+int init_workarounds_ring(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int err;
+
+	if (GEM_WARN_ON(engine->id != RCS))
+		return -EINVAL;
+
+	dev_priv->workarounds.count = 0;
+	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
+
+	if (IS_BROADWELL(dev_priv))
+		err = bdw_init_workarounds(engine);
+	else if (IS_CHERRYVIEW(dev_priv))
+		err = chv_init_workarounds(engine);
+	else if (IS_SKYLAKE(dev_priv))
+		err = skl_init_workarounds(engine);
+	else if (IS_BROXTON(dev_priv))
+		err = bxt_init_workarounds(engine);
+	else if (IS_KABYLAKE(dev_priv))
+		err = kbl_init_workarounds(engine);
+	else if (IS_GEMINILAKE(dev_priv))
+		err = glk_init_workarounds(engine);
+	else if (IS_COFFEELAKE(dev_priv))
+		err = cfl_init_workarounds(engine);
+	else if (IS_CANNONLAKE(dev_priv))
+		err = cnl_init_workarounds(engine);
+	else
+		err = 0;
+	if (err)
+		return err;
+
+	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
+			 engine->name, dev_priv->workarounds.count);
+	return 0;
+}
+
+int intel_ring_workarounds_emit(struct i915_request *rq)
+{
+	struct i915_workarounds *w = &rq->i915->workarounds;
+	u32 *cs;
+	int ret, i;
+
+	if (w->count == 0)
+		return 0;
+
+	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
+	cs = intel_ring_begin(rq, w->count * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
+	for (i = 0; i < w->count; i++) {
+		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].value;
+	}
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
+	return 0;
+}
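For reference, a sketch of the command-stream contents that
intel_ring_workarounds_emit() builds for a hypothetical two-entry list
(w->count == 2). The w->count * 2 + 2 dwords requested from
intel_ring_begin() cover one shared MI_LOAD_REGISTER_IMM header, one
(offset, value) pair per workaround, and a trailing MI_NOOP; this is an
illustration of the loop above, not literal driver code:

	/* Hypothetical two-entry list: 2 * 2 + 2 = 6 dwords. */
	cs[0] = MI_LOAD_REGISTER_IMM(2);              /* one header for all entries */
	cs[1] = i915_mmio_reg_offset(w->reg[0].addr); /* register offset */
	cs[2] = w->reg[0].value;                      /* (masked) value to write */
	cs[3] = i915_mmio_reg_offset(w->reg[1].addr);
	cs[4] = w->reg[1].value;
	cs[5] = MI_NOOP;                              /* pad to an even dword count */

The emit_flush(rq, EMIT_BARRIER) calls before and after the packet serialize
the register writes against the surrounding commands in the request.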
drivers/gpu/drm/i915/intel_workarounds.h (+13)

+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef _I915_WORKAROUNDS_H_
+#define _I915_WORKAROUNDS_H_
+
+int init_workarounds_ring(struct intel_engine_cs *engine);
+int intel_ring_workarounds_emit(struct i915_request *rq);
+
+#endif
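A hedged sketch of how the two exported entry points fit together; the helper
below is hypothetical and only for illustration (in the driver the equivalent
calls live in the engine-init and context-initialisation paths, per the
intel_lrc.c and intel_ringbuffer.c includes above):

	#include "i915_drv.h"
	#include "intel_workarounds.h"

	/* Hypothetical helper, for illustration only. */
	static int example_apply_ctx_workarounds(struct intel_engine_cs *engine,
						 struct i915_request *rq)
	{
		int err;

		/* Build dev_priv->workarounds for the render engine (RCS). */
		err = init_workarounds_ring(engine);
		if (err)
			return err;

		/* Replay the list into the request via MI_LOAD_REGISTER_IMM. */
		return intel_ring_workarounds_emit(rq);
	}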