
drm/i915/guc: Provide mmio list to be saved/restored on engine reset

The driver must provide GuC with a list of mmio registers
that should be saved/restored during a GuC-based engine reset.
Unfortunately, the list must be dynamically allocated as its size is
variable. That means the driver must generate the list twice - once to
work out the size and a second time to actually write it out.
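In the intel_guc_ads.c changes below, the two passes are guc_mmio_reg_state_query(), which builds the list into a throwaway buffer purely to learn its size, and guc_mmio_reg_state_init(), which builds it again directly inside the ADS blob. As a standalone illustration of that "generate twice" pattern, here is a minimal userspace sketch; the names (build_list, struct reg, MAX_REGS) are invented for this example and are not part of the driver:

/*
 * Illustration only: the same list generator runs twice, first into a
 * fixed-size scratch buffer purely to learn how many (deduplicated)
 * entries it produces, then again into a buffer of exactly that size.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define MAX_REGS 128    /* scratch bound, like MAX_MMIO_REGS in the patch */

struct reg { uint32_t offset; uint32_t flags; };

/*
 * Build the deduplicated list into regs[] and return the entry count.
 * Because duplicates must be filtered, the list has to be materialised
 * even when the caller only wants its size.
 */
static size_t build_list(struct reg *regs, size_t cap)
{
        static const uint32_t wanted[] = { 0x229c, 0x2080, 0x20a8, 0x229c, 0x2080 };
        size_t n = 0, i, j;

        for (i = 0; i < sizeof(wanted) / sizeof(wanted[0]); i++) {
                for (j = 0; j < n; j++)
                        if (regs[j].offset == wanted[i])
                                break;
                if (j < n)
                        continue;       /* duplicate: silently skip */
                if (n >= cap)
                        return n;       /* scratch buffer exhausted */
                regs[n].offset = wanted[i];
                regs[n].flags = 0;
                n++;
        }
        return n;
}

int main(void)
{
        struct reg scratch[MAX_REGS], *final;
        size_t count;

        count = build_list(scratch, MAX_REGS);          /* pass 1: size only */

        final = calloc(count, sizeof(*final));          /* exact-size allocation */
        if (!final)
                return 1;

        build_list(final, count);                       /* pass 2: the real list */

        printf("%zu registers, %zu bytes\n", count, count * sizeof(*final));
        free(final);
        return 0;
}

The first pass cannot simply count entries, because duplicates are filtered out while the list is being built, so the list has to be materialised both times.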

v2:
(Alan / CI)
- GEN7_GT_MODE -> GEN6_GT_MODE to fix WA selftest failure

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Fernando Pacheco <fernando.pacheco@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-16-matthew.brost@intel.com

+222 -26
+24 -22
drivers/gpu/drm/i915/gt/intel_workarounds.c
···
 }
 
 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
-                   u32 clear, u32 set, u32 read_mask)
+                   u32 clear, u32 set, u32 read_mask, bool masked_reg)
 {
         struct i915_wa wa = {
                 .reg = reg,
                 .clr = clear,
                 .set = set,
                 .read = read_mask,
+                .masked_reg = masked_reg,
         };
 
         _wa_add(wal, &wa);
···
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
-        wa_add(wal, reg, clear, set, clear);
+        wa_add(wal, reg, clear, set, clear, false);
 }
 
 static void
···
 static void
 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
+        wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
+        wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                     u32 mask, u32 val)
 {
-        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
+        wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
···
         wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
 
         /* WaEnableFloatBlendOptimization:icl */
-        wa_write_clr_set(wal,
-                         GEN10_CACHE_MODE_SS,
-                         0, /* write-only, so skip validation */
-                         _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
+        wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+               _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+               0 /* write-only, so skip validation */,
+               true);
 
         /* WaDisableGPGPUMidThreadPreemption:icl */
         wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
···
                FF_MODE2,
                FF_MODE2_TDS_TIMER_MASK,
                FF_MODE2_TDS_TIMER_128,
-               0);
+               0, false);
 }
 
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
···
                FF_MODE2,
                FF_MODE2_GS_TIMER_MASK,
                FF_MODE2_GS_TIMER_224,
-               0);
+               0, false);
 
         /*
          * Wa_14012131227:dg1
···
         wa_add(wal,
                HSW_ROW_CHICKEN3, 0,
                _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
-               0 /* XXX does this reg exist? */);
+               0 /* XXX does this reg exist? */, true);
 
         /* WaVSRefCountFullforceMissDisable:hsw */
         wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
···
                  * disable bit, which we don't touch here, but it's good
                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                  */
-                wa_add(wal, GEN7_GT_MODE, 0,
-                       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
-                                     GEN6_WIZ_HASHING_16x4),
-                       GEN6_WIZ_HASHING_16x4);
+                wa_masked_field_set(wal,
+                                    GEN7_GT_MODE,
+                                    GEN6_WIZ_HASHING_MASK,
+                                    GEN6_WIZ_HASHING_16x4);
         }
 
         if (IS_GRAPHICS_VER(i915, 6, 7))
···
                  * disable bit, which we don't touch here, but it's good
                  * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                  */
-                wa_add(wal,
-                       GEN6_GT_MODE, 0,
-                       _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
-                       GEN6_WIZ_HASHING_16x4);
+                wa_masked_field_set(wal,
+                                    GEN6_GT_MODE,
+                                    GEN6_WIZ_HASHING_MASK,
+                                    GEN6_WIZ_HASHING_16x4);
 
                 /* WaDisable_RenderCache_OperationalFlush:snb */
                 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
···
         wa_add(wal, MI_MODE,
                0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
                /* XXX bit doesn't stick on Broadwater */
-               IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
+               IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
 
         if (GRAPHICS_VER(i915) == 4)
                 /*
···
                  */
                 wa_add(wal, ECOSKPD,
                        0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
-                       0 /* XXX bit doesn't stick on Broadwater */);
+                       0 /* XXX bit doesn't stick on Broadwater */,
+                       true);
 }
 
 static void
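A note on the new masked_reg flag: a "masked" register takes writes in which the upper 16 bits select which of the lower 16 bits are actually updated, so the value written never matches the value later read back. That is why masked workarounds pass only the plain bit value as the read-back mask, and why GuC must be told to use the same write style when it restores such a register (the GUC_MMIO_REG_ADD() macro in intel_guc_ads.c below turns masked_reg into GUC_REGSET_MASKED). The following standalone model of the masked-write semantic loosely follows the _MASKED_* helpers in i915_reg.h; it is simplified and illustrative only:

/*
 * Standalone illustration of i915 "masked" register writes: the upper
 * 16 bits of the written value select which of the lower 16 bits take
 * effect. Simplified macros, no type checking, not driver code.
 */
#include <stdio.h>
#include <stdint.h>

#define MASKED_FIELD(mask, value)       (((uint32_t)(mask) << 16) | (value))
#define MASKED_BIT_ENABLE(a)            MASKED_FIELD((a), (a))
#define MASKED_BIT_DISABLE(a)           MASKED_FIELD((a), 0)

/* Model of how the hardware applies a masked write to the register. */
static uint16_t masked_write(uint16_t old, uint32_t wr)
{
        uint16_t mask = wr >> 16, val = wr & 0xffff;

        return (old & ~mask) | (val & mask);
}

int main(void)
{
        uint16_t reg = 0x0f00;

        reg = masked_write(reg, MASKED_BIT_ENABLE(0x0008));     /* set bit 3 */
        reg = masked_write(reg, MASKED_BIT_DISABLE(0x0100));    /* clear bit 8 */
        printf("reg = 0x%04x\n", (unsigned int)reg);            /* prints 0x0e08 */
        return 0;
}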
+1
drivers/gpu/drm/i915/gt/intel_workarounds_types.h
···
         u32 clr;
         u32 set;
         u32 read;
+        bool masked_reg;
 };
 
 struct i915_wa_list {
+1
drivers/gpu/drm/i915/gt/uc/intel_guc.h
···
 
         struct i915_vma *ads_vma;
         struct __guc_ads_blob *ads_blob;
+        u32 ads_regset_size;
 
         struct i915_vma *lrc_desc_pool;
         void *lrc_desc_pool_vaddr;
+195 -4
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
···
  * Copyright © 2014-2019 Intel Corporation
  */
 
+#include <linux/bsearch.h>
+
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc.h"
 #include "intel_guc_ads.h"
···
  * | guc_policies                          |
  * +---------------------------------------+
  * | guc_gt_system_info                    |
- * +---------------------------------------+
+ * +---------------------------------------+ <== static
+ * | guc_mmio_reg[countA] (engine 0.0)     |
+ * | guc_mmio_reg[countB] (engine 0.1)     |
+ * | guc_mmio_reg[countC] (engine 1.0)     |
+ * |         ...                           |
+ * +---------------------------------------+ <== dynamic
  * | padding                               |
  * +---------------------------------------+ <== 4K aligned
  * | private data                          |
···
         struct guc_ads ads;
         struct guc_policies policies;
         struct guc_gt_system_info system_info;
+        /* From here on, location is dynamic! Refer to above diagram. */
+        struct guc_mmio_reg regset[0];
 } __packed;
+
+static u32 guc_ads_regset_size(struct intel_guc *guc)
+{
+        GEM_BUG_ON(!guc->ads_regset_size);
+        return guc->ads_regset_size;
+}
 
 static u32 guc_ads_private_data_size(struct intel_guc *guc)
 {
         return PAGE_ALIGN(guc->fw.private_data_size);
 }
 
+static u32 guc_ads_regset_offset(struct intel_guc *guc)
+{
+        return offsetof(struct __guc_ads_blob, regset);
+}
+
 static u32 guc_ads_private_data_offset(struct intel_guc *guc)
 {
-        return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+        u32 offset;
+
+        offset = guc_ads_regset_offset(guc) +
+                 guc_ads_regset_size(guc);
+        return PAGE_ALIGN(offset);
 }
 
 static u32 guc_ads_blob_size(struct intel_guc *guc)
···
                 system_info->mapping_table[guc_class][engine->instance] =
                         engine->instance;
         }
+}
+
+/*
+ * The save/restore register list must be pre-calculated to a temporary
+ * buffer of driver defined size before it can be generated in place
+ * inside the ADS.
+ */
+#define MAX_MMIO_REGS   128     /* Arbitrary size, increase as needed */
+struct temp_regset {
+        struct guc_mmio_reg *registers;
+        u32 used;
+        u32 size;
+};
+
+static int guc_mmio_reg_cmp(const void *a, const void *b)
+{
+        const struct guc_mmio_reg *ra = a;
+        const struct guc_mmio_reg *rb = b;
+
+        return (int)ra->offset - (int)rb->offset;
+}
+
+static void guc_mmio_reg_add(struct temp_regset *regset,
+                             u32 offset, u32 flags)
+{
+        u32 count = regset->used;
+        struct guc_mmio_reg reg = {
+                .offset = offset,
+                .flags = flags,
+        };
+        struct guc_mmio_reg *slot;
+
+        GEM_BUG_ON(count >= regset->size);
+
+        /*
+         * The mmio list is built using separate lists within the driver.
+         * It's possible that at some point we may attempt to add the same
+         * register more than once. Do not consider this an error; silently
+         * move on if the register is already in the list.
+         */
+        if (bsearch(&reg, regset->registers, count,
+                    sizeof(reg), guc_mmio_reg_cmp))
+                return;
+
+        slot = &regset->registers[count];
+        regset->used++;
+        *slot = reg;
+
+        while (slot-- > regset->registers) {
+                GEM_BUG_ON(slot[0].offset == slot[1].offset);
+                if (slot[1].offset > slot[0].offset)
+                        break;
+
+                swap(slot[1], slot[0]);
+        }
+}
+
+#define GUC_MMIO_REG_ADD(regset, reg, masked) \
+        guc_mmio_reg_add(regset, \
+                         i915_mmio_reg_offset((reg)), \
+                         (masked) ? GUC_REGSET_MASKED : 0)
+
+static void guc_mmio_regset_init(struct temp_regset *regset,
+                                 struct intel_engine_cs *engine)
+{
+        const u32 base = engine->mmio_base;
+        struct i915_wa_list *wal = &engine->wa_list;
+        struct i915_wa *wa;
+        unsigned int i;
+
+        regset->used = 0;
+
+        GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
+        GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
+        GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+
+        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+                GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+
+        /* Be extra paranoid and include all whitelist registers. */
+        for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
+                GUC_MMIO_REG_ADD(regset,
+                                 RING_FORCE_TO_NONPRIV(base, i),
+                                 false);
+
+        /* add in local MOCS registers */
+        for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
+                GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+}
+
+static int guc_mmio_reg_state_query(struct intel_guc *guc)
+{
+        struct intel_gt *gt = guc_to_gt(guc);
+        struct intel_engine_cs *engine;
+        enum intel_engine_id id;
+        struct temp_regset temp_set;
+        u32 total;
+
+        /*
+         * Need to actually build the list in order to filter out
+         * duplicates and other such data dependent constructions.
+         */
+        temp_set.size = MAX_MMIO_REGS;
+        temp_set.registers = kmalloc_array(temp_set.size,
+                                           sizeof(*temp_set.registers),
+                                           GFP_KERNEL);
+        if (!temp_set.registers)
+                return -ENOMEM;
+
+        total = 0;
+        for_each_engine(engine, gt, id) {
+                guc_mmio_regset_init(&temp_set, engine);
+                total += temp_set.used;
+        }
+
+        kfree(temp_set.registers);
+
+        return total * sizeof(struct guc_mmio_reg);
+}
+
+static void guc_mmio_reg_state_init(struct intel_guc *guc,
+                                    struct __guc_ads_blob *blob)
+{
+        struct intel_gt *gt = guc_to_gt(guc);
+        struct intel_engine_cs *engine;
+        enum intel_engine_id id;
+        struct temp_regset temp_set;
+        struct guc_mmio_reg_set *ads_reg_set;
+        u32 addr_ggtt, offset;
+        u8 guc_class;
+
+        offset = guc_ads_regset_offset(guc);
+        addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+        temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
+        temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]);
+
+        for_each_engine(engine, gt, id) {
+                /* Class index is checked in class converter */
+                GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
+
+                guc_class = engine_class_to_guc_class(engine->class);
+                ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];
+
+                guc_mmio_regset_init(&temp_set, engine);
+                if (!temp_set.used) {
+                        ads_reg_set->address = 0;
+                        ads_reg_set->count = 0;
+                        continue;
+                }
+
+                ads_reg_set->address = addr_ggtt;
+                ads_reg_set->count = temp_set.used;
+
+                temp_set.size -= temp_set.used;
+                temp_set.registers += temp_set.used;
+                addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg);
+        }
+
+        GEM_BUG_ON(temp_set.size);
 }
 
 /*
···
                  */
                 blob->ads.golden_context_lrca[guc_class] = 0;
                 blob->ads.eng_state_size[guc_class] =
-                        intel_engine_context_size(guc_to_gt(guc),
-                                                  engine_class) -
+                        intel_engine_context_size(gt, engine_class) -
                         skipped_size;
         }
 
···
         blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
         blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
 
+        /* MMIO save/restore list */
+        guc_mmio_reg_state_init(guc, blob);
+
         /* Private Data */
         blob->ads.private_data = base + guc_ads_private_data_offset(guc);
 
···
         int ret;
 
         GEM_BUG_ON(guc->ads_vma);
+
+        /* Need to calculate the reg state size dynamically: */
+        ret = guc_mmio_reg_state_query(guc);
+        if (ret < 0)
+                return ret;
+        guc->ads_regset_size = ret;
 
         size = guc_ads_blob_size(guc);
 
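For clarity on guc_mmio_reg_add() above: the temporary array is kept sorted at all times, so bsearch() over the existing entries can cheaply reject duplicates, and each new entry is appended and then swapped backwards into position (one insertion-sort step). A standalone model of just that logic, using simplified stand-in types rather than the driver's:

/*
 * Standalone model of the "keep the array sorted" strategy: bsearch()
 * over the sorted prefix rejects duplicates, then the new entry is
 * bubbled down into its slot. Illustration only, not driver code.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct reg { uint32_t offset, flags; };

static int reg_cmp(const void *a, const void *b)
{
        const struct reg *ra = a, *rb = b;

        return (int)ra->offset - (int)rb->offset;
}

static void reg_add(struct reg *regs, size_t *used, uint32_t offset)
{
        struct reg key = { .offset = offset };
        size_t i;

        /* Already present? Silently skip, so each register appears once. */
        if (bsearch(&key, regs, *used, sizeof(key), reg_cmp))
                return;

        /* Append, then swap backwards until the array is sorted again. */
        regs[(*used)++] = key;
        for (i = *used - 1; i > 0 && regs[i].offset < regs[i - 1].offset; i--) {
                struct reg tmp = regs[i];

                regs[i] = regs[i - 1];
                regs[i - 1] = tmp;
        }
}

int main(void)
{
        struct reg regs[16];
        size_t used = 0, i;
        uint32_t offsets[] = { 0x229c, 0x2080, 0x20a8, 0x229c, 0x2000 };

        for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
                reg_add(regs, &used, offsets[i]);

        for (i = 0; i < used; i++)
                printf("0x%04x\n", (unsigned int)regs[i].offset);  /* sorted, no dupes */
        return 0;
}

Keeping the array sorted makes every duplicate check O(log n), which matters because the list merges several overlapping sources (base ring registers, workarounds, whitelist slots and MOCS).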
+1
drivers/gpu/drm/i915/i915_reg.h
···
 
 /* MOCS (Memory Object Control State) registers */
 #define GEN9_LNCFCMOCS(i)               _MMIO(0xb020 + (i) * 4)        /* L3 Cache Control */
+#define GEN9_LNCFCMOCS_REG_COUNT        32
 
 #define __GEN9_RCS0_MOCS0               0xc800
 #define GEN9_GFX_MOCS(i)                _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)