Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/i915/xehpsdv: Add compute DSS type

Starting in XeHP, the concept of slice has been removed in favor of
DSS (Dual-Subslice) masks for various workload types. These workloads have
been divided into those enabled for geometry and those enabled for compute.

i915 currently maintains a single set of S/SS/EU masks for the device.
The goal of this patch set is to minimize the impact on prior
generations while still giving the user maximum flexibility.

v2:
- Generalize a comment about uapi access to geometry/compute masks; the
proposed uapi has changed since the comment was first written, and
will show up in a future series once the userspace code is published.
(Lucas)

v3:
- Eliminate unnecessary has_compute_dss flag. (Lucas)
- Drop unwanted comment change in uapi header. (Lucas)

Bspec: 33117, 33118, 20376
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Steve Hampson <steven.t.hampson@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210806172901.1049133-1-matthew.d.roper@intel.com

authored by

Stuart Summers and committed by
Matt Roper
d16de9a2 89f2e7ab

+50 -17
+45 -15
drivers/gpu/drm/i915/gt/intel_sseu.c
··· 46 46 } 47 47 48 48 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, 49 - u32 ss_mask) 49 + u8 *subslice_mask, u32 ss_mask) 50 50 { 51 51 int offset = slice * sseu->ss_stride; 52 52 53 - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); 53 + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); 54 54 } 55 55 56 56 unsigned int ··· 100 100 return total; 101 101 } 102 102 103 - static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, 104 - u8 s_en, u32 ss_en, u16 eu_en) 103 + static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) 104 + { 105 + u32 ss_mask; 106 + 107 + ss_mask = ss_en >> (s * sseu->max_subslices); 108 + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); 109 + 110 + return ss_mask; 111 + } 112 + 113 + static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, 114 + u32 g_ss_en, u32 c_ss_en, u16 eu_en) 105 115 { 106 116 int s, ss; 107 117 108 - /* ss_en represents entire subslice mask across all slices */ 118 + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ 109 119 GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > 110 - sizeof(ss_en) * BITS_PER_BYTE); 120 + sizeof(g_ss_en) * BITS_PER_BYTE); 111 121 112 122 for (s = 0; s < sseu->max_slices; s++) { 113 123 if ((s_en & BIT(s)) == 0) ··· 125 115 126 116 sseu->slice_mask |= BIT(s); 127 117 128 - intel_sseu_set_subslices(sseu, s, ss_en); 118 + /* 119 + * XeHP introduces the concept of compute vs geometry DSS. To 120 + * reduce variation between GENs around subslice usage, store a 121 + * mask for both the geometry and compute enabled masks since 122 + * userspace will need to be able to query these masks 123 + * independently. Also compute a total enabled subslice count 124 + * for the purposes of selecting subslices to use in a 125 + * particular GEM context. 
126 + */ 127 + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, 128 + get_ss_stride_mask(sseu, s, c_ss_en)); 129 + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, 130 + get_ss_stride_mask(sseu, s, g_ss_en)); 131 + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, 132 + get_ss_stride_mask(sseu, s, 133 + g_ss_en | c_ss_en)); 129 134 130 135 for (ss = 0; ss < sseu->max_subslices; ss++) 131 136 if (intel_sseu_has_subslice(sseu, s, ss)) ··· 154 129 { 155 130 struct sseu_dev_info *sseu = &gt->info.sseu; 156 131 struct intel_uncore *uncore = gt->uncore; 157 - u32 dss_en; 132 + u32 g_dss_en, c_dss_en = 0; 158 133 u16 eu_en = 0; 159 134 u8 eu_en_fuse; 160 135 u8 s_en; ··· 185 160 s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & 186 161 GEN11_GT_S_ENA_MASK; 187 162 188 - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); 163 + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); 164 + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) 165 + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); 189 166 190 167 /* one bit per pair of EUs */ 191 168 if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) ··· 200 173 if (eu_en_fuse & BIT(eu)) 201 174 eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); 202 175 203 - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); 176 + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); 204 177 205 178 /* TGL only supports slice-level power gating */ 206 179 sseu->has_slice_pg = 1; ··· 226 199 eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & 227 200 GEN11_EU_DIS_MASK); 228 201 229 - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); 202 + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); 230 203 231 204 /* ICL has no power gating restrictions. 
*/ 232 205 sseu->has_slice_pg = 1; ··· 267 240 sseu_set_eus(sseu, 0, 1, ~disabled_mask); 268 241 } 269 242 270 - intel_sseu_set_subslices(sseu, 0, subslice_mask); 243 + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); 271 244 272 245 sseu->eu_total = compute_eu_total(sseu); 273 246 ··· 323 296 /* skip disabled slice */ 324 297 continue; 325 298 326 - intel_sseu_set_subslices(sseu, s, subslice_mask); 299 + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, 300 + subslice_mask); 327 301 328 302 eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); 329 303 for (ss = 0; ss < sseu->max_subslices; ss++) { ··· 436 408 /* skip disabled slice */ 437 409 continue; 438 410 439 - intel_sseu_set_subslices(sseu, s, subslice_mask); 411 + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, 412 + subslice_mask); 440 413 441 414 for (ss = 0; ss < sseu->max_subslices; ss++) { 442 415 u8 eu_disabled_mask; ··· 535 506 sseu->eu_per_subslice); 536 507 537 508 for (s = 0; s < sseu->max_slices; s++) { 538 - intel_sseu_set_subslices(sseu, s, subslice_mask); 509 + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, 510 + subslice_mask); 539 511 540 512 for (ss = 0; ss < sseu->max_subslices; ss++) { 541 513 sseu_set_eus(sseu, s, ss,
+3 -1
drivers/gpu/drm/i915/gt/intel_sseu.h
··· 32 32 struct sseu_dev_info { 33 33 u8 slice_mask; 34 34 u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; 35 + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; 36 + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; 35 37 u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; 36 38 u16 eu_total; 37 39 u8 eu_per_subslice; ··· 106 104 u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); 107 105 108 106 void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, 109 - u32 ss_mask); 107 + u8 *subslice_mask, u32 ss_mask); 110 108 111 109 void intel_sseu_info_init(struct intel_gt *gt); 112 110
+2 -1
drivers/gpu/drm/i915/i915_reg.h
··· 3160 3160 3161 3161 #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) 3162 3162 3163 - #define GEN12_GT_DSS_ENABLE _MMIO(0x913C) 3163 + #define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C) 3164 + #define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) 3164 3165 3165 3166 #define XEHP_EU_ENABLE _MMIO(0x9134) 3166 3167 #define XEHP_EU_ENA_MASK 0xFF