Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/ptl: L3bank mask is not available on the media GT

On PTL platforms with media version 30.00, the fuse registers for
reporting L3 bank availability to the media GT just read out as ~0 and do not
provide proper values. Xe does not use the L3 bank mask for anything
internally; it only passes the mask through to userspace via the GT
topology query.

Since we don't have any way to get the real L3 bank mask, we don't want
to pass garbage to userspace. Passing a zeroed mask or a copy of the
primary GT's L3 bank mask would also be inaccurate and likely to cause
confusion for userspace. The best approach is to simply not include L3
in the list of masks returned by the topology query in cases where we
aren't able to provide a meaningful value. This won't change the
behavior for any existing platforms (where we can always obtain L3 masks
successfully for all GTs); it will only prevent us from reporting
bad information on upcoming platform(s).

There's a good chance this will become a formal workaround in the
future, but for now we don't have a lineage number so "no_media_l3" is
used in place of a lineage as the OOB workaround descriptor.

v2:
- Re-calculate query size to properly match data returned. (Gustavo)
- Update kerneldoc to clarify that the L3bank mask may not be included
  in the query results if the hardware doesn't make it available.
  (Gustavo)

Cc: Matt Atwood <matthew.s.atwood@intel.com>
Cc: Gustavo Sousa <gustavo.sousa@intel.com>
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
Co-developed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa@intel.com>
Acked-by: Francois Dugast <francois.dugast@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241007154143.2021124-2-matthew.d.roper@intel.com

authored by

Shekhar Chauhan and committed by
Matt Roper
9ab440a9 691b5a6a

+49 -12
+14
drivers/gpu/drm/xe/xe_gt_topology.c
··· 5 5 6 6 #include "xe_gt_topology.h" 7 7 8 + #include <generated/xe_wa_oob.h> 8 9 #include <linux/bitmap.h> 9 10 #include <linux/compiler.h> 10 11 ··· 13 12 #include "xe_assert.h" 14 13 #include "xe_gt.h" 15 14 #include "xe_mmio.h" 15 + #include "xe_wa.h" 16 16 17 17 static void 18 18 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) ··· 130 128 { 131 129 struct xe_device *xe = gt_to_xe(gt); 132 130 u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3); 131 + 132 + /* 133 + * PTL platforms with media version 30.00 do not provide proper values 134 + * for the media GT's L3 bank registers. Skip the readout since we 135 + * don't have any way to obtain real values. 136 + * 137 + * This may get re-described as an official workaround in the future, 138 + * but there's no tracking number assigned yet so we use a custom 139 + * OOB workaround descriptor. 140 + */ 141 + if (XE_WA(gt, no_media_l3)) 142 + return; 133 143 134 144 if (GRAPHICS_VER(xe) >= 20) { 135 145 xe_l3_bank_mask_t per_node = {};
+31 -11
drivers/gpu/drm/xe/xe_query.c
··· 9 9 #include <linux/sched/clock.h> 10 10 11 11 #include <drm/ttm/ttm_placement.h> 12 + #include <generated/xe_wa_oob.h> 12 13 #include <uapi/drm/xe_drm.h> 13 14 14 15 #include "regs/xe_engine_regs.h" ··· 24 23 #include "xe_macros.h" 25 24 #include "xe_mmio.h" 26 25 #include "xe_ttm_vram_mgr.h" 26 + #include "xe_wa.h" 27 27 28 28 static const u16 xe_to_user_engine_class[] = { 29 29 [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER, ··· 457 455 458 456 static size_t calc_topo_query_size(struct xe_device *xe) 459 457 { 460 - return xe->info.gt_count * 461 - (4 * sizeof(struct drm_xe_query_topology_mask) + 462 - sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + 463 - sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + 464 - sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) + 465 - sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss)); 458 + struct xe_gt *gt; 459 + size_t query_size = 0; 460 + int id; 461 + 462 + for_each_gt(gt, xe, id) { 463 + query_size += 3 * sizeof(struct drm_xe_query_topology_mask) + 464 + sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) + 465 + sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) + 466 + sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss); 467 + 468 + /* L3bank mask may not be available for some GTs */ 469 + if (!XE_WA(gt, no_media_l3)) 470 + query_size += sizeof(struct drm_xe_query_topology_mask) + 471 + sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask); 472 + } 473 + 474 + return query_size; 466 475 } 467 476 468 477 static int copy_mask(void __user **ptr, ··· 526 513 if (err) 527 514 return err; 528 515 529 - topo.type = DRM_XE_TOPO_L3_BANK; 530 - err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, 531 - sizeof(gt->fuse_topo.l3_bank_mask)); 532 - if (err) 533 - return err; 516 + /* 517 + * If the kernel doesn't have a way to obtain a correct L3bank 518 + * mask, then it's better to omit L3 from the query rather than 519 + * reporting bogus or zeroed information to userspace. 
520 + */ 521 + if (!XE_WA(gt, no_media_l3)) { 522 + topo.type = DRM_XE_TOPO_L3_BANK; 523 + err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask, 524 + sizeof(gt->fuse_topo.l3_bank_mask)); 525 + if (err) 526 + return err; 527 + } 534 528 535 529 topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ? 536 530 DRM_XE_TOPO_SIMD16_EU_PER_DSS :
+1
drivers/gpu/drm/xe/xe_wa_oob.rules
··· 37 37 16023588340 GRAPHICS_VERSION(2001) 38 38 14019789679 GRAPHICS_VERSION(1255) 39 39 GRAPHICS_VERSION_RANGE(1270, 2004) 40 + no_media_l3 MEDIA_VERSION(3000)
+3 -1
include/uapi/drm/xe_drm.h
··· 512 512 * containing the following in mask: 513 513 * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` 514 514 * means 32 DSS are available for compute. 515 - * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks 515 + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type 516 + * may be omitted if the driver is unable to query the mask from the 517 + * hardware. 516 518 * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) 517 519 * available per Dual Sub Slices (DSS). For example a query response 518 520 * containing the following in mask: