Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Add plumbing for indirect context workarounds

Some upcoming workarounds need to be emitted from the indirect workaround
context so let's add some plumbing where they will be able to easily slot
in.

No functional changes for now since everything is still deactivated.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Bspec: 45954
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20250711160153.49833-7-tvrtko.ursulin@igalia.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>

authored by

Tvrtko Ursulin and committed by
Lucas De Marchi
fba12307 a3397b24

+89 -3
+5
drivers/gpu/drm/xe/regs/xe_lrc_layout.h
··· 12 12 #define CTX_RING_START (0x08 + 1) 13 13 #define CTX_RING_CTL (0x0a + 1) 14 14 #define CTX_BB_PER_CTX_PTR (0x12 + 1) 15 + #define CTX_CS_INDIRECT_CTX (0x14 + 1) 16 + #define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) 15 17 #define CTX_TIMESTAMP (0x22 + 1) 16 18 #define CTX_TIMESTAMP_UDW (0x24 + 1) 17 19 #define CTX_INDIRECT_RING_STATE (0x26 + 1) ··· 37 35 #define INDIRECT_CTX_RING_START (0x06 + 1) 38 36 #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) 39 37 #define INDIRECT_CTX_RING_CTL (0x0a + 1) 38 + 39 + #define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) 40 + #define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) 40 41 41 42 #endif
+82 -2
drivers/gpu/drm/xe/xe_lrc.c
··· 39 39 #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) 40 40 41 41 #define LRC_PPHWSP_SIZE SZ_4K 42 + #define LRC_INDIRECT_CTX_BO_SIZE SZ_4K 42 43 #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K 43 44 #define LRC_WA_BB_SIZE SZ_4K 44 45 ··· 47 46 lrc_to_xe(struct xe_lrc *lrc) 48 47 { 49 48 return gt_to_xe(lrc->fence_ctx.gt); 49 + } 50 + 51 + static bool 52 + gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) 53 + { 54 + return false; 50 55 } 51 56 52 57 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) ··· 724 717 725 718 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) 726 719 { 727 - return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE; 720 + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - 721 + LRC_INDIRECT_RING_STATE_SIZE; 722 + 723 + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) 724 + offset -= LRC_INDIRECT_CTX_BO_SIZE; 725 + 726 + return offset; 727 + } 728 + 729 + static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) 730 + { 731 + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; 728 732 } 729 733 730 734 static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) ··· 1095 1077 return 0; 1096 1078 } 1097 1079 1080 + static int 1081 + setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) 1082 + { 1083 + static struct bo_setup rcs_funcs[] = { 1084 + }; 1085 + struct bo_setup_state state = { 1086 + .lrc = lrc, 1087 + .hwe = hwe, 1088 + .max_size = (63 * 64) /* max 63 cachelines */, 1089 + .offset = __xe_lrc_indirect_ctx_offset(lrc), 1090 + }; 1091 + int ret; 1092 + 1093 + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) 1094 + return 0; 1095 + 1096 + if (hwe->class == XE_ENGINE_CLASS_RENDER || 1097 + hwe->class == XE_ENGINE_CLASS_COMPUTE) { 1098 + state.funcs = rcs_funcs; 1099 + state.num_funcs = ARRAY_SIZE(rcs_funcs); 1100 + } 1101 + 1102 + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) 1103 + return 0; 1104 + 1105 + ret = setup_bo(&state); 
1106 + if (ret) 1107 + return ret; 1108 + 1109 + /* 1110 + * Align to 64B cacheline so there's no garbage at the end for CS to 1111 + * execute: size for indirect ctx must be a multiple of 64. 1112 + */ 1113 + while (state.written & 0xf) { 1114 + *state.ptr++ = MI_NOOP; 1115 + state.written++; 1116 + } 1117 + 1118 + finish_bo(&state); 1119 + 1120 + xe_lrc_write_ctx_reg(lrc, 1121 + CTX_CS_INDIRECT_CTX, 1122 + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | 1123 + /* Size in CLs. */ 1124 + (state.written * sizeof(u32) / 64)); 1125 + xe_lrc_write_ctx_reg(lrc, 1126 + CTX_CS_INDIRECT_CTX_OFFSET, 1127 + CTX_INDIRECT_CTX_OFFSET_DEFAULT); 1128 + 1129 + return 0; 1130 + } 1131 + 1098 1132 #define PVC_CTX_ASID (0x2e + 1) 1099 1133 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) 1100 1134 ··· 1156 1086 { 1157 1087 struct xe_gt *gt = hwe->gt; 1158 1088 const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); 1159 - const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; 1089 + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; 1160 1090 struct xe_tile *tile = gt_to_tile(gt); 1161 1091 struct xe_device *xe = gt_to_xe(gt); 1162 1092 struct iosys_map map; ··· 1171 1101 lrc->flags = 0; 1172 1102 lrc->ring.size = ring_size; 1173 1103 lrc->ring.tail = 0; 1104 + 1105 + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { 1106 + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; 1107 + bo_size += LRC_INDIRECT_CTX_BO_SIZE; 1108 + } 1109 + 1174 1110 if (xe_gt_has_indirect_ring_state(gt)) 1175 1111 lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; 1176 1112 ··· 1298 1222 xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); 1299 1223 1300 1224 err = setup_wa_bb(lrc, hwe); 1225 + if (err) 1226 + goto err_lrc_finish; 1227 + 1228 + err = setup_indirect_ctx(lrc, hwe); 1301 1229 if (err) 1302 1230 goto err_lrc_finish; 1303 1231
+2 -1
drivers/gpu/drm/xe/xe_lrc_types.h
··· 29 29 struct xe_gt *gt; 30 30 31 31 /** @flags: LRC flags */ 32 - #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 32 + #define XE_LRC_FLAG_INDIRECT_CTX 0x1 33 + #define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 33 34 u32 flags; 34 35 35 36 /** @refcount: ref count of this lrc */