Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "drm/xe: Force write completion of MI_STORE_DATA_IMM"

This reverts commit 1460bb1fef9ccf7390af0d74a15252442fd6effd.

Nowhere in the driver is an MI_STORE_DATA_IMM followed by a read of
the same memory address within the same batch buffer, and the posted
writes are flushed with PIPE_CONTROL or MI_FLUSH_DW by the functions in
xe_ring_ops.c, so there is no need to set the
MI_FORCE_WRITE_COMPLETION_CHECK bit in the command.

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Fixes: 1460bb1fef9c ("drm/xe: Force write completion of MI_STORE_DATA_IMM")
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241227183230.101334-1-jose.souza@intel.com

+11 -19
+6 -7
drivers/gpu/drm/xe/instructions/xe_mi_commands.h
··· 33 33 #define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) 34 34 #define MI_FORCE_WAKEUP __MI_INSTR(0x1D) 35 35 36 - #define MI_STORE_DATA_IMM __MI_INSTR(0x20) 37 - #define MI_SDI_GGTT REG_BIT(22) 38 - #define MI_FORCE_WRITE_COMPLETION_CHECK REG_BIT(10) 39 - #define MI_SDI_LEN_DW GENMASK(9, 0) 40 - #define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) 41 - #define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ 42 - REG_BIT(21)) 36 + #define MI_STORE_DATA_IMM __MI_INSTR(0x20) 37 + #define MI_SDI_GGTT REG_BIT(22) 38 + #define MI_SDI_LEN_DW GENMASK(9, 0) 39 + #define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) 40 + #define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ 41 + REG_BIT(21)) 43 42 44 43 #define MI_LOAD_REGISTER_IMM __MI_INSTR(0x22) 45 44 #define MI_LRI_LRM_CS_MMIO REG_BIT(19)
+3 -8
drivers/gpu/drm/xe/xe_migrate.c
··· 581 581 while (ptes) { 582 582 u32 chunk = min(MAX_PTE_PER_SDI, ptes); 583 583 584 - bb->cs[bb->len++] = MI_STORE_DATA_IMM | 585 - MI_FORCE_WRITE_COMPLETION_CHECK | 586 - MI_SDI_NUM_QW(chunk); 584 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); 587 585 bb->cs[bb->len++] = ofs; 588 586 bb->cs[bb->len++] = 0; 589 587 ··· 1223 1225 if (!(bb->len & 1)) 1224 1226 bb->cs[bb->len++] = MI_NOOP; 1225 1227 1226 - bb->cs[bb->len++] = MI_STORE_DATA_IMM | 1227 - MI_FORCE_WRITE_COMPLETION_CHECK | 1228 - MI_SDI_NUM_QW(chunk); 1228 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); 1229 1229 bb->cs[bb->len++] = lower_32_bits(addr); 1230 1230 bb->cs[bb->len++] = upper_32_bits(addr); 1231 1231 if (pt_op->bind) ··· 1388 1392 u32 idx = 0; 1389 1393 1390 1394 bb->cs[bb->len++] = MI_STORE_DATA_IMM | 1391 - MI_FORCE_WRITE_COMPLETION_CHECK | 1392 - MI_SDI_NUM_QW(chunk); 1395 + MI_SDI_NUM_QW(chunk); 1393 1396 bb->cs[bb->len++] = ofs; 1394 1397 bb->cs[bb->len++] = 0; /* upper_32_bits */ 1395 1398
+2 -4
drivers/gpu/drm/xe/xe_ring_ops.c
··· 72 72 73 73 static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) 74 74 { 75 - dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | 76 - MI_FORCE_WRITE_COMPLETION_CHECK | MI_SDI_NUM_DW(1); 75 + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); 77 76 dw[i++] = addr; 78 77 dw[i++] = 0; 79 78 dw[i++] = value; ··· 162 163 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, 163 164 u32 *dw, int i) 164 165 { 165 - dw[i++] = MI_STORE_DATA_IMM | MI_FORCE_WRITE_COMPLETION_CHECK | 166 - MI_SDI_NUM_QW(1); 166 + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1); 167 167 dw[i++] = lower_32_bits(addr); 168 168 dw[i++] = upper_32_bits(addr); 169 169 dw[i++] = lower_32_bits(value);