Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/bmg: implement Wa_16023588340

This involves enabling l2 caching of host side memory access to VRAM
through the CPU BAR. The main fallout here is with display since VRAM
writes from CPU can now be cached in GPU l2, and display is never
coherent with caches, so needs various manual flushing. In the case of
fbc we disable it due to complications in getting this to work
correctly (in a later patch).

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Vinod Govindapillai <vinod.govindapillai@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240703124338.208220-3-matthew.auld@intel.com
(cherry picked from commit 01570b446939c3538b1aa3d059837f49fa14a3ae)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Authored by Matthew Auld and committed by Rodrigo Vivi.
27cb2b7f ad614a70

+117 -1
+2
drivers/gpu/drm/xe/Makefile
··· 25 25 26 26 uses_generated_oob := \ 27 27 $(obj)/xe_ggtt.o \ 28 + $(obj)/xe_device.o \ 28 29 $(obj)/xe_gsc.o \ 29 30 $(obj)/xe_gt.o \ 30 31 $(obj)/xe_guc.o \ 31 32 $(obj)/xe_guc_ads.o \ 32 33 $(obj)/xe_guc_pc.o \ 33 34 $(obj)/xe_migrate.o \ 35 + $(obj)/xe_pat.o \ 34 36 $(obj)/xe_ring_ops.o \ 35 37 $(obj)/xe_vm.o \ 36 38 $(obj)/xe_wa.o \
+8
drivers/gpu/drm/xe/display/xe_dsb_buffer.c
··· 7 7 #include "intel_display_types.h" 8 8 #include "intel_dsb_buffer.h" 9 9 #include "xe_bo.h" 10 + #include "xe_device.h" 11 + #include "xe_device_types.h" 10 12 #include "xe_gt.h" 11 13 12 14 u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) ··· 18 16 19 17 void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) 20 18 { 19 + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; 20 + 21 21 iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); 22 + xe_device_l2_flush(xe); 22 23 } 23 24 24 25 u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) ··· 31 26 32 27 void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) 33 28 { 29 + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; 30 + 34 31 WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); 35 32 36 33 iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); 34 + xe_device_l2_flush(xe); 37 35 } 38 36 39 37 bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
+3
drivers/gpu/drm/xe/display/xe_fb_pin.c
··· 10 10 #include "intel_fb.h" 11 11 #include "intel_fb_pin.h" 12 12 #include "xe_bo.h" 13 + #include "xe_device.h" 13 14 #include "xe_ggtt.h" 14 15 #include "xe_gt.h" 15 16 #include "xe_pm.h" ··· 305 304 if (ret) 306 305 goto err_unpin; 307 306 307 + /* Ensure DPT writes are flushed */ 308 + xe_device_l2_flush(xe); 308 309 return vma; 309 310 310 311 err_unpin:
+8
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 80 80 #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) 81 81 #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) 82 82 83 + #define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) 84 + #define CG_DIS_CNTLBUS REG_BIT(6) 85 + 83 86 #define CCS_AUX_INV XE_REG(0x4208) 84 87 85 88 #define VD0_AUX_INV XE_REG(0x4218) ··· 374 371 #define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) 375 372 376 373 #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) 374 + 375 + #define XE2_GLOBAL_INVAL XE_REG(0xb404) 376 + 377 + #define SCRATCH1LPFC XE_REG(0xb474) 378 + #define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) 377 379 378 380 #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) 379 381
+30
drivers/gpu/drm/xe/xe_device.c
··· 54 54 #include "xe_vm.h" 55 55 #include "xe_vram.h" 56 56 #include "xe_wait_user_fence.h" 57 + #include "xe_wa.h" 58 + 59 + #include <generated/xe_wa_oob.h> 57 60 58 61 static int xe_file_open(struct drm_device *dev, struct drm_file *file) 59 62 { ··· 823 820 if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) 824 821 return; 825 822 823 + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { 824 + xe_device_l2_flush(xe); 825 + return; 826 + } 827 + 826 828 for_each_gt(gt, xe, id) { 827 829 if (xe_gt_is_media_type(gt)) 828 830 continue; ··· 849 841 850 842 xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 851 843 } 844 + } 845 + 846 + void xe_device_l2_flush(struct xe_device *xe) 847 + { 848 + struct xe_gt *gt; 849 + int err; 850 + 851 + gt = xe_root_mmio_gt(xe); 852 + 853 + if (!XE_WA(gt, 16023588340)) 854 + return; 855 + 856 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 857 + if (err) 858 + return; 859 + 860 + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); 861 + 862 + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) 863 + xe_gt_err_once(gt, "Global invalidation timeout\n"); 864 + 865 + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 852 866 } 853 867 854 868 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
+1
drivers/gpu/drm/xe/xe_device.h
··· 162 162 u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); 163 163 164 164 void xe_device_td_flush(struct xe_device *xe); 165 + void xe_device_l2_flush(struct xe_device *xe); 165 166 166 167 static inline bool xe_device_wedged(struct xe_device *xe) 167 168 {
+54
drivers/gpu/drm/xe/xe_gt.c
··· 11 11 #include <drm/xe_drm.h> 12 12 #include <generated/xe_wa_oob.h> 13 13 14 + #include <generated/xe_wa_oob.h> 15 + 14 16 #include "instructions/xe_gfxpipe_commands.h" 15 17 #include "instructions/xe_mi_commands.h" 16 18 #include "regs/xe_gt_regs.h" ··· 97 95 gt->uc.guc.submission_state.enabled = false; 98 96 } 99 97 98 + static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) 99 + { 100 + u32 reg; 101 + int err; 102 + 103 + if (!XE_WA(gt, 16023588340)) 104 + return; 105 + 106 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 107 + if (WARN_ON(err)) 108 + return; 109 + 110 + if (!xe_gt_is_media_type(gt)) { 111 + xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); 112 + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); 113 + reg |= CG_DIS_CNTLBUS; 114 + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); 115 + } 116 + 117 + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); 118 + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 119 + } 120 + 121 + static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) 122 + { 123 + u32 reg; 124 + int err; 125 + 126 + if (!XE_WA(gt, 16023588340)) 127 + return; 128 + 129 + if (xe_gt_is_media_type(gt)) 130 + return; 131 + 132 + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 133 + if (WARN_ON(err)) 134 + return; 135 + 136 + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); 137 + reg &= ~CG_DIS_CNTLBUS; 138 + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); 139 + 140 + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 141 + } 142 + 100 143 /** 101 144 * xe_gt_remove() - Clean up the GT structures before driver removal 102 145 * @gt: the GT object ··· 158 111 159 112 for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) 160 113 xe_hw_fence_irq_finish(&gt->fence_irq[i]); 114 + 115 + xe_gt_disable_host_l2_vram(gt); 161 116 } 162 117 163 118 static void gt_reset_worker(struct work_struct *w); ··· 557 508 558 509 xe_gt_mcr_init_early(gt); 559 510 xe_pat_init(gt); 511 + xe_gt_enable_host_l2_vram(gt); 560 512 561 513 err = xe_uc_init(&gt->uc); 562 514 if 
(err) ··· 692 642 return vf_gt_restart(gt); 693 643 694 644 xe_pat_init(gt); 645 + 646 + xe_gt_enable_host_l2_vram(gt); 695 647 696 648 xe_gt_mcr_set_implicit_defaults(gt); 697 649 xe_reg_sr_apply_mmio(&gt->reg_sr, gt); ··· 847 795 goto err_force_wake; 848 796 849 797 xe_gt_idle_disable_pg(gt); 798 + 799 + xe_gt_disable_host_l2_vram(gt); 850 800 851 801 XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 852 802 xe_gt_dbg(gt, "suspended\n");
+10 -1
drivers/gpu/drm/xe/xe_pat.c
··· 7 7 8 8 #include <drm/xe_drm.h> 9 9 10 + #include <generated/xe_wa_oob.h> 11 + 10 12 #include "regs/xe_reg_defs.h" 11 13 #include "xe_assert.h" 12 14 #include "xe_device.h" ··· 17 15 #include "xe_gt_mcr.h" 18 16 #include "xe_mmio.h" 19 17 #include "xe_sriov.h" 18 + #include "xe_wa.h" 20 19 21 20 #define _PAT_ATS 0x47fc 22 21 #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ ··· 385 382 if (GRAPHICS_VER(xe) == 20) { 386 383 xe->pat.ops = &xe2_pat_ops; 387 384 xe->pat.table = xe2_pat_table; 388 - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); 385 + 386 + /* Wa_16023588340. XXX: Should use XE_WA */ 387 + if (GRAPHICS_VERx100(xe) == 2001) 388 + xe->pat.n_entries = 28; /* Disable CLOS3 */ 389 + else 390 + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); 391 + 389 392 xe->pat.idx[XE_CACHE_NONE] = 3; 390 393 xe->pat.idx[XE_CACHE_WT] = 15; 391 394 xe->pat.idx[XE_CACHE_WB] = 2;
+1
drivers/gpu/drm/xe/xe_wa_oob.rules
··· 29 29 13011645652 GRAPHICS_VERSION(2004) 30 30 22019338487 MEDIA_VERSION(2000) 31 31 GRAPHICS_VERSION(2001) 32 + 16023588340 GRAPHICS_VERSION(2001)