Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe: Initial MSI-X support for HW engines

- Configure the HW engines to work with MSI-X
- Program the LRC to use memirq infra (similar to VF)
- CS_INT_VEC field added to the LRC

Bspec: 60342, 72547

Signed-off-by: Ilia Levi <ilia.levi@intel.com>
Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241213072538.6823-3-ilia.levi@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Authored by Ilia Levi, committed by Rodrigo Vivi
commit 21d07f5f (parent da889070)

+44 -11
+3
drivers/gpu/drm/xe/regs/xe_engine_regs.h
··· 83 83 #define RING_IMR(base) XE_REG((base) + 0xa8) 84 84 #define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac) 85 85 86 + #define CS_INT_VEC(base) XE_REG((base) + 0x1b8) 87 + 86 88 #define RING_EIR(base) XE_REG((base) + 0xb0) 87 89 #define RING_EMR(base) XE_REG((base) + 0xb4) 88 90 #define RING_ESR(base) XE_REG((base) + 0xb8) ··· 140 138 141 139 #define RING_MODE(base) XE_REG((base) + 0x29c) 142 140 #define GFX_DISABLE_LEGACY_MODE REG_BIT(3) 141 + #define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13) 143 142 144 143 #define RING_TIMESTAMP(base) XE_REG((base) + 0x358) 145 144
+3
drivers/gpu/drm/xe/regs/xe_lrc_layout.h
··· 25 25 #define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3) 26 26 #define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4) 27 27 28 + #define CTX_CS_INT_VEC_REG 0x5a 29 + #define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1) 30 + 28 31 #define INDIRECT_CTX_RING_HEAD (0x02 + 1) 29 32 #define INDIRECT_CTX_RING_TAIL (0x04 + 1) 30 33 #define INDIRECT_CTX_RING_START (0x06 + 1)
+3 -1
drivers/gpu/drm/xe/xe_exec_queue.c
··· 16 16 #include "xe_hw_engine_class_sysfs.h" 17 17 #include "xe_hw_engine_group.h" 18 18 #include "xe_hw_fence.h" 19 + #include "xe_irq.h" 19 20 #include "xe_lrc.h" 20 21 #include "xe_macros.h" 21 22 #include "xe_migrate.h" ··· 69 68 q->gt = gt; 70 69 q->class = hwe->class; 71 70 q->width = width; 71 + q->msix_vec = XE_IRQ_DEFAULT_MSIX; 72 72 q->logical_mask = logical_mask; 73 73 q->fence_irq = &gt->fence_irq[hwe->class]; 74 74 q->ring_ops = gt->ring_ops[hwe->class]; ··· 119 117 } 120 118 121 119 for (i = 0; i < q->width; ++i) { 122 - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); 120 + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec); 123 121 if (IS_ERR(q->lrc[i])) { 124 122 err = PTR_ERR(q->lrc[i]); 125 123 goto err_unlock;
+2
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 63 63 char name[MAX_FENCE_NAME_LEN]; 64 64 /** @width: width (number BB submitted per exec) of this exec queue */ 65 65 u16 width; 66 + /** @msix_vec: MSI-X vector (for platforms that support it) */ 67 + u16 msix_vec; 66 68 /** @fence_irq: fence IRQ used to signal job completion */ 67 69 struct xe_hw_fence_irq *fence_irq; 68 70
+7 -3
drivers/gpu/drm/xe/xe_execlist.c
··· 17 17 #include "xe_exec_queue.h" 18 18 #include "xe_gt.h" 19 19 #include "xe_hw_fence.h" 20 + #include "xe_irq.h" 20 21 #include "xe_lrc.h" 21 22 #include "xe_macros.h" 22 23 #include "xe_mmio.h" ··· 48 47 struct xe_mmio *mmio = &gt->mmio; 49 48 struct xe_device *xe = gt_to_xe(gt); 50 49 u64 lrc_desc; 50 + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); 51 51 52 52 lrc_desc = xe_lrc_descriptor(lrc); 53 53 ··· 82 80 xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), 83 81 xe_bo_ggtt_addr(hwe->hwsp)); 84 82 xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); 85 - xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), 86 - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 83 + 84 + if (xe_device_has_msix(gt_to_xe(hwe->gt))) 85 + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); 86 + xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode); 87 87 88 88 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), 89 89 lower_32_bits(lrc_desc)); ··· 269 265 270 266 port->hwe = hwe; 271 267 272 - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K); 268 + port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX); 273 269 if (IS_ERR(port->lrc)) { 274 270 err = PTR_ERR(port->lrc); 275 271 goto err;
+5 -2
drivers/gpu/drm/xe/xe_hw_engine.c
··· 324 324 { 325 325 u32 ccs_mask = 326 326 xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE); 327 + u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE); 327 328 328 329 if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask) 329 330 xe_mmio_write32(&hwe->gt->mmio, RCU_MODE, ··· 333 332 xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0); 334 333 xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0), 335 334 xe_bo_ggtt_addr(hwe->hwsp)); 336 - xe_hw_engine_mmio_write32(hwe, RING_MODE(0), 337 - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE)); 335 + 336 + if (xe_device_has_msix(gt_to_xe(hwe->gt))) 337 + ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE); 338 + xe_hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode); 338 339 xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), 339 340 _MASKED_BIT_DISABLE(STOP_RING)); 340 341 xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
+20 -4
drivers/gpu/drm/xe/xe_lrc.c
··· 584 584 { 585 585 struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq; 586 586 struct xe_device *xe = gt_to_xe(hwe->gt); 587 + u8 num_regs; 587 588 588 589 if (!xe_device_uses_memirq(xe)) 589 590 return; ··· 594 593 regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; 595 594 regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); 596 595 597 - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | 596 + num_regs = xe_device_has_msix(xe) ? 3 : 2; 597 + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) | 598 598 MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; 599 599 regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; 600 600 regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe); 601 601 regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; 602 602 regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe); 603 + 604 + if (xe_device_has_msix(xe)) { 605 + regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr; 606 + /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */ 607 + } 603 608 } 604 609 605 610 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) ··· 883 876 #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) 884 877 885 878 static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, 886 - struct xe_vm *vm, u32 ring_size) 879 + struct xe_vm *vm, u32 ring_size, u16 msix_vec) 887 880 { 888 881 struct xe_gt *gt = hwe->gt; 889 882 struct xe_tile *tile = gt_to_tile(gt); ··· 952 945 xe_drm_client_add_bo(vm->xef->client, lrc->bo); 953 946 } 954 947 948 + if (xe_device_has_msix(xe)) { 949 + xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR, 950 + xe_memirq_status_ptr(&tile->memirq, hwe)); 951 + xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR, 952 + xe_memirq_source_ptr(&tile->memirq, hwe)); 953 + xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec); 954 + } 955 + 955 956 if (xe_gt_has_indirect_ring_state(gt)) { 956 957 xe_lrc_write_ctx_reg(lrc, 
CTX_INDIRECT_RING_STATE, 957 958 __xe_lrc_indirect_ring_ggtt_addr(lrc)); ··· 1020 1005 * @hwe: Hardware Engine 1021 1006 * @vm: The VM (address space) 1022 1007 * @ring_size: LRC ring size 1008 + * @msix_vec: MSI-X interrupt vector (for platforms that support it) 1023 1009 * 1024 1010 * Allocate and initialize the Logical Ring Context (LRC). 1025 1011 * ··· 1028 1012 * upon failure. 1029 1013 */ 1030 1014 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 1031 - u32 ring_size) 1015 + u32 ring_size, u16 msix_vec) 1032 1016 { 1033 1017 struct xe_lrc *lrc; 1034 1018 int err; ··· 1037 1021 if (!lrc) 1038 1022 return ERR_PTR(-ENOMEM); 1039 1023 1040 - err = xe_lrc_init(lrc, hwe, vm, ring_size); 1024 + err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec); 1041 1025 if (err) { 1042 1026 kfree(lrc); 1043 1027 return ERR_PTR(err);
+1 -1
drivers/gpu/drm/xe/xe_lrc.h
··· 42 42 #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) 43 43 44 44 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, 45 - u32 ring_size); 45 + u32 ring_size, u16 msix_vec); 46 46 void xe_lrc_destroy(struct kref *ref); 47 47 48 48 /**