
Merge tag 'drm-xe-next-2024-12-11' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- Make OA buffer size configurable (Sai)

Display Changes (including i915):
- Fix ttm_bo_access() usage (Auld)
- Power request asserting/deasserting for Xe3lpd (Mika)
- One Type-C conversion towards struct intel_display (Mika)

Driver Changes:
- GuC capture related fixes (Everest, Zhanjun)
- Move old workaround to OOB infra (Lucas)
- Compute mode change refactoring (Bala)
- Add ufence and g2h flushes for LNL Hybrid timeouts (Nirmoy)
- Avoid unnecessary OOM kills (Thomas)
- Restore system memory GGTT mappings (Brost)
- Fix build error for XE_IOCTL_DBG macro (Gyeyoung)
- Documentation updates and fixes (Lucas, Randy)
- A few exec IOCTL fixes (Brost)
- Fix potential GGTT allocation leak (Michal)
- Fix races on fdinfo (Lucas)
- SRIOV VF: Post-migration recovery worker basis (Tomasz)
- GuC Communication fixes and improvements (Michal, John, Tomasz, Auld, Jonathan)
- SRIOV PF: Add support for VF scheduling priority
- Trace improvements (Lucas, Auld, Oak)
- Hibernation on igpu fixes and improvements (Auld)
- GT oriented logs/asserts improvements (Michal)
- Take job list lock in xe_sched_first_pending_job (Nirmoy)
- GSC: Improve SW proxy error checking and logging (Daniele)
- GuC crash notifications & drop default log verbosity (John)
- Fix runtime_pm handling in OA (Ashutosh)
- Allow fault injection in vm create and vm bind IOCTLs (Francois)
- TLB invalidation fixes (Nirmoy, Daniele)
- Devcoredump Improvements, doc and fixes (Brost, Lucas, Zhanjun, John)
- Wake up waiters after setting ufence->signalled (Nirmoy)
- Mark preempt fence workqueue as reclaim (Brost)
- Trivial header/flags cleanups (Lucas)
- VRAM drop 2G block restriction (Auld)
- Drop useless d3cold allowed message (Brost)
- SRIOV PF: Drop 2GiB limit of fair LMEM allocation (Michal)
- Add another PTL PCI ID (Atwood)
- Allow bo mapping on multiple ggtts (Niranjana)
- Add support for GuC-to-GuC communication (John)
- Update xe2_graphics name string (Roper)
- VRAM: fix lpfn check (Auld)
- Add Xe3 workaround (Apoorva)
- Migrate fixes (Auld)
- Fix non-contiguous VRAM BO access (Brost)
- Log throttle reasons (Raag)
- Enable PMT support for BMG (Michael)
- IRQ related fixes and improvements (Ilia)
- Avoid evicting objects of the same VM in non-fault mode (Oak)
- Fixes in tests (Nirmoy)
- Fix ERR_PTR handling (Mirsad)
- Some reg_sr/whitelist fixes and refactors (Lucas)

Signed-off-by: Dave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmdaHkMACgkQ+mJfZA7r
# E8o+twf/XYZTk4O3qQ+yNL3PDQT0NIKjH8mEnmu4udyIw/sYhQe6ji+uh1YutK8Y
# 41IQc06qQogTj36bqSwbjThw5asMfRh2sNR/p1uOy7RGUnN25FuYSXEgOeDWi/Ec
# xrZE1TKPotFGeGI09KJmzjzMq94cgv97Pxma+5m8BjVsvzXQSzEJ2r9cC6ruSfNT
# O5Jq5nqxHSkWUbKCxPnixSlGnH4jbsuiqS1E1pnH+u6ijxsfhOJj686wLn2FRkiw
# 6FhXmJBrd8AZ0Q2E7h3UswE5O88I0ALDc58OINAzD1GMyzvZj2vB1pXgj5uNr0/x
# Ku4cxu1jprsi+FLUdKAdYpxRBRanow==
# =3Ou7
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 12 Dec 2024 09:20:35 AEST
# gpg: using RSA key 6D207068EEDD65091C2CE2A3FA625F640EEB13CA
# gpg: Good signature from "Rodrigo Vivi <rodrigo.vivi@intel.com>" [unknown]
# gpg: aka "Rodrigo Vivi <rodrigo.vivi@gmail.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 6D20 7068 EEDD 6509 1C2C E2A3 FA62 5F64 0EEB 13CA
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Z1ofx-fExLQKV_e4@intel.com

+2463 -590
+1
Documentation/gpu/xe/index.rst
··· 23 23 xe_firmware 24 24 xe_tile 25 25 xe_debugging 26 + xe_devcoredump 26 27 xe-drm-usage-stats.rst
+14
Documentation/gpu/xe/xe_devcoredump.rst
··· 1 + .. SPDX-License-Identifier: (GPL-2.0+ OR MIT) 2 + 3 + ================== 4 + Xe Device Coredump 5 + ================== 6 + 7 + .. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c 8 + :doc: Xe device coredump 9 + 10 + Internal API 11 + ============ 12 + 13 + .. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c 14 + :internal:
+7
drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h
··· 200 200 #define XELPDP_SSC_ENABLE_PLLA REG_BIT(1) 201 201 #define XELPDP_SSC_ENABLE_PLLB REG_BIT(0) 202 202 203 + #define TCSS_DISP_MAILBOX_IN_CMD _MMIO(0x161300) 204 + #define TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY REG_BIT(31) 205 + #define TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK REG_GENMASK(7, 0) 206 + #define TCSS_DISP_MAILBOX_IN_CMD_DATA(val) REG_FIELD_PREP(TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK, val) 207 + 208 + #define TCSS_DISP_MAILBOX_IN_DATA _MMIO(0x161304) 209 + 203 210 /* C10 Vendor Registers */ 204 211 #define PHY_C10_VDR_PLL(idx) (0xC00 + (idx)) 205 212 #define C10_PLL0_FRACEN REG_BIT8(4)
+35 -4
drivers/gpu/drm/i915/display/intel_tc.c
··· 1013 1013 return true; 1014 1014 } 1015 1015 1016 + /* 1017 + * Gfx driver WA 14020908590 for PTL tcss_rxdetect_clkswb_req/ack 1018 + * handshake violation when pwwreq= 0->1 during TC7/10 entry 1019 + */ 1020 + static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enable) 1021 + { 1022 + /* check if mailbox is running busy */ 1023 + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, 1024 + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { 1025 + drm_dbg_kms(display->drm, 1026 + "Timeout waiting for TCSS mailbox run/busy bit to clear\n"); 1027 + return; 1028 + } 1029 + 1030 + intel_de_write(display, TCSS_DISP_MAILBOX_IN_DATA, enable ? 1 : 0); 1031 + intel_de_write(display, TCSS_DISP_MAILBOX_IN_CMD, 1032 + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY | 1033 + TCSS_DISP_MAILBOX_IN_CMD_DATA(0x1)); 1034 + 1035 + /* wait to clear mailbox running busy bit before continuing */ 1036 + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, 1037 + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { 1038 + drm_dbg_kms(display->drm, 1039 + "Timeout after writing data to mailbox. Mailbox run/busy bit did not clear\n"); 1040 + return; 1041 + } 1042 + } 1043 + 1016 1044 static void __xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable) 1017 1045 { 1018 - struct drm_i915_private *i915 = tc_to_i915(tc); 1046 + struct intel_display *display = to_intel_display(tc->dig_port); 1019 1047 enum port port = tc->dig_port->base.port; 1020 - i915_reg_t reg = XELPDP_PORT_BUF_CTL1(i915, port); 1048 + i915_reg_t reg = XELPDP_PORT_BUF_CTL1(display, port); 1021 1049 u32 val; 1022 1050 1023 1051 assert_tc_cold_blocked(tc); 1024 1052 1025 - val = intel_de_read(i915, reg); 1053 + if (DISPLAY_VER(display) == 30) 1054 + xelpdp_tc_power_request_wa(display, enable); 1055 + 1056 + val = intel_de_read(display, reg); 1026 1057 if (enable) 1027 1058 val |= XELPDP_TCSS_POWER_REQUEST; 1028 1059 else 1029 1060 val &= ~XELPDP_TCSS_POWER_REQUEST; 1030 - intel_de_write(i915, reg, val); 1061 + intel_de_write(display, reg, val); 1031 1062 } 1032 1063 1033 1064 static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable)
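The PTL power-request workaround in the intel_tc.c hunk is a classic doorbell-style mailbox handshake: wait for the run/busy bit to clear, write the payload, kick the command with run/busy set, then wait for firmware to clear it again. A minimal userspace sketch of that sequence against a simulated register (all names and the simulated latency are illustrative, not the real `intel_de_*` API):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define MBOX_RUN_BUSY (1u << 31)   /* bit 31, like TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY */

/* Simulated hardware: the "firmware" clears RUN_BUSY after a few polls. */
static uint32_t mbox_cmd, mbox_data;
static int busy_polls_left;

static uint32_t reg_read(void)
{
    if ((mbox_cmd & MBOX_RUN_BUSY) && --busy_polls_left <= 0)
        mbox_cmd &= ~MBOX_RUN_BUSY;    /* firmware consumed the command */
    return mbox_cmd;
}

/* Poll until RUN_BUSY clears or the retry budget runs out (like intel_de_wait_for_clear). */
static bool wait_for_clear(int tries)
{
    while (tries--)
        if (!(reg_read() & MBOX_RUN_BUSY))
            return true;
    return false;
}

/* The workaround sequence: wait idle, write payload, kick command, wait for ack. */
static bool tc_power_request(bool enable)
{
    if (!wait_for_clear(10))
        return false;                  /* mailbox stuck busy, bail out */
    mbox_data = enable ? 1 : 0;
    busy_polls_left = 3;               /* simulated firmware latency */
    mbox_cmd = MBOX_RUN_BUSY | 0x1;    /* RUN_BUSY | command opcode */
    return wait_for_clear(10);         /* wait for firmware to ack */
}
```

Note how the real patch deliberately gives up with a debug message rather than forcing the power request when either wait times out; the sketch mirrors that by returning `false` instead of proceeding.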
+32 -8
drivers/gpu/drm/ttm/ttm_bo_vm.c
··· 405 405 return len; 406 406 } 407 407 408 - int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, 409 - void *buf, int len, int write) 408 + /** 409 + * ttm_bo_access - Helper to access a buffer object 410 + * 411 + * @bo: ttm buffer object 412 + * @offset: access offset into buffer object 413 + * @buf: pointer to caller memory to read into or write from 414 + * @len: length of access 415 + * @write: write access 416 + * 417 + * Utility function to access a buffer object. Useful when buffer object cannot 418 + * be easily mapped (non-contiguous, non-visible, etc...). Should not directly 419 + * be exported to user space via a peak / poke interface. 420 + * 421 + * Returns: 422 + * @len if successful, negative error code on failure. 423 + */ 424 + int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, 425 + void *buf, int len, int write) 410 426 { 411 - struct ttm_buffer_object *bo = vma->vm_private_data; 412 - unsigned long offset = (addr) - vma->vm_start + 413 - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) 414 - << PAGE_SHIFT); 415 427 int ret; 416 428 417 429 if (len < 1 || (offset + len) > bo->base.size) ··· 441 429 break; 442 430 default: 443 431 if (bo->bdev->funcs->access_memory) 444 - ret = bo->bdev->funcs->access_memory( 445 - bo, offset, buf, len, write); 432 + ret = bo->bdev->funcs->access_memory 433 + (bo, offset, buf, len, write); 446 434 else 447 435 ret = -EIO; 448 436 } ··· 450 438 ttm_bo_unreserve(bo); 451 439 452 440 return ret; 441 + } 442 + EXPORT_SYMBOL(ttm_bo_access); 443 + 444 + int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, 445 + void *buf, int len, int write) 446 + { 447 + struct ttm_buffer_object *bo = vma->vm_private_data; 448 + unsigned long offset = (addr) - vma->vm_start + 449 + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) 450 + << PAGE_SHIFT); 451 + 452 + return ttm_bo_access(bo, offset, buf, len, write); 453 453 } 454 454 EXPORT_SYMBOL(ttm_bo_vm_access); 
455 455
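The ttm_bo_vm.c refactor above splits the old `ttm_bo_vm_access()` into a reusable `ttm_bo_access()` helper plus a thin VMA wrapper that only translates the faulting address into a BO-relative byte offset. That translation is the subtle part; here is a plain-C replica of the arithmetic (function name is illustrative), assuming a 4K page size:

```c
#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12

/*
 * ttm_bo_vm_access computes:
 *   offset = (addr - vma->vm_start)
 *          + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) << PAGE_SHIFT)
 * i.e. the byte offset into the mapping, plus the page offset of the mapping
 * itself within the BO's VMA-manager node.
 */
static uint64_t vma_addr_to_bo_offset(uint64_t addr, uint64_t vm_start,
                                      uint64_t vm_pgoff, uint64_t node_start_pg)
{
    return (addr - vm_start) + ((vm_pgoff - node_start_pg) << PAGE_SHIFT);
}
```

For a VMA mapped 4 pages into the BO (`vm_pgoff` 100, node start 96), an access at `vm_start + 0x1234` lands at BO offset `4 * 4096 + 0x1234`.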
+4 -1
drivers/gpu/drm/xe/Makefile
··· 101 101 xe_trace.o \ 102 102 xe_trace_bo.o \ 103 103 xe_trace_guc.o \ 104 + xe_trace_lrc.o \ 104 105 xe_ttm_sys_mgr.o \ 105 106 xe_ttm_stolen_mgr.o \ 106 107 xe_ttm_vram_mgr.o \ ··· 111 110 xe_vm.o \ 112 111 xe_vram.o \ 113 112 xe_vram_freq.o \ 113 + xe_vsec.o \ 114 114 xe_wait_user_fence.o \ 115 115 xe_wa.o \ 116 116 xe_wopcm.o ··· 126 124 xe_gt_sriov_vf.o \ 127 125 xe_guc_relay.o \ 128 126 xe_memirq.o \ 129 - xe_sriov.o 127 + xe_sriov.o \ 128 + xe_sriov_vf.o 130 129 131 130 xe-$(CONFIG_PCI_IOV) += \ 132 131 xe_gt_sriov_pf.o \
+20
drivers/gpu/drm/xe/abi/guc_actions_abi.h
··· 134 134 XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, 135 135 XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, 136 136 XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, 137 + XE_GUC_ACTION_REGISTER_G2G = 0x4507, 138 + XE_GUC_ACTION_DEREGISTER_G2G = 0x4508, 137 139 XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, 138 140 XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, 139 141 XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, ··· 219 217 XE_GUC_TLB_INVAL_MODE_HEAVY = 0x0, 220 218 XE_GUC_TLB_INVAL_MODE_LITE = 0x1, 221 219 }; 220 + 221 + /* 222 + * GuC to GuC communication (de-)registration fields: 223 + */ 224 + enum xe_guc_g2g_type { 225 + XE_G2G_TYPE_IN = 0x0, 226 + XE_G2G_TYPE_OUT, 227 + XE_G2G_TYPE_LIMIT, 228 + }; 229 + 230 + #define XE_G2G_REGISTER_DEVICE REG_GENMASK(16, 16) 231 + #define XE_G2G_REGISTER_TILE REG_GENMASK(15, 12) 232 + #define XE_G2G_REGISTER_TYPE REG_GENMASK(11, 8) 233 + #define XE_G2G_REGISTER_SIZE REG_GENMASK(7, 0) 234 + 235 + #define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16) 236 + #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) 237 + #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) 222 238 223 239 #endif
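The new `XE_G2G_REGISTER_*` fields above describe one GuC-to-GuC channel in a single dword: device in bit 16, tile in 15:12, direction type in 11:8, size in 7:0. A userspace sketch of how such a descriptor packs, using stand-ins for the kernel's `REG_GENMASK`/`REG_FIELD_PREP` (the helper name and the size unit are assumptions for illustration):

```c
#include <assert.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel's REG_GENMASK / REG_FIELD_PREP. */
#define GENMASK(h, l)    (((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP(m, v) (((uint32_t)(v) << __builtin_ctz(m)) & (m))

/* Field layout from the XE_G2G_REGISTER_* definitions. */
#define G2G_DEVICE GENMASK(16, 16)
#define G2G_TILE   GENMASK(15, 12)
#define G2G_TYPE   GENMASK(11, 8)
#define G2G_SIZE   GENMASK(7, 0)

enum g2g_type { G2G_TYPE_IN = 0, G2G_TYPE_OUT, G2G_TYPE_LIMIT };

/* Pack one XE_GUC_ACTION_REGISTER_G2G descriptor word. */
static uint32_t g2g_register_word(unsigned int dev, unsigned int tile,
                                  enum g2g_type type, unsigned int size)
{
    return FIELD_PREP(G2G_DEVICE, dev) | FIELD_PREP(G2G_TILE, tile) |
           FIELD_PREP(G2G_TYPE, type) | FIELD_PREP(G2G_SIZE, size);
}
```

For example, device 1, tile 2, an outbound channel, size field 0x10 packs to 0x12110.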
+38
drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
··· 502 502 #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 503 503 504 504 /** 505 + * DOC: VF2GUC_NOTIFY_RESFIX_DONE 506 + * 507 + * This action is used by VF to notify the GuC that the VF KMD has completed 508 + * post-migration recovery steps. 509 + * 510 + * This message must be sent as `MMIO HXG Message`_. 511 + * 512 + * +---+-------+--------------------------------------------------------------+ 513 + * | | Bits | Description | 514 + * +===+=======+==============================================================+ 515 + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | 516 + * | +-------+--------------------------------------------------------------+ 517 + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | 518 + * | +-------+--------------------------------------------------------------+ 519 + * | | 27:16 | DATA0 = MBZ | 520 + * | +-------+--------------------------------------------------------------+ 521 + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | 522 + * +---+-------+--------------------------------------------------------------+ 523 + * 524 + * +---+-------+--------------------------------------------------------------+ 525 + * | | Bits | Description | 526 + * +===+=======+==============================================================+ 527 + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | 528 + * | +-------+--------------------------------------------------------------+ 529 + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | 530 + * | +-------+--------------------------------------------------------------+ 531 + * | | 27:0 | DATA0 = MBZ | 532 + * +---+-------+--------------------------------------------------------------+ 533 + */ 534 + #define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u 535 + 536 + #define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN 537 + #define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 538 + 539 + #define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN 540 + #define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 541 + 542 + /** 505 543 * DOC: VF2GUC_QUERY_SINGLE_KLV 506 544 * 507 545 * This action is used by VF to query value of the single KLV data.
+14
drivers/gpu/drm/xe/abi/guc_klvs_abi.h
··· 291 291 * 292 292 * :0: (default) 293 293 * :1-65535: number of contexts (Gen12) 294 + * 295 + * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C 296 + * This config controls VF’s scheduling priority. 297 + * 298 + * :0: LOW = schedule VF only if it has active work (default) 299 + * :1: NORMAL = schedule VF always, irrespective of whether it has work or not 300 + * :2: HIGH = schedule VF in the next time-slice after current active 301 + * time-slice completes if it has active work 294 302 */ 295 303 296 304 #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 ··· 350 342 351 343 #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b 352 344 #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u 345 + 346 + #define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c 347 + #define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u 348 + #define GUC_SCHED_PRIORITY_LOW 0u 349 + #define GUC_SCHED_PRIORITY_NORMAL 1u 350 + #define GUC_SCHED_PRIORITY_HIGH 2u 353 351 354 352 /* 355 353 * Workaround keys:
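The new `GUC_KLV_VF_CFG_SCHED_PRIORITY` config above is delivered to GuC as a KLV (key/length/value) tuple. A hedged sketch of how such a tuple packs into a dword stream, assuming the KLV header layout used elsewhere in the driver (key in bits 31:16, dword length in bits 15:0, mirroring `GUC_KLV_0_KEY`/`GUC_KLV_0_LEN`):

```c
#include <assert.h>
#include <stdint.h>

/* Values from guc_klvs_abi.h above. */
#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c
#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u
#define GUC_SCHED_PRIORITY_LOW    0u
#define GUC_SCHED_PRIORITY_NORMAL 1u
#define GUC_SCHED_PRIORITY_HIGH   2u

/*
 * Pack one scheduling-priority KLV into buf; returns dwords written.
 * Header layout (key << 16 | len) is an assumption based on the driver's
 * generic KLV format, shown here for illustration only.
 */
static unsigned int klv_sched_priority(uint32_t buf[2], uint32_t priority)
{
    buf[0] = ((uint32_t)GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY << 16) |
             GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN;
    buf[1] = priority;   /* LOW=0, NORMAL=1, HIGH=2 */
    return 2;
}
```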
+1 -12
drivers/gpu/drm/xe/display/ext/i915_irq.c
··· 53 53 54 54 bool intel_irqs_enabled(struct xe_device *xe) 55 55 { 56 - /* 57 - * XXX: i915 has a racy handling of the irq.enabled, since it doesn't 58 - * lock its transitions. Because of that, the irq.enabled sometimes 59 - * is not read with the irq.lock in place. 60 - * However, the most critical cases like vblank and page flips are 61 - * properly using the locks. 62 - * We cannot take the lock in here or run any kind of assert because 63 - * of i915 inconsistency. 64 - * But at this point the xe irq is better protected against races, 65 - * although the full solution would be protecting the i915 side. 66 - */ 67 - return xe->irq.enabled; 56 + return atomic_read(&xe->irq.enabled); 68 57 } 69 58 70 59 void intel_synchronize_irq(struct xe_device *xe)
+1 -24
drivers/gpu/drm/xe/display/intel_bo.c
··· 40 40 int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size) 41 41 { 42 42 struct xe_bo *bo = gem_to_xe_bo(obj); 43 - struct ttm_bo_kmap_obj map; 44 - void *src; 45 - bool is_iomem; 46 - int ret; 47 43 48 - ret = xe_bo_lock(bo, true); 49 - if (ret) 50 - return ret; 51 - 52 - ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map); 53 - if (ret) 54 - goto out_unlock; 55 - 56 - offset &= ~PAGE_MASK; 57 - src = ttm_kmap_obj_virtual(&map, &is_iomem); 58 - src += offset; 59 - if (is_iomem) 60 - memcpy_fromio(dst, (void __iomem *)src, size); 61 - else 62 - memcpy(dst, src, size); 63 - 64 - ttm_bo_kunmap(&map); 65 - out_unlock: 66 - xe_bo_unlock(bo); 67 - return ret; 44 + return xe_bo_read(bo, offset, dst, size); 68 45 } 69 46 70 47 struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
+7 -5
drivers/gpu/drm/xe/display/xe_fb_pin.c
··· 161 161 } 162 162 163 163 vma->dpt = dpt; 164 - vma->node = dpt->ggtt_node; 164 + vma->node = dpt->ggtt_node[tile0->id]; 165 165 return 0; 166 166 } 167 167 ··· 213 213 if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) 214 214 align = max_t(u32, align, SZ_64K); 215 215 216 - if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { 217 - vma->node = bo->ggtt_node; 216 + if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { 217 + vma->node = bo->ggtt_node[ggtt->tile->id]; 218 218 } else if (view->type == I915_GTT_VIEW_NORMAL) { 219 219 u32 x, size = bo->ttm.base.size; 220 220 ··· 345 345 346 346 static void __xe_unpin_fb_vma(struct i915_vma *vma) 347 347 { 348 + u8 tile_id = vma->node->ggtt->tile->id; 349 + 348 350 if (vma->dpt) 349 351 xe_bo_unpin_map_no_vm(vma->dpt); 350 - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || 351 - vma->bo->ggtt_node->base.start != vma->node->base.start) 352 + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || 353 + vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start) 352 354 xe_ggtt_node_remove(vma->node, false); 353 355 354 356 ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
+2
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 445 445 446 446 #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) 447 447 #define ENABLE_SMALLPL REG_BIT(15) 448 + #define SMP_WAIT_FETCH_MERGING_COUNTER REG_GENMASK(11, 10) 449 + #define SMP_FORCE_128B_OVERFETCH REG_FIELD_PREP(SMP_WAIT_FETCH_MERGING_COUNTER, 1) 448 450 #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) 449 451 #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) 450 452 #define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0)
+1 -8
drivers/gpu/drm/xe/regs/xe_oa_regs.h
··· 41 41 42 42 #define OAG_OABUFFER XE_REG(0xdb08) 43 43 #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) 44 - #define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) 45 - #define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) 46 - #define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) 47 - #define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) 48 - #define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) 49 - #define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) 50 - #define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) 51 - #define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) 52 44 #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ 53 45 54 46 #define OAG_OACONTROL XE_REG(0xdaf4) ··· 55 63 #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) 56 64 #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) 57 65 #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) 66 + #define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12) 58 67 #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) 59 68 #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) 60 69 #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+19
drivers/gpu/drm/xe/regs/xe_pmt.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + #ifndef _XE_PMT_H_ 6 + #define _XE_PMT_H_ 7 + 8 + #define SOC_BASE 0x280000 9 + 10 + #define BMG_PMT_BASE_OFFSET 0xDB000 11 + #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) 12 + 13 + #define BMG_TELEMETRY_BASE_OFFSET 0xE0000 14 + #define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) 15 + 16 + #define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) 17 + #define SG_REMAP_BITS REG_GENMASK(31, 24) 18 + 19 + #endif
+7
drivers/gpu/drm/xe/tests/xe_bo.c
··· 49 49 KUNIT_FAIL(test, "Failed to submit bo clear.\n"); 50 50 return PTR_ERR(fence); 51 51 } 52 + 53 + if (dma_fence_wait_timeout(fence, false, 5 * HZ) <= 0) { 54 + dma_fence_put(fence); 55 + KUNIT_FAIL(test, "Timeout while clearing bo.\n"); 56 + return -ETIME; 57 + } 58 + 52 59 dma_fence_put(fence); 53 60 } 54 61
+11 -6
drivers/gpu/drm/xe/tests/xe_migrate.c
··· 83 83 bo->size, 84 84 ttm_bo_type_kernel, 85 85 region | 86 - XE_BO_FLAG_NEEDS_CPU_ACCESS); 86 + XE_BO_FLAG_NEEDS_CPU_ACCESS | 87 + XE_BO_FLAG_PINNED); 87 88 if (IS_ERR(remote)) { 88 89 KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", 89 90 str, remote); ··· 225 224 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 226 225 XE_BO_FLAG_PINNED); 227 226 if (IS_ERR(tiny)) { 228 - KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", 229 - PTR_ERR(pt)); 227 + KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", 228 + PTR_ERR(tiny)); 230 229 goto free_pt; 231 230 } 232 231 ··· 643 642 644 643 sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 645 644 DRM_XE_GEM_CPU_CACHING_WC, 646 - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); 645 + XE_BO_FLAG_SYSTEM | 646 + XE_BO_FLAG_NEEDS_CPU_ACCESS | 647 + XE_BO_FLAG_PINNED); 647 648 648 649 if (IS_ERR(sys_bo)) { 649 650 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", ··· 669 666 670 667 ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 671 668 DRM_XE_GEM_CPU_CACHING_WC, 672 - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 669 + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | 670 + XE_BO_FLAG_PINNED); 673 671 674 672 if (IS_ERR(ccs_bo)) { 675 673 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", ··· 694 690 695 691 vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, 696 692 DRM_XE_GEM_CPU_CACHING_WC, 697 - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); 693 + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | 694 + XE_BO_FLAG_PINNED); 698 695 if (IS_ERR(vram_bo)) { 699 696 KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", 700 697 PTR_ERR(vram_bo));
+4 -4
drivers/gpu/drm/xe/xe_assert.h
··· 14 14 #include "xe_step.h" 15 15 16 16 /** 17 - * DOC: Xe ASSERTs 17 + * DOC: Xe Asserts 18 18 * 19 19 * While Xe driver aims to be simpler than legacy i915 driver it is still 20 20 * complex enough that some changes introduced while adding new functionality ··· 103 103 * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions 104 104 * or as a condition. 105 105 * 106 - * See `Xe ASSERTs`_ for general usage guidelines. 106 + * See `Xe Asserts`_ for general usage guidelines. 107 107 */ 108 108 #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") 109 109 #define xe_assert_msg(xe, condition, msg, arg...) ({ \ ··· 138 138 * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions 139 139 * or as a condition. 140 140 * 141 - * See `Xe ASSERTs`_ for general usage guidelines. 141 + * See `Xe Asserts`_ for general usage guidelines. 142 142 */ 143 143 #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") 144 144 #define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ ··· 162 162 * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions 163 163 * or as a condition. 164 164 * 165 - * See `Xe ASSERTs`_ for general usage guidelines. 165 + * See `Xe Asserts`_ for general usage guidelines. 166 166 */ 167 167 #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") 168 168 #define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \
+154 -28
drivers/gpu/drm/xe/xe_bo.c
··· 162 162 } 163 163 } 164 164 165 + static bool force_contiguous(u32 bo_flags) 166 + { 167 + /* 168 + * For eviction / restore on suspend / resume objects pinned in VRAM 169 + * must be contiguous, also only contiguous BOs support xe_bo_vmap. 170 + */ 171 + return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); 172 + } 173 + 165 174 static void add_vram(struct xe_device *xe, struct xe_bo *bo, 166 175 struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) 167 176 { ··· 184 175 xe_assert(xe, vram && vram->usable_size); 185 176 io_size = vram->io_size; 186 177 187 - /* 188 - * For eviction / restore on suspend / resume objects 189 - * pinned in VRAM must be contiguous 190 - */ 191 - if (bo_flags & (XE_BO_FLAG_PINNED | 192 - XE_BO_FLAG_GGTT)) 178 + if (force_contiguous(bo_flags)) 193 179 place.flags |= TTM_PL_FLAG_CONTIGUOUS; 194 180 195 181 if (io_size < vram->usable_size) { ··· 216 212 217 213 bo->placements[*c] = (struct ttm_place) { 218 214 .mem_type = XE_PL_STOLEN, 219 - .flags = bo_flags & (XE_BO_FLAG_PINNED | 220 - XE_BO_FLAG_GGTT) ? 215 + .flags = force_contiguous(bo_flags) ? 
221 216 TTM_PL_FLAG_CONTIGUOUS : 0, 222 217 }; 223 218 *c += 1; ··· 445 442 kfree(tt); 446 443 } 447 444 445 + static bool xe_ttm_resource_visible(struct ttm_resource *mem) 446 + { 447 + struct xe_ttm_vram_mgr_resource *vres = 448 + to_xe_ttm_vram_mgr_resource(mem); 449 + 450 + return vres->used_visible_size == mem->size; 451 + } 452 + 448 453 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, 449 454 struct ttm_resource *mem) 450 455 { ··· 464 453 return 0; 465 454 case XE_PL_VRAM0: 466 455 case XE_PL_VRAM1: { 467 - struct xe_ttm_vram_mgr_resource *vres = 468 - to_xe_ttm_vram_mgr_resource(mem); 469 456 struct xe_mem_region *vram = res_to_mem_region(mem); 470 457 471 - if (vres->used_visible_size < mem->size) 458 + if (!xe_ttm_resource_visible(mem)) 472 459 return -EINVAL; 473 460 474 461 mem->bus.offset = mem->start << PAGE_SHIFT; ··· 885 876 }; 886 877 struct ttm_operation_ctx ctx = { 887 878 .interruptible = false, 879 + .gfp_retry_mayfail = true, 888 880 }; 889 881 struct ttm_resource *new_mem; 890 882 int ret; ··· 947 937 { 948 938 struct ttm_operation_ctx ctx = { 949 939 .interruptible = false, 940 + .gfp_retry_mayfail = false, 950 941 }; 951 942 struct ttm_resource *new_mem; 952 943 struct ttm_place *place = &bo->placements[0]; ··· 1117 1106 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) 1118 1107 { 1119 1108 struct ttm_operation_ctx ctx = { 1120 - .interruptible = false 1109 + .interruptible = false, 1110 + .gfp_retry_mayfail = false, 1121 1111 }; 1122 1112 1123 1113 if (ttm_bo->ttm) { ··· 1130 1118 } 1131 1119 } 1132 1120 1121 + static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, 1122 + unsigned long offset, void *buf, int len, 1123 + int write) 1124 + { 1125 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 1126 + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 1127 + struct iosys_map vmap; 1128 + struct xe_res_cursor cursor; 1129 + struct xe_mem_region *vram; 1130 + int bytes_left = len; 1131 + 1132 + xe_bo_assert_held(bo); 1133 + xe_device_assert_mem_access(xe); 1134 + 1135 + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) 1136 + return -EIO; 1137 + 1138 + /* FIXME: Use GPU for non-visible VRAM */ 1139 + if (!xe_ttm_resource_visible(ttm_bo->resource)) 1140 + return -EIO; 1141 + 1142 + vram = res_to_mem_region(ttm_bo->resource); 1143 + xe_res_first(ttm_bo->resource, offset & PAGE_MASK, 1144 + bo->size - (offset & PAGE_MASK), &cursor); 1145 + 1146 + do { 1147 + unsigned long page_offset = (offset & ~PAGE_MASK); 1148 + int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left); 1149 + 1150 + iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping + 1151 + cursor.start); 1152 + if (write) 1153 + xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count); 1154 + else 1155 + xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count); 1156 + 1157 + buf += byte_count; 1158 + offset += byte_count; 1159 + bytes_left -= byte_count; 1160 + if (bytes_left) 1161 + xe_res_next(&cursor, PAGE_SIZE); 1162 + } while (bytes_left); 1163 + 1164 + return len; 1165 + } 1166 + 1133 1167 const struct ttm_device_funcs xe_ttm_funcs = { 1134 1168 .ttm_tt_create = xe_ttm_tt_create, 1135 1169 .ttm_tt_populate = xe_ttm_tt_populate, ··· 1185 1127 .move = xe_bo_move, 1186 1128 .io_mem_reserve = xe_ttm_io_mem_reserve, 1187 1129 .io_mem_pfn = xe_ttm_io_mem_pfn, 1130 + .access_memory = xe_ttm_access_memory, 1188 1131 .release_notify = xe_ttm_bo_release_notify, 1189 1132 .eviction_valuable = ttm_bo_eviction_valuable, 1190 1133 .delete_mem_notify = xe_ttm_bo_delete_mem_notify, ··· 1196 1137 { 1197 1138 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 1198 1139 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); 1140 + struct xe_tile *tile; 1141 + u8 id; 1199 1142 1200 1143 if (bo->ttm.base.import_attach) 1201 1144 drm_prime_gem_destroy(&bo->ttm.base, NULL); ··· 1205 1144 1206 1145 xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); 1207 1146 1208 - if (bo->ggtt_node && bo->ggtt_node->base.size) 1209 - xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); 1147 + for_each_tile(tile, xe, id) 1148 + if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size) 1149 + xe_ggtt_remove_bo(tile->mem.ggtt, bo); 1210 1150 1211 1151 #ifdef CONFIG_PROC_FS 1212 1152 if (bo->client) ··· 1305 1243 return ret; 1306 1244 } 1307 1245 1246 + static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, 1247 + void *buf, int len, int write) 1248 + { 1249 + struct ttm_buffer_object *ttm_bo = vma->vm_private_data; 1250 + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); 1251 + struct xe_device *xe = xe_bo_device(bo); 1252 + int ret; 1253 + 1254 + xe_pm_runtime_get(xe); 1255 + ret = ttm_bo_vm_access(vma, addr, buf, len, write); 1256 + xe_pm_runtime_put(xe); 1257 + 1258 + return ret; 1259 + } 1260 + 1261 + /** 1262 + * xe_bo_read() - Read from an xe_bo 1263 + * @bo: The buffer object to read from. 1264 + * @offset: The byte offset to start reading from. 1265 + * @dst: Location to store the read. 1266 + * @size: Size in bytes for the read. 1267 + * 1268 + * Read @size bytes from the @bo, starting from @offset, storing into @dst. 1269 + * 1270 + * Return: Zero on success, or negative error.
1271 + */ 1272 + int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) 1273 + { 1274 + int ret; 1275 + 1276 + ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0); 1277 + if (ret >= 0 && ret != size) 1278 + ret = -EIO; 1279 + else if (ret == size) 1280 + ret = 0; 1281 + 1282 + return ret; 1283 + } 1284 + 1308 1285 static const struct vm_operations_struct xe_gem_vm_ops = { 1309 1286 .fault = xe_gem_fault, 1310 1287 .open = ttm_bo_vm_open, 1311 1288 .close = ttm_bo_vm_close, 1312 - .access = ttm_bo_vm_access 1289 + .access = xe_bo_vm_access, 1313 1290 }; 1314 1291 1315 1292 static const struct drm_gem_object_funcs xe_gem_object_funcs = { ··· 1402 1301 struct ttm_operation_ctx ctx = { 1403 1302 .interruptible = true, 1404 1303 .no_wait_gpu = false, 1304 + .gfp_retry_mayfail = true, 1405 1305 }; 1406 1306 struct ttm_placement *placement; 1407 1307 uint32_t alignment; ··· 1416 1314 xe_bo_free(bo); 1417 1315 return ERR_PTR(-EINVAL); 1418 1316 } 1317 + 1318 + /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ 1319 + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) 1320 + return ERR_PTR(-EINVAL); 1419 1321 1420 1322 if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && 1421 1323 !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ··· 1611 1505 bo->vm = vm; 1612 1506 1613 1507 if (bo->flags & XE_BO_FLAG_GGTT) { 1614 - if (!tile && flags & XE_BO_FLAG_STOLEN) 1615 - tile = xe_device_get_root_tile(xe); 1508 + struct xe_tile *t; 1509 + u8 id; 1616 1510 1617 - xe_assert(xe, tile); 1511 + if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) { 1512 + if (!tile && flags & XE_BO_FLAG_STOLEN) 1513 + tile = xe_device_get_root_tile(xe); 1618 1514 1619 - if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { 1620 - err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, 1621 - start + bo->size, U64_MAX); 1622 - } else { 1623 - err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); 1515 + xe_assert(xe, tile); 1624 1516 } 1625 - if (err) 1626 - goto err_unlock_put_bo; 1517 + 1518 + for_each_tile(t, xe, id) { 1519 + if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t))) 1520 + continue; 1521 + 1522 + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { 1523 + err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, 1524 + start + bo->size, U64_MAX); 1525 + } else { 1526 + err = xe_ggtt_insert_bo(t->mem.ggtt, bo); 1527 + } 1528 + if (err) 1529 + goto err_unlock_put_bo; 1530 + } 1627 1531 } 1628 1532 1629 1533 return bo; ··· 2016 1900 struct ttm_operation_ctx ctx = { 2017 1901 .interruptible = true, 2018 1902 .no_wait_gpu = false, 1903 + .gfp_retry_mayfail = true, 2019 1904 }; 2020 1905 2021 1906 if (vm) { ··· 2027 1910 ctx.resv = xe_vm_resv(vm); 2028 1911 } 2029 1912 1913 + trace_xe_bo_validate(bo); 2030 1914 return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); 2031 1915 } 2032 1916 ··· 2079 1961 2080 1962 int xe_bo_vmap(struct xe_bo *bo) 2081 1963 { 1964 + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); 2082 1965 void *virtual; 2083 1966 bool is_iomem; 2084 1967 int ret; 2085 1968 2086 1969 xe_bo_assert_held(bo); 2087 1970 2088 - if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) 1971 + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) || 1972 + !force_contiguous(bo->flags))) 2089 1973 return -EINVAL; 2090 1974 2091 1975 if (!iosys_map_is_null(&bo->vmap)) ··· 2363 2243 struct ttm_operation_ctx ctx = { 2364 2244 .interruptible = true, 2365 2245 .no_wait_gpu = false, 2246 + .gfp_retry_mayfail = true, 2366 2247 }; 2367 2248 struct ttm_placement placement; 2368 2249 struct ttm_place requested; ··· 2414 2293 .interruptible = false, 2415 2294 .no_wait_gpu = false, 2416 2295 .force_alloc = force_alloc, 2296 + .gfp_retry_mayfail = true, 2417 2297 }; 2418 2298 struct ttm_placement placement; 2419 2299 int ret; ··· 2494 2372 2495 2373 void xe_bo_put(struct xe_bo *bo) 2496 2374 { 2375 + struct xe_tile *tile; 2376 + u8 id; 2377 + 2497 2378 might_sleep(); 2498 2379 if (bo) { 2499 2380 #ifdef CONFIG_PROC_FS 2500 2381 if (bo->client) 2501 2382 might_lock(&bo->client->bos_lock); 2502 2383 #endif 2503 - if (bo->ggtt_node && bo->ggtt_node->ggtt) 2504 - might_lock(&bo->ggtt_node->ggtt->lock); 2384 + for_each_tile(tile, xe_bo_device(bo), id) 2385 + if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) 2386 + might_lock(&bo->ggtt_node[id]->ggtt->lock); 2505 2387 drm_gem_object_put(&bo->ttm.base); 2506 2388 } 2507 2389 }
+28 -5
drivers/gpu/drm/xe/xe_bo.h
···
 #define XE_BO_FLAG_NEEDS_64K		BIT(15)
 #define XE_BO_FLAG_NEEDS_2M		BIT(16)
 #define XE_BO_FLAG_GGTT_INVALIDATE	BIT(17)
+#define XE_BO_FLAG_GGTT0		BIT(18)
+#define XE_BO_FLAG_GGTT1		BIT(19)
+#define XE_BO_FLAG_GGTT2		BIT(20)
+#define XE_BO_FLAG_GGTT3		BIT(21)
+#define XE_BO_FLAG_GGTT_ALL		(XE_BO_FLAG_GGTT0 | \
+					 XE_BO_FLAG_GGTT1 | \
+					 XE_BO_FLAG_GGTT2 | \
+					 XE_BO_FLAG_GGTT3)
+
 /* this one is trigger internally only */
 #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
 #define XE_BO_FLAG_INTERNAL_64K		BIT(31)
+
+#define XE_BO_FLAG_GGTTx(tile) \
+	(XE_BO_FLAG_GGTT0 << (tile)->id)

 #define XE_PTE_SHIFT			12
 #define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
···
 }

 static inline u32
-xe_bo_ggtt_addr(struct xe_bo *bo)
+__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
 {
-	if (XE_WARN_ON(!bo->ggtt_node))
+	struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id];
+
+	if (XE_WARN_ON(!ggtt_node))
 		return 0;

-	XE_WARN_ON(bo->ggtt_node->base.size > bo->size);
-	XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32));
-	return bo->ggtt_node->base.start;
+	XE_WARN_ON(ggtt_node->base.size > bo->size);
+	XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
+	return ggtt_node->base.start;
+}
+
+static inline u32
+xe_bo_ggtt_addr(struct xe_bo *bo)
+{
+	xe_assert(xe_bo_device(bo), bo->tile);
+
+	return __xe_bo_ggtt_addr(bo, bo->tile->id);
 }

 int xe_bo_vmap(struct xe_bo *bo);
 void xe_bo_vunmap(struct xe_bo *bo);
+int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size);

 bool mem_type_is_vram(u32 mem_type);
 bool xe_bo_is_vram(struct xe_bo *bo);
+10 -4
drivers/gpu/drm/xe/xe_bo_evict.c
···
 	}

 	if (bo->flags & XE_BO_FLAG_GGTT) {
-		struct xe_tile *tile = bo->tile;
+		struct xe_tile *tile;
+		u8 id;

-		mutex_lock(&tile->mem.ggtt->lock);
-		xe_ggtt_map_bo(tile->mem.ggtt, bo);
-		mutex_unlock(&tile->mem.ggtt->lock);
+		for_each_tile(tile, xe, id) {
+			if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
+				continue;
+
+			mutex_lock(&tile->mem.ggtt->lock);
+			xe_ggtt_map_bo(tile->mem.ggtt, bo);
+			mutex_unlock(&tile->mem.ggtt->lock);
+		}
 	}

 	/*
+3 -2
drivers/gpu/drm/xe/xe_bo_types.h
···
 #include <drm/ttm/ttm_device.h>
 #include <drm/ttm/ttm_placement.h>

+#include "xe_device_types.h"
 #include "xe_ggtt_types.h"

 struct xe_device;
···
 	struct ttm_place placements[XE_BO_MAX_PLACEMENTS];
 	/** @placement: current placement for this BO */
 	struct ttm_placement placement;
-	/** @ggtt_node: GGTT node if this BO is mapped in the GGTT */
-	struct xe_ggtt_node *ggtt_node;
+	/** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */
+	struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
 	/** @vmap: iosys map of this buffer */
 	struct iosys_map vmap;
 	/** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+87 -34
drivers/gpu/drm/xe/xe_devcoredump.c
···
 /**
  * DOC: Xe device coredump
  *
- * Devices overview:
  * Xe uses dev_coredump infrastructure for exposing the crash errors in a
- * standardized way.
- * devcoredump exposes a temporary device under /sys/class/devcoredump/
- * which is linked with our card device directly.
- * The core dump can be accessed either from
- * /sys/class/drm/card<n>/device/devcoredump/ or from
- * /sys/class/devcoredump/devcd<m> where
- * /sys/class/devcoredump/devcd<m>/failing_device is a link to
- * /sys/class/drm/card<n>/device/.
+ * standardized way. Once a crash occurs, devcoredump exposes a temporary
+ * node under ``/sys/class/devcoredump/devcd<m>/``. The same node is also
+ * accessible in ``/sys/class/drm/card<n>/device/devcoredump/``. The
+ * ``failing_device`` symlink points to the device that crashed and created the
+ * coredump.
  *
- * Snapshot at hang:
- * The 'data' file is printed with a drm_printer pointer at devcoredump read
- * time. For this reason, we need to take snapshots from when the hang has
- * happened, and not only when the user is reading the file. Otherwise the
- * information is outdated since the resets might have happened in between.
+ * The following characteristics are observed by xe when creating a device
+ * coredump:
  *
- * 'First' failure snapshot:
- * In general, the first hang is the most critical one since the following hangs
- * can be a consequence of the initial hang. For this reason we only take the
- * snapshot of the 'first' failure and ignore subsequent calls of this function,
- * at least while the coredump device is alive. Dev_coredump has a delayed work
- * queue that will eventually delete the device and free all the dump
- * information.
+ * **Snapshot at hang**:
+ * The 'data' file contains a snapshot of the HW and driver states at the time
+ * the hang happened. Due to the driver recovering from resets/crashes, it may
+ * not correspond to the state of the system when the file is read by
+ * userspace.
+ *
+ * **Coredump release**:
+ * After a coredump is generated, it stays in kernel memory until released by
+ * userspace by writing anything to it, or after an internal timer expires. The
+ * exact timeout may vary and should not be relied upon. Example to release
+ * a coredump:
+ *
+ * .. code-block:: shell
+ *
+ *	$ > /sys/class/drm/card0/device/devcoredump/data
+ *
+ * **First failure only**:
+ * In general, the first hang is the most critical one since the following
+ * hangs can be a consequence of the initial hang. For this reason a snapshot
+ * is taken only for the first failure. Until the devcoredump is released by
+ * userspace or kernel, all subsequent hangs do not override the snapshot nor
+ * create new ones. Devcoredump has a delayed work queue that will eventually
+ * delete the file node and free all the dump information.
  */

 #ifdef CONFIG_DEV_COREDUMP
···
 	p = drm_coredump_printer(&iter);

 	drm_puts(&p, "**** Xe Device Coredump ****\n");
+	drm_printf(&p, "Reason: %s\n", ss->reason);
 	drm_puts(&p, "kernel: " UTS_RELEASE "\n");
 	drm_puts(&p, "module: " KBUILD_MODNAME "\n");
···
 	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
 	ts = ktime_to_timespec64(ss->boot_time);
 	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
-	drm_printf(&p, "Process: %s\n", ss->process_name);
+	drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid);
 	xe_device_snapshot_print(xe, &p);

 	drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
···
 static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
 {
 	int i;
+
+	kfree(ss->reason);
+	ss->reason = NULL;

 	xe_guc_log_snapshot_free(ss->guc.log);
 	ss->guc.log = NULL;
···
 	/* Ensure delayed work is captured before continuing */
 	flush_work(&ss->work);

-	if (!ss->read.buffer)
-		return -ENODEV;
+	mutex_lock(&coredump->lock);

-	if (offset >= ss->read.size)
+	if (!ss->read.buffer) {
+		mutex_unlock(&coredump->lock);
+		return -ENODEV;
+	}
+
+	if (offset >= ss->read.size) {
+		mutex_unlock(&coredump->lock);
 		return 0;
+	}

 	byte_copied = count < ss->read.size - offset ? count :
 		ss->read.size - offset;
 	memcpy(buffer, ss->read.buffer + offset, byte_copied);
+
+	mutex_unlock(&coredump->lock);

 	return byte_copied;
 }
···

 	cancel_work_sync(&coredump->snapshot.work);

+	mutex_lock(&coredump->lock);
+
 	xe_devcoredump_snapshot_free(&coredump->snapshot);
 	kvfree(coredump->snapshot.read.buffer);

 	/* To prevent stale data on next snapshot, clear everything */
 	memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
 	coredump->captured = false;
-	coredump->job = NULL;
 	drm_info(&coredump_to_xe(coredump)->drm,
 		 "Xe device coredump has been deleted.\n");
+
+	mutex_unlock(&coredump->lock);
 }

 static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
···
 }

 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
+				 struct xe_exec_queue *q,
 				 struct xe_sched_job *job)
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
-	struct xe_exec_queue *q = job->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	u32 adj_logical_mask = q->logical_mask;
 	u32 width_mask = (0x1 << q->width) - 1;
···
 	ss->snapshot_time = ktime_get_real();
 	ss->boot_time = ktime_get_boottime();

-	if (q->vm && q->vm->xef)
+	if (q->vm && q->vm->xef) {
 		process_name = q->vm->xef->process_name;
+		ss->pid = q->vm->xef->pid;
+	}
+
 	strscpy(ss->process_name, process_name);

 	ss->gt = q->gt;
-	coredump->job = job;
 	INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);

 	cookie = dma_fence_begin_signalling();
···
 	ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true);
 	ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct);
 	ss->ge = xe_guc_exec_queue_snapshot_capture(q);
-	ss->job = xe_sched_job_snapshot_capture(job);
+	if (job)
+		ss->job = xe_sched_job_snapshot_capture(job);
 	ss->vm = xe_vm_snapshot_capture(q->vm);

-	xe_engine_snapshot_capture_for_job(job);
+	xe_engine_snapshot_capture_for_queue(q);

 	queue_work(system_unbound_wq, &ss->work);
···

 /**
  * xe_devcoredump - Take the required snapshots and initialize coredump device.
+ * @q: The faulty xe_exec_queue, where the issue was detected.
  * @job: The faulty xe_sched_job, where the issue was detected.
+ * @fmt: Printf format + args to describe the reason for the core dump
  *
  * This function should be called at the crash time within the serialized
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_sched_job *job)
+__printf(3, 4)
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...)
 {
-	struct xe_device *xe = gt_to_xe(job->q->gt);
+	struct xe_device *xe = gt_to_xe(q->gt);
 	struct xe_devcoredump *coredump = &xe->devcoredump;
+	va_list varg;
+
+	mutex_lock(&coredump->lock);

 	if (coredump->captured) {
 		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
+		mutex_unlock(&coredump->lock);
 		return;
 	}

 	coredump->captured = true;
-	devcoredump_snapshot(coredump, job);
+
+	va_start(varg, fmt);
+	coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt, varg);
+	va_end(varg);
+
+	devcoredump_snapshot(coredump, q, job);

 	drm_info(&xe->drm, "Xe device coredump has been created\n");
 	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
 		 xe->drm.primary->index);
+
+	mutex_unlock(&coredump->lock);
 }

 static void xe_driver_devcoredump_fini(void *arg)
···

 int xe_devcoredump_init(struct xe_device *xe)
 {
+	int err;
+
+	err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock);
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_LOCKDEP)) {
+		fs_reclaim_acquire(GFP_KERNEL);
+		might_lock(&xe->devcoredump.lock);
+		fs_reclaim_release(GFP_KERNEL);
+	}
+
 	return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm);
 }
+5 -2
drivers/gpu/drm/xe/xe_devcoredump.h
···

 struct drm_printer;
 struct xe_device;
+struct xe_exec_queue;
 struct xe_sched_job;

 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_sched_job *job);
+void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...);
 int xe_devcoredump_init(struct xe_device *xe);
 #else
-static inline void xe_devcoredump(struct xe_sched_job *job)
+static inline void xe_devcoredump(struct xe_exec_queue *q,
+				  struct xe_sched_job *job,
+				  const char *fmt, ...)
 {
 }

+7 -3
drivers/gpu/drm/xe/xe_devcoredump_types.h
···
 	ktime_t boot_time;
 	/** @process_name: Name of process that triggered this gpu hang */
 	char process_name[TASK_COMM_LEN];
+	/** @pid: Process id of process that triggered this gpu hang */
+	pid_t pid;
+	/** @reason: The reason the coredump was triggered */
+	char *reason;

 	/** @gt: Affected GT, used by forcewake for delayed capture */
 	struct xe_gt *gt;
···
 * for reading the information.
 */
 struct xe_devcoredump {
-	/** @captured: The snapshot of the first hang has already been taken. */
+	/** @lock: protects access to entire structure */
+	struct mutex lock;
+	/** @captured: The snapshot of the first hang has already been taken */
 	bool captured;
 	/** @snapshot: Snapshot is captured at time of the first crash */
 	struct xe_devcoredump_snapshot snapshot;
-	/** @job: Point to the faulting job */
-	struct xe_sched_job *job;
 };

 #endif
+8
drivers/gpu/drm/xe/xe_device.c
···
 #include "xe_memirq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
+#include "xe_oa.h"
 #include "xe_observation.h"
 #include "xe_pat.h"
 #include "xe_pcode.h"
···
 #include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
 #include "xe_vram.h"
+#include "xe_vsec.h"
 #include "xe_wait_user_fence.h"
 #include "xe_wa.h"
···
 		err = -ENOMEM;
 		goto err;
 	}
+
+	err = drmm_mutex_init(&xe->drm, &xe->pmt.lock);
+	if (err)
+		goto err;

 	err = xe_display_create(xe);
 	if (WARN_ON(err))
···

 	for_each_gt(gt, xe, id)
 		xe_gt_sanitize_freq(gt);
+
+	xe_vsec_init(xe);

 	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
+36 -21
drivers/gpu/drm/xe/xe_device_types.h
···
 #include "xe_heci_gsc.h"
 #include "xe_lmtt_types.h"
 #include "xe_memirq_types.h"
-#include "xe_oa.h"
+#include "xe_oa_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
 #include "xe_sriov_types.h"
···
 #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100)
 #define MEDIA_VERx100(xe) ((xe)->info.media_verx100)
 #define IS_DGFX(xe) ((xe)->info.is_dgfx)
-#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi)
-#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi)

 #define XE_VRAM_FLAGS_NEED64K BIT(0)
···
 		/** @info.va_bits: Maximum bits of a virtual address */
 		u8 va_bits;

-		/** @info.is_dgfx: is discrete device */
-		u8 is_dgfx:1;
-		/** @info.has_asid: Has address space ID */
-		u8 has_asid:1;
+		/*
+		 * Keep all flags below alphabetically sorted
+		 */
+
 		/** @info.force_execlist: Forced execlist submission */
 		u8 force_execlist:1;
+		/** @info.has_asid: Has address space ID */
+		u8 has_asid:1;
+		/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
+		u8 has_atomic_enable_pte_bit:1;
+		/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
+		u8 has_device_atomics_on_smem:1;
 		/** @info.has_flat_ccs: Whether flat CCS metadata is used */
 		u8 has_flat_ccs:1;
+		/** @info.has_heci_cscfi: device has heci cscfi */
+		u8 has_heci_cscfi:1;
+		/** @info.has_heci_gscfi: device has heci gscfi */
+		u8 has_heci_gscfi:1;
 		/** @info.has_llc: Device has a shared CPU+GPU last level cache */
 		u8 has_llc:1;
 		/** @info.has_mmio_ext: Device has extra MMIO address range */
···
 		u8 has_sriov:1;
 		/** @info.has_usm: Device has unified shared memory support */
 		u8 has_usm:1;
+		/** @info.is_dgfx: is discrete device */
+		u8 is_dgfx:1;
 		/**
 		 * @info.probe_display: Probe display hardware. If set to
 		 * false, the driver will behave as if there is no display
···
 		 * state the firmware or bootloader left it in.
 		 */
 		u8 probe_display:1;
+		/** @info.skip_guc_pc: Skip GuC based PM feature init */
+		u8 skip_guc_pc:1;
 		/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
 		u8 skip_mtcfg:1;
 		/** @info.skip_pcode: skip access to PCODE uC */
 		u8 skip_pcode:1;
-		/** @info.has_heci_gscfi: device has heci gscfi */
-		u8 has_heci_gscfi:1;
-		/** @info.has_heci_cscfi: device has heci cscfi */
-		u8 has_heci_cscfi:1;
-		/** @info.skip_guc_pc: Skip GuC based PM feature init */
-		u8 skip_guc_pc:1;
-		/** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */
-		u8 has_atomic_enable_pte_bit:1;
-		/** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */
-		u8 has_device_atomics_on_smem:1;
 	} info;

 	/** @irq: device interrupt state */
···
 		spinlock_t lock;

 		/** @irq.enabled: interrupts enabled on this device */
-		bool enabled;
+		atomic_t enabled;
 	} irq;

 	/** @ttm: ttm device */
···

 		/** @sriov.pf: PF specific data */
 		struct xe_device_pf pf;
+		/** @sriov.vf: VF specific data */
+		struct xe_device_vf vf;

 		/** @sriov.wq: workqueue used by the virtualization workers */
 		struct workqueue_struct *wq;
···
 		/** @d3cold.lock: protect vram_threshold */
 		struct mutex lock;
 	} d3cold;
+
+	/** @pmt: Support the PMT driver callback interface */
+	struct {
+		/** @pmt.lock: protect access for telemetry data */
+		struct mutex lock;
+	} pmt;

 	/**
 	 * @pm_callback_task: Track the active task that is running in either
···
 		/** @vm.xe: xarray to store VMs */
 		struct xarray xa;
 		/**
-		 * @vm.lock: Protects VM lookup + reference and removal a from
+		 * @vm.lock: Protects VM lookup + reference and removal from
 		 * file xarray. Not an intended to be an outer lock which does
 		 * thing while being held.
 		 */
···
 		struct xarray xa;
 		/**
 		 * @exec_queue.lock: Protects exec queue lookup + reference and
-		 * removal a frommfile xarray. Not an intended to be an outer
-		 * lock which does thing while being held.
+		 * removal from file xarray. Not intended to be an outer lock
+		 * which does things while being held.
 		 */
 		struct mutex lock;
+		/**
+		 * @exec_queue.pending_removal: items pending to be removed to
+		 * synchronize GPU state update with ongoing query.
+		 */
+		atomic_t pending_removal;
 	} exec_queue;

 	/** @run_ticks: hw engine class run time in ticks for this drm client */
+56 -24
drivers/gpu/drm/xe/xe_drm_client.c
···
 	}
 }

+static struct xe_hw_engine *any_engine(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned long gt_id;
+
+	for_each_gt(gt, xe, gt_id) {
+		struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt);
+
+		if (hwe)
+			return hwe;
+	}
+
+	return NULL;
+}
+
+static bool force_wake_get_any_engine(struct xe_device *xe,
+				      struct xe_hw_engine **phwe,
+				      unsigned int *pfw_ref)
+{
+	enum xe_force_wake_domains domain;
+	unsigned int fw_ref;
+	struct xe_hw_engine *hwe;
+	struct xe_force_wake *fw;
+
+	hwe = any_engine(xe);
+	if (!hwe)
+		return false;
+
+	domain = xe_hw_engine_to_fw_domain(hwe);
+	fw = gt_to_fw(hwe->gt);
+
+	fw_ref = xe_force_wake_get(fw, domain);
+	if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
+		xe_force_wake_put(fw, fw_ref);
+		return false;
+	}
+
+	*phwe = hwe;
+	*pfw_ref = fw_ref;
+
+	return true;
+}
+
 static void show_run_ticks(struct drm_printer *p, struct drm_file *file)
 {
 	unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { };
···
 	u64 gpu_timestamp;
 	unsigned int fw_ref;

+	/*
+	 * Wait for any exec queue going away: their cycles will get updated on
+	 * context switch out, so wait for that to happen
+	 */
+	wait_var_event(&xef->exec_queue.pending_removal,
+		       !atomic_read(&xef->exec_queue.pending_removal));
+
 	xe_pm_runtime_get(xe);
+	if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) {
+		xe_pm_runtime_put(xe);
+		return;
+	}

 	/* Accumulate all the exec queues from this client */
 	mutex_lock(&xef->exec_queue.lock);
···
 	}
 	mutex_unlock(&xef->exec_queue.lock);

-	/* Get the total GPU cycles */
-	for_each_gt(gt, xe, gt_id) {
-		enum xe_force_wake_domains fw;
+	gpu_timestamp = xe_hw_engine_read_timestamp(hwe);

-		hwe = xe_gt_any_hw_engine(gt);
-		if (!hwe)
-			continue;
-
-		fw = xe_hw_engine_to_fw_domain(hwe);
-
-		fw_ref = xe_force_wake_get(gt_to_fw(gt), fw);
-		if (!xe_force_wake_ref_has_domain(fw_ref, fw)) {
-			hwe = NULL;
-			xe_force_wake_put(gt_to_fw(gt), fw_ref);
-			break;
-		}
-
-		gpu_timestamp = xe_hw_engine_read_timestamp(hwe);
-		xe_force_wake_put(gt_to_fw(gt), fw_ref);
-		break;
-	}
-
+	xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref);
 	xe_pm_runtime_put(xe);
-
-	if (unlikely(!hwe))
-		return;

 	for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) {
 		const char *class_name;
+7
drivers/gpu/drm/xe/xe_exec_queue.c
···

 	return q;
 }
+ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

 void xe_exec_queue_destroy(struct kref *ref)
 {
···

 	/*
 	 * Before releasing our ref to lrc and xef, accumulate our run ticks
+	 * and wakeup any waiters.
 	 */
 	xe_exec_queue_update_run_ticks(q);
+	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
+		wake_up_var(&q->xef->exec_queue.pending_removal);

 	for (i = 0; i < q->width; ++i)
 		xe_lrc_put(q->lrc[i]);
···

 	mutex_lock(&xef->exec_queue.lock);
 	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+	if (q)
+		atomic_inc(&xef->exec_queue.pending_removal);
 	mutex_unlock(&xef->exec_queue.lock);
+
 	if (XE_IOCTL_DBG(xe, !q))
 		return -ENOENT;

+19 -16
drivers/gpu/drm/xe/xe_ggtt.c
···
 	u64 start;
 	u64 offset, pte;

-	if (XE_WARN_ON(!bo->ggtt_node))
+	if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id]))
 		return;

-	start = bo->ggtt_node->base.start;
+	start = bo->ggtt_node[ggtt->tile->id]->base.start;

 	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
 		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
···
 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 				  u64 start, u64 end)
 {
-	int err;
 	u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
+	u8 tile_id = ggtt->tile->id;
+	int err;

 	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
 		alignment = SZ_64K;

-	if (XE_WARN_ON(bo->ggtt_node)) {
+	if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
 		/* Someone's already inserted this BO in the GGTT */
-		xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+		xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
 		return 0;
 	}
···

 	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));

-	bo->ggtt_node = xe_ggtt_node_init(ggtt);
-	if (IS_ERR(bo->ggtt_node)) {
-		err = PTR_ERR(bo->ggtt_node);
-		bo->ggtt_node = NULL;
+	bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
+	if (IS_ERR(bo->ggtt_node[tile_id])) {
+		err = PTR_ERR(bo->ggtt_node[tile_id]);
+		bo->ggtt_node[tile_id] = NULL;
 		goto out;
 	}

 	mutex_lock(&ggtt->lock);
-	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size,
-					  alignment, 0, start, end, 0);
+	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
+					  bo->size, alignment, 0, start, end, 0);
 	if (err) {
-		xe_ggtt_node_fini(bo->ggtt_node);
-		bo->ggtt_node = NULL;
+		xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
+		bo->ggtt_node[tile_id] = NULL;
 	} else {
 		xe_ggtt_map_bo(ggtt, bo);
 	}
···
 */
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
-	if (XE_WARN_ON(!bo->ggtt_node))
+	u8 tile_id = ggtt->tile->id;
+
+	if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
 		return;

 	/* This BO is not currently in the GGTT */
-	xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size);
+	xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);

-	xe_ggtt_node_remove(bo->ggtt_node,
+	xe_ggtt_node_remove(bo->ggtt_node[tile_id],
 			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
 }

+8 -2
drivers/gpu/drm/xe/xe_gpu_scheduler.h
···
 static inline
 struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
 {
-	return list_first_entry_or_null(&sched->base.pending_list,
-					struct xe_sched_job, drm.list);
+	struct xe_sched_job *job;
+
+	spin_lock(&sched->base.job_list_lock);
+	job = list_first_entry_or_null(&sched->base.pending_list,
+				       struct xe_sched_job, drm.list);
+	spin_unlock(&sched->base.job_list_lock);
+
+	return job;
 }

 static inline int
+36 -11
drivers/gpu/drm/xe/xe_gsc_proxy.c
···
 	return 0;
 }

-static int validate_proxy_header(struct xe_gsc_proxy_header *header,
+static int validate_proxy_header(struct xe_gt *gt,
+				 struct xe_gsc_proxy_header *header,
 				 u32 source, u32 dest, u32 max_size)
 {
 	u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
 	u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+	int ret = 0;

-	if (header->destination != dest || header->source != source)
-		return -ENOEXEC;
+	if (header->destination != dest || header->source != source) {
+		ret = -ENOEXEC;
+		goto out;
+	}

-	if (length + PROXY_HDR_SIZE > max_size)
-		return -E2BIG;
+	if (length + PROXY_HDR_SIZE > max_size) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	/* We only care about the status if this is a message for the driver */
+	if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) {
+		ret = -EIO;
+		goto out;
+	}

 	switch (type) {
 	case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
···
 			break;
 		fallthrough;
 	case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
-		return -EIO;
+		ret = -EIO;
+		break;
 	default:
 		break;
 	}

-	return 0;
+out:
+	if (ret)
+		xe_gt_err(gt,
+			  "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n",
+			  header->source, source, header->destination, dest,
+			  type, length, header->status);
+
+	return ret;
 }

 #define proxy_header_wr(xe_, map_, offset_, field_, val_) \
···
 	xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc,
 			   reply_offset, PROXY_HDR_SIZE);

-	/* stop if this was the last message */
-	if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END)
+	/* Check the status and stop if this was the last message */
+	if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) {
+		ret = validate_proxy_header(gt, to_csme_hdr,
+					    GSC_PROXY_ADDRESSING_GSC,
+					    GSC_PROXY_ADDRESSING_KMD,
+					    GSC_PROXY_BUFFER_SIZE - reply_offset);
 		break;
+	}

 	/* make sure the GSC-to-CSME proxy header is sane */
-	ret = validate_proxy_header(to_csme_hdr,
+	ret = validate_proxy_header(gt, to_csme_hdr,
 				    GSC_PROXY_ADDRESSING_GSC,
 				    GSC_PROXY_ADDRESSING_CSME,
 				    GSC_PROXY_BUFFER_SIZE - reply_offset);
···
 	}

 	/* make sure the CSME-to-GSC proxy header is sane */
-	ret = validate_proxy_header(gsc->proxy.from_csme,
+	ret = validate_proxy_header(gt, gsc->proxy.from_csme,
 				    GSC_PROXY_ADDRESSING_CSME,
 				    GSC_PROXY_ADDRESSING_GSC,
 				    GSC_PROXY_BUFFER_SIZE - reply_offset);
+1 -3
drivers/gpu/drm/xe/xe_gt.c
···
 	if (err)
 		return err;

-	for_each_hw_engine(hwe, gt, id) {
+	for_each_hw_engine(hwe, gt, id)
 		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
-		xe_reg_sr_apply_whitelist(hwe);
-	}

 	/* Get CCS mode in sync between sw/hw */
 	xe_gt_apply_ccs_mode(gt);
+31
drivers/gpu/drm/xe/xe_gt_printk.h
···
 	xe_gt_info(gt, "%pV", vaf);
 }

+static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_gt *gt = p->arg;
+	struct drm_printer dbg;
+
+	/*
+	 * The original xe_gt_dbg() callsite annotations are useless here,
+	 * redirect to the tweaked drm_dbg_printer() instead.
+	 */
+	dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+	dbg.origin = p->origin;
+
+	drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+}
+
 /**
  * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err()
  * @gt: the &xe_gt pointer to use in xe_gt_err()
···
 	struct drm_printer p = {
 		.printfn = __xe_gt_printfn_info,
 		.arg = gt,
+	};
+	return p;
+}
+
+/**
+ * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg()
+ * @gt: the &xe_gt pointer to use in xe_gt_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt)
+{
+	struct drm_printer p = {
+		.printfn = __xe_gt_printfn_dbg,
+		.arg = gt,
+		.origin = (const void *)_THIS_IP_,
 	};
 	return p;
 }
+76 -2
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
···
 	return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout);
 }

+static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+	return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority);
+}
+
 static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
 {
 	return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size);
···

 #ifdef CONFIG_DRM_XE_DEBUG_SRIOV
 #define MAX_FAIR_LMEM	SZ_128M	/* XXX: make it small for the driver bringup */
-#else
-#define MAX_FAIR_LMEM	SZ_2G	/* XXX: known issue with allocating BO over 2GiB */
 #endif

 static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
···
 	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));

 	return preempt_timeout;
+}
+
+static const char *sched_priority_unit(u32 priority)
+{
+	return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" :
+	       priority == GUC_SCHED_PRIORITY_NORMAL ? "(normal)" :
+	       priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" :
+	       "(?)";
+}
+
+static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	int err;
+
+	err = pf_push_vf_cfg_sched_priority(gt, vfid, priority);
+	if (unlikely(err))
+		return err;
+
+	config->sched_priority = priority;
+	return 0;
+}
+
+static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+	return config->sched_priority;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @priority: requested scheduling priority
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_sched_priority(gt, vfid, priority);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u32_done(gt, vfid, priority,
+				      xe_gt_sriov_pf_config_get_sched_priority(gt, vfid),
+				      "scheduling priority", sched_priority_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) scheduling priority.
+ */
+u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid)
+{
+	u32 priority;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	priority = pf_get_sched_priority(gt, vfid);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return priority;
 }

 static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+3
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
··· 44 44 int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, 45 45 u32 preempt_timeout); 46 46 47 + u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); 48 + int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); 49 + 47 50 u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, 48 51 enum xe_guc_klv_threshold_index index); 49 52 int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid,
+2
drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
··· 33 33 u32 exec_quantum; 34 34 /** @preempt_timeout: preemption timeout in microseconds. */ 35 35 u32 preempt_timeout; 36 + /** @sched_priority: scheduling priority. */ 37 + u32 sched_priority; 36 38 /** @thresholds: GuC thresholds for adverse events notifications. */ 37 39 u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; 38 40 };
+5
drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
··· 164 164 * │   │   ├── contexts_spare 165 165 * │   │   ├── exec_quantum_ms 166 166 * │   │   ├── preempt_timeout_us 167 + * │   │   ├── sched_priority 167 168 * │   ├── vf1 168 169 * │   │   ├── ggtt_quota 169 170 * │   │   ├── lmem_quota ··· 172 171 * │   │   ├── contexts_quota 173 172 * │   │   ├── exec_quantum_ms 174 173 * │   │   ├── preempt_timeout_us 174 + * │   │   ├── sched_priority 175 175 */ 176 176 177 177 #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ ··· 211 209 DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); 212 210 DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); 213 211 DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); 212 + DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); 214 213 215 214 /* 216 215 * /sys/kernel/debug/dri/0/ ··· 298 295 &exec_quantum_fops); 299 296 debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, 300 297 &preempt_timeout_fops); 298 + debugfs_create_file_unsafe("sched_priority", 0644, parent, parent, 299 + &sched_priority_fops); 301 300 302 301 /* register all threshold attributes */ 303 302 #define register_threshold_attribute(TAG, NAME, ...) \
+1 -1
drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
··· 18 18 * is within a range of supported VF numbers (up to maximum number of VFs that 19 19 * driver can support, including VF0 that represents the PF itself). 20 20 * 21 - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. 21 + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. 22 22 */ 23 23 #define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) 24 24
+23 -4
drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
··· 135 135 return 0; 136 136 } 137 137 138 - static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) 138 + static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority) 139 139 { 140 + unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); 141 + unsigned int n; 142 + 140 143 xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 141 144 lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); 142 145 143 - return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, 144 - &gt->sriov.pf.policy.guc.sched_if_idle, 145 - enable); 146 + for (n = 0; n < total_vfs; n++) 147 + gt->sriov.pf.vfs[n].config.sched_priority = priority; 148 + } 149 + 150 + static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) 151 + { 152 + int err; 153 + 154 + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); 155 + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); 156 + 157 + err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, 158 + &gt->sriov.pf.policy.guc.sched_if_idle, 159 + enable); 160 + 161 + if (!err) 162 + pf_bulk_reset_sched_priority(gt, enable ? GUC_SCHED_PRIORITY_NORMAL : 163 + GUC_SCHED_PRIORITY_LOW); 164 + return err; 146 165 } 147 166 148 167 static int pf_reprovision_sched_if_idle(struct xe_gt *gt)
+63
drivers/gpu/drm/xe/xe_gt_sriov_vf.c
··· 27 27 #include "xe_guc_relay.h" 28 28 #include "xe_mmio.h" 29 29 #include "xe_sriov.h" 30 + #include "xe_sriov_vf.h" 30 31 #include "xe_uc_fw.h" 31 32 #include "xe_wopcm.h" 32 33 ··· 222 221 return err; 223 222 224 223 return 0; 224 + } 225 + 226 + static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) 227 + { 228 + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { 229 + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | 230 + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | 231 + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), 232 + }; 233 + int ret; 234 + 235 + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); 236 + 237 + return ret > 0 ? -EPROTO : ret; 238 + } 239 + 240 + /** 241 + * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. 242 + * @gt: the &xe_gt struct instance linked to target GuC 243 + * 244 + * Returns: 0 if the operation completed successfully, or a negative error 245 + * code otherwise. 246 + */ 247 + int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) 248 + { 249 + struct xe_guc *guc = &gt->uc.guc; 250 + int err; 251 + 252 + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); 253 + 254 + err = guc_action_vf_notify_resfix_done(guc); 255 + if (unlikely(err)) 256 + xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", 257 + ERR_PTR(err)); 258 + else 259 + xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); 260 + 261 + return err; 225 262 } 226 263 227 264 static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, ··· 729 690 failed: 730 691 xe_gt_sriov_err(gt, "Failed to get version info (%pe)\n", ERR_PTR(err)); 731 692 return err; 693 + } 694 + 695 + /** 696 + * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, 697 + * or just mark that a GuC is ready for it. 698 + * @gt: the &xe_gt struct instance linked to target GuC 699 + * 700 + * This function shall be called only by VF. 
701 + */ 702 + void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) 703 + { 704 + struct xe_device *xe = gt_to_xe(gt); 705 + 706 + xe_gt_assert(gt, IS_SRIOV_VF(xe)); 707 + 708 + set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); 709 + /* 710 + * We need to be certain that if all flags were set, at least one 711 + * thread will notice that and schedule the recovery. 712 + */ 713 + smp_mb__after_atomic(); 714 + 715 + xe_gt_sriov_info(gt, "ready for recovery after migration\n"); 716 + xe_sriov_vf_start_migration_recovery(xe); 732 717 } 733 718 734 719 static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
+2
drivers/gpu/drm/xe/xe_gt_sriov_vf.h
··· 17 17 int xe_gt_sriov_vf_connect(struct xe_gt *gt); 18 18 int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); 19 19 int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); 20 + int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); 21 + void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); 20 22 21 23 u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); 22 24 u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
+2 -6
drivers/gpu/drm/xe/xe_gt_stats.h
··· 6 6 #ifndef _XE_GT_STATS_H_ 7 7 #define _XE_GT_STATS_H_ 8 8 9 + #include "xe_gt_stats_types.h" 10 + 9 11 struct xe_gt; 10 12 struct drm_printer; 11 - 12 - enum xe_gt_stats_id { 13 - XE_GT_STATS_ID_TLB_INVAL, 14 - /* must be the last entry */ 15 - __XE_GT_STATS_NUM_IDS, 16 - }; 17 13 18 14 #ifdef CONFIG_DEBUG_FS 19 15 int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
+15
drivers/gpu/drm/xe/xe_gt_stats_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_GT_STATS_TYPES_H_ 7 + #define _XE_GT_STATS_TYPES_H_ 8 + 9 + enum xe_gt_stats_id { 10 + XE_GT_STATS_ID_TLB_INVAL, 11 + /* must be the last entry */ 12 + __XE_GT_STATS_NUM_IDS, 13 + }; 14 + 15 + #endif
+2
drivers/gpu/drm/xe/xe_gt_throttle.c
··· 8 8 #include <regs/xe_gt_regs.h> 9 9 #include "xe_device.h" 10 10 #include "xe_gt.h" 11 + #include "xe_gt_printk.h" 11 12 #include "xe_gt_sysfs.h" 12 13 #include "xe_gt_throttle.h" 13 14 #include "xe_mmio.h" ··· 54 53 { 55 54 u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; 56 55 56 + xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); 57 57 return status; 58 58 } 59 59
+19 -2
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 65 65 __invalidation_fence_signal(xe, fence); 66 66 } 67 67 68 + void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) 69 + { 70 + if (WARN_ON_ONCE(!fence->gt)) 71 + return; 72 + 73 + __invalidation_fence_signal(gt_to_xe(fence->gt), fence); 74 + } 75 + 68 76 static void xe_gt_tlb_fence_timeout(struct work_struct *work) 69 77 { 70 78 struct xe_gt *gt = container_of(work, struct xe_gt, ··· 261 253 0, /* seqno, replaced in send_tlb_invalidation */ 262 254 MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), 263 255 }; 256 + int ret; 264 257 265 - return send_tlb_invalidation(&gt->uc.guc, fence, action, 266 - ARRAY_SIZE(action)); 258 + ret = send_tlb_invalidation(&gt->uc.guc, fence, action, 259 + ARRAY_SIZE(action)); 260 + /* 261 + * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches 262 + * should be nuked on a GT reset so this error can be ignored. 263 + */ 264 + if (ret == -ECANCELED) 265 + return 0; 266 + 267 + return ret; 267 268 } 268 269 269 270 /**
+1
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
··· 28 28 void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, 29 29 struct xe_gt_tlb_invalidation_fence *fence, 30 30 bool stack); 31 + void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); 31 32 32 33 static inline void 33 34 xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
+2 -2
drivers/gpu/drm/xe/xe_gt_types.h
··· 11 11 #include "xe_gt_idle_types.h" 12 12 #include "xe_gt_sriov_pf_types.h" 13 13 #include "xe_gt_sriov_vf_types.h" 14 - #include "xe_gt_stats.h" 14 + #include "xe_gt_stats_types.h" 15 15 #include "xe_hw_engine_types.h" 16 16 #include "xe_hw_fence_types.h" 17 - #include "xe_oa.h" 17 + #include "xe_oa_types.h" 18 18 #include "xe_reg_sr_types.h" 19 19 #include "xe_sa_types.h" 20 20 #include "xe_uc_types.h"
+318 -2
drivers/gpu/drm/xe/xe_guc.c
··· 44 44 struct xe_bo *bo) 45 45 { 46 46 struct xe_device *xe = guc_to_xe(guc); 47 - u32 addr = xe_bo_ggtt_addr(bo); 47 + u32 addr; 48 + 49 + /* 50 + * For most BOs, the address on the allocating tile is fine. However for 51 + * some, e.g. G2G CTB, the address on a specific tile is required as it 52 + * might be different for each tile. So, just always ask for the address 53 + * on the target GuC. 54 + */ 55 + addr = __xe_bo_ggtt_addr(bo, gt_to_tile(guc_to_gt(guc))->id); 48 56 49 57 /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ 50 58 xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); ··· 252 244 xe_mmio_write32(&gt->mmio, SOFT_SCRATCH(1 + i), guc->params[i]); 253 245 } 254 246 247 + static int guc_action_register_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev, 248 + u32 desc_addr, u32 buff_addr, u32 size) 249 + { 250 + struct xe_gt *gt = guc_to_gt(guc); 251 + struct xe_device *xe = gt_to_xe(gt); 252 + u32 action[] = { 253 + XE_GUC_ACTION_REGISTER_G2G, 254 + FIELD_PREP(XE_G2G_REGISTER_SIZE, size / SZ_4K - 1) | 255 + FIELD_PREP(XE_G2G_REGISTER_TYPE, type) | 256 + FIELD_PREP(XE_G2G_REGISTER_TILE, dst_tile) | 257 + FIELD_PREP(XE_G2G_REGISTER_DEVICE, dst_dev), 258 + desc_addr, 259 + buff_addr, 260 + }; 261 + 262 + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); 263 + xe_assert(xe, !(size % SZ_4K)); 264 + 265 + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); 266 + } 267 + 268 + static int guc_action_deregister_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev) 269 + { 270 + struct xe_gt *gt = guc_to_gt(guc); 271 + struct xe_device *xe = gt_to_xe(gt); 272 + u32 action[] = { 273 + XE_GUC_ACTION_DEREGISTER_G2G, 274 + FIELD_PREP(XE_G2G_DEREGISTER_TYPE, type) | 275 + FIELD_PREP(XE_G2G_DEREGISTER_TILE, dst_tile) | 276 + FIELD_PREP(XE_G2G_DEREGISTER_DEVICE, dst_dev), 277 + }; 278 + 279 + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); 280 + 281 + return 
xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); 282 + } 283 + 284 + #define G2G_DEV(gt) (((gt)->info.type == XE_GT_TYPE_MAIN) ? 0 : 1) 285 + 286 + #define G2G_BUFFER_SIZE (SZ_4K) 287 + #define G2G_DESC_SIZE (64) 288 + #define G2G_DESC_AREA_SIZE (SZ_4K) 289 + 290 + /* 291 + * Generate a unique id for each bi-directional CTB for each pair of 292 + * near and far tiles/devices. The id can then be used as an index into 293 + * a single allocation that is sub-divided into multiple CTBs. 294 + * 295 + * For example, with two devices per tile and two tiles, the table should 296 + * look like: 297 + * Far <tile>.<dev> 298 + * 0.0 0.1 1.0 1.1 299 + * N 0.0 --/-- 00/01 02/03 04/05 300 + * e 0.1 01/00 --/-- 06/07 08/09 301 + * a 1.0 03/02 07/06 --/-- 10/11 302 + * r 1.1 05/04 09/08 11/10 --/-- 303 + * 304 + * Where each entry is Rx/Tx channel id. 305 + * 306 + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would 307 + * be reading from channel #11 and writing to channel #10. Whereas, 308 + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 
309 + */ 310 + static unsigned int g2g_slot(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, 311 + u32 type, u32 max_inst, bool have_dev) 312 + { 313 + u32 near = near_tile, far = far_tile; 314 + u32 idx = 0, x, y, direction; 315 + int i; 316 + 317 + if (have_dev) { 318 + near = (near << 1) | near_dev; 319 + far = (far << 1) | far_dev; 320 + } 321 + 322 + /* No need to send to one's self */ 323 + if (far == near) 324 + return -1; 325 + 326 + if (far > near) { 327 + /* Top right table half */ 328 + x = far; 329 + y = near; 330 + 331 + /* T/R is 'forwards' direction */ 332 + direction = type; 333 + } else { 334 + /* Bottom left table half */ 335 + x = near; 336 + y = far; 337 + 338 + /* B/L is 'backwards' direction */ 339 + direction = (1 - type); 340 + } 341 + 342 + /* Count the rows prior to the target */ 343 + for (i = y; i > 0; i--) 344 + idx += max_inst - i; 345 + 346 + /* Count this row up to the target */ 347 + idx += (x - 1 - y); 348 + 349 + /* Slots are in Rx/Tx pairs */ 350 + idx *= 2; 351 + 352 + /* Pick Rx/Tx direction */ 353 + idx += direction; 354 + 355 + return idx; 356 + } 357 + 358 + static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 type, bool have_dev) 359 + { 360 + struct xe_gt *near_gt = guc_to_gt(near_guc); 361 + struct xe_device *xe = gt_to_xe(near_gt); 362 + struct xe_bo *g2g_bo; 363 + u32 near_tile = gt_to_tile(near_gt)->id; 364 + u32 near_dev = G2G_DEV(near_gt); 365 + u32 far_tile = gt_to_tile(far_gt)->id; 366 + u32 far_dev = G2G_DEV(far_gt); 367 + u32 max = xe->info.gt_count; 368 + u32 base, desc, buf; 369 + int slot; 370 + 371 + /* G2G is not allowed between different cards */ 372 + xe_assert(xe, xe == gt_to_xe(far_gt)); 373 + 374 + g2g_bo = near_guc->g2g.bo; 375 + xe_assert(xe, g2g_bo); 376 + 377 + slot = g2g_slot(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); 378 + xe_assert(xe, slot >= 0); 379 + 380 + base = guc_bo_ggtt_addr(near_guc, g2g_bo); 381 + desc = base + slot * G2G_DESC_SIZE; 
382 + buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; 383 + 384 + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); 385 + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); 386 + 387 + return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, 388 + desc, buf, G2G_BUFFER_SIZE); 389 + } 390 + 391 + static void guc_g2g_deregister(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type) 392 + { 393 + guc_action_deregister_g2g_buffer(guc, type, far_tile, far_dev); 394 + } 395 + 396 + static u32 guc_g2g_size(struct xe_guc *guc) 397 + { 398 + struct xe_gt *gt = guc_to_gt(guc); 399 + struct xe_device *xe = gt_to_xe(gt); 400 + unsigned int count = xe->info.gt_count; 401 + u32 num_channels = (count * (count - 1)) / 2; 402 + 403 + xe_assert(xe, num_channels * XE_G2G_TYPE_LIMIT * G2G_DESC_SIZE <= G2G_DESC_AREA_SIZE); 404 + 405 + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; 406 + } 407 + 408 + static bool xe_guc_g2g_wanted(struct xe_device *xe) 409 + { 410 + /* Can't do GuC to GuC communication if there is only one GuC */ 411 + if (xe->info.gt_count <= 1) 412 + return false; 413 + 414 + /* No current user */ 415 + return false; 416 + } 417 + 418 + static int guc_g2g_alloc(struct xe_guc *guc) 419 + { 420 + struct xe_gt *gt = guc_to_gt(guc); 421 + struct xe_device *xe = gt_to_xe(gt); 422 + struct xe_tile *tile = gt_to_tile(gt); 423 + struct xe_bo *bo; 424 + u32 g2g_size; 425 + 426 + if (guc->g2g.bo) 427 + return 0; 428 + 429 + if (gt->info.id != 0) { 430 + struct xe_gt *root_gt = xe_device_get_gt(xe, 0); 431 + struct xe_guc *root_guc = &root_gt->uc.guc; 432 + struct xe_bo *bo; 433 + 434 + bo = xe_bo_get(root_guc->g2g.bo); 435 + if (!bo) 436 + return -ENODEV; 437 + 438 + guc->g2g.bo = bo; 439 + guc->g2g.owned = false; 440 + return 0; 441 + } 442 + 443 + g2g_size = guc_g2g_size(guc); 444 + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, 445 + XE_BO_FLAG_VRAM_IF_DGFX(tile) | 446 + 
XE_BO_FLAG_GGTT | 447 + XE_BO_FLAG_GGTT_ALL | 448 + XE_BO_FLAG_GGTT_INVALIDATE); 449 + if (IS_ERR(bo)) 450 + return PTR_ERR(bo); 451 + 452 + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size); 453 + guc->g2g.bo = bo; 454 + guc->g2g.owned = true; 455 + 456 + return 0; 457 + } 458 + 459 + static void guc_g2g_fini(struct xe_guc *guc) 460 + { 461 + if (!guc->g2g.bo) 462 + return; 463 + 464 + /* Unpinning the owned object is handled by generic shutdown */ 465 + if (!guc->g2g.owned) 466 + xe_bo_put(guc->g2g.bo); 467 + 468 + guc->g2g.bo = NULL; 469 + } 470 + 471 + static int guc_g2g_start(struct xe_guc *guc) 472 + { 473 + struct xe_gt *far_gt, *gt = guc_to_gt(guc); 474 + struct xe_device *xe = gt_to_xe(gt); 475 + unsigned int i, j; 476 + int t, err; 477 + bool have_dev; 478 + 479 + if (!guc->g2g.bo) { 480 + int ret; 481 + 482 + ret = guc_g2g_alloc(guc); 483 + if (ret) 484 + return ret; 485 + } 486 + 487 + /* GuC interface will need extending if more GT device types are ever created. */ 488 + xe_gt_assert(gt, (gt->info.type == XE_GT_TYPE_MAIN) || (gt->info.type == XE_GT_TYPE_MEDIA)); 489 + 490 + /* Channel numbering depends on whether there are multiple GTs per tile */ 491 + have_dev = xe->info.gt_count > xe->info.tile_count; 492 + 493 + for_each_gt(far_gt, xe, i) { 494 + u32 far_tile, far_dev; 495 + 496 + if (far_gt->info.id == gt->info.id) 497 + continue; 498 + 499 + far_tile = gt_to_tile(far_gt)->id; 500 + far_dev = G2G_DEV(far_gt); 501 + 502 + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { 503 + err = guc_g2g_register(guc, far_gt, t, have_dev); 504 + if (err) { 505 + while (--t >= 0) 506 + guc_g2g_deregister(guc, far_tile, far_dev, t); 507 + goto err_deregister; 508 + } 509 + } 510 + } 511 + 512 + return 0; 513 + 514 + err_deregister: 515 + for_each_gt(far_gt, xe, j) { 516 + u32 tile, dev; 517 + 518 + if (far_gt->info.id == gt->info.id) 519 + continue; 520 + 521 + if (j >= i) 522 + break; 523 + 524 + tile = gt_to_tile(far_gt)->id; 525 + dev = G2G_DEV(far_gt); 526 + 527 + for (t = 
0; t < XE_G2G_TYPE_LIMIT; t++) 528 + guc_g2g_deregister(guc, tile, dev, t); 529 + } 530 + 531 + return err; 532 + } 533 + 255 534 static void guc_fini_hw(void *arg) 256 535 { 257 536 struct xe_guc *guc = arg; ··· 548 253 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); 549 254 xe_uc_fini_hw(&guc_to_gt(guc)->uc); 550 255 xe_force_wake_put(gt_to_fw(gt), fw_ref); 256 + 257 + guc_g2g_fini(guc); 551 258 } 552 259 553 260 /** ··· 720 423 721 424 int xe_guc_post_load_init(struct xe_guc *guc) 722 425 { 426 + int ret; 427 + 723 428 xe_guc_ads_populate_post_load(&guc->ads); 429 + 430 + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { 431 + ret = guc_g2g_start(guc); 432 + if (ret) 433 + return ret; 434 + } 435 + 724 436 guc->submission_state.enabled = true; 725 437 726 438 return 0; ··· 1251 945 1252 946 BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); 1253 947 1254 - xe_assert(xe, !xe_guc_ct_enabled(&guc->ct)); 1255 948 xe_assert(xe, len); 1256 949 xe_assert(xe, len <= VF_SW_FLAG_COUNT); 1257 950 xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); ··· 1404 1099 return guc_self_cfg(guc, key, 2, val); 1405 1100 } 1406 1101 1102 + static void xe_guc_sw_0_irq_handler(struct xe_guc *guc) 1103 + { 1104 + struct xe_gt *gt = guc_to_gt(guc); 1105 + 1106 + if (IS_SRIOV_VF(gt_to_xe(gt))) 1107 + xe_gt_sriov_vf_migrated_event_handler(gt); 1108 + } 1109 + 1407 1110 void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) 1408 1111 { 1409 1112 if (iir & GUC_INTR_GUC2HOST) 1410 1113 xe_guc_ct_irq_handler(&guc->ct); 1114 + 1115 + if (iir & GUC_INTR_SW_INT_0) 1116 + xe_guc_sw_0_irq_handler(guc); 1411 1117 } 1412 1118 1413 1119 void xe_guc_sanitize(struct xe_guc *guc)
+2 -9
drivers/gpu/drm/xe/xe_guc_ads.c
··· 231 231 guc_ads_private_data_size(ads); 232 232 } 233 233 234 - static bool needs_wa_1607983814(struct xe_device *xe) 235 - { 236 - return GRAPHICS_VERx100(xe) < 1250; 237 - } 238 - 239 234 static size_t calculate_regset_size(struct xe_gt *gt) 240 235 { 241 236 struct xe_reg_sr_entry *sr_entry; ··· 245 250 246 251 count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; 247 252 248 - if (needs_wa_1607983814(gt_to_xe(gt))) 253 + if (XE_WA(gt, 1607983814)) 249 254 count += LNCFCMOCS_REG_COUNT; 250 255 251 256 return count * sizeof(struct guc_mmio_reg); ··· 704 709 struct iosys_map *regset_map, 705 710 struct xe_hw_engine *hwe) 706 711 { 707 - struct xe_device *xe = ads_to_xe(ads); 708 712 struct xe_hw_engine *hwe_rcs_reset_domain = 709 713 xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); 710 714 struct xe_reg_sr_entry *entry; ··· 734 740 guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); 735 741 } 736 742 737 - /* Wa_1607983814 */ 738 - if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { 743 + if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { 739 744 for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { 740 745 guc_mmio_regset_write_one(ads, regset_map, 741 746 XELP_LNCFCMOCS(i), count++);
+14 -19
drivers/gpu/drm/xe/xe_guc_capture.c
··· 1806 1806 if (!devcore_snapshot->matched_node) 1807 1807 return; 1808 1808 1809 - xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC); 1810 1809 xe_gt_assert(gt, snapshot->hwe); 1811 1810 1812 1811 capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class); ··· 1814 1815 snapshot->name ? snapshot->name : "", 1815 1816 snapshot->logical_instance); 1816 1817 drm_printf(p, "\tCapture_source: %s\n", 1817 - snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual"); 1818 + devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? 1819 + "GuC" : "Manual"); 1818 1820 drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]); 1819 1821 drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", 1820 1822 snapshot->forcewake.domain, snapshot->forcewake.ref); ··· 1840 1840 } 1841 1841 1842 1842 /** 1843 - * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job. 1844 - * @job: The job object. 1843 + * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue. 1844 + * @q: The exec queue object 1845 1845 * 1846 - * Search within the capture outlist for the job, could be used for check if 1847 - * GuC capture is ready for the job. 1846 + * Search within the capture outlist for the queue, could be used for check if 1847 + * GuC capture is ready for the queue. 1848 1848 * If found, the locked boolean of the node will be flagged. 
1849 1849 * 1850 1850 * Returns: found guc-capture node ptr else NULL 1851 1851 */ 1852 1852 struct __guc_capture_parsed_output * 1853 - xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) 1853 + xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q) 1854 1854 { 1855 1855 struct xe_hw_engine *hwe; 1856 1856 enum xe_hw_engine_id id; 1857 - struct xe_exec_queue *q; 1858 1857 struct xe_device *xe; 1859 1858 u16 guc_class = GUC_LAST_ENGINE_CLASS + 1; 1860 1859 struct xe_devcoredump_snapshot *ss; 1861 1860 1862 - if (!job) 1863 - return NULL; 1864 - 1865 - q = job->q; 1866 1861 if (!q || !q->gt) 1867 1862 return NULL; 1868 1863 ··· 1869 1874 if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC) 1870 1875 return ss->matched_node; 1871 1876 1872 - /* Find hwe for the job */ 1877 + /* Find hwe for the queue */ 1873 1878 for_each_hw_engine(hwe, q->gt, id) { 1874 1879 if (hwe != q->hwe) 1875 1880 continue; ··· 1901 1906 } 1902 1907 1903 1908 /** 1904 - * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine 1905 - * @job: The job object 1909 + * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine 1910 + * @q: The exec queue object 1906 1911 * 1907 1912 * Take snapshot of associated HW Engine 1908 1913 * 1909 1914 * Returns: None. 
1910 1915 */ 1911 1916 void 1912 - xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) 1917 + xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) 1913 1918 { 1914 - struct xe_exec_queue *q = job->q; 1915 1919 struct xe_device *xe = gt_to_xe(q->gt); 1916 1920 struct xe_devcoredump *coredump = &xe->devcoredump; 1917 1921 struct xe_hw_engine *hwe; ··· 1928 1934 } 1929 1935 1930 1936 if (!coredump->snapshot.hwe[id]) { 1931 - coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job); 1937 + coredump->snapshot.hwe[id] = 1938 + xe_hw_engine_snapshot_capture(hwe, q); 1932 1939 } else { 1933 1940 struct __guc_capture_parsed_output *new; 1934 1941 1935 - new = xe_guc_capture_get_matching_and_lock(job); 1942 + new = xe_guc_capture_get_matching_and_lock(q); 1936 1943 if (new) { 1937 1944 struct xe_guc *guc = &q->gt->uc.guc; 1938 1945
+3 -3
drivers/gpu/drm/xe/xe_guc_capture.h
··· 11 11 #include "xe_guc.h" 12 12 #include "xe_guc_fwif.h" 13 13 14 + struct xe_exec_queue; 14 15 struct xe_guc; 15 16 struct xe_hw_engine; 16 17 struct xe_hw_engine_snapshot; 17 - struct xe_sched_job; 18 18 19 19 static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class) 20 20 { ··· 50 50 const struct __guc_mmio_reg_descr_group * 51 51 xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type, 52 52 enum guc_capture_list_class_type capture_class, bool is_ext); 53 - struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job); 53 + struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q); 54 54 void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot); 55 55 void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); 56 - void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job); 56 + void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q); 57 57 void xe_guc_capture_steered_list_init(struct xe_guc *guc); 58 58 void xe_guc_capture_put_matched_nodes(struct xe_guc *guc); 59 59 int xe_guc_capture_init(struct xe_guc *guc);
+28 -4
drivers/gpu/drm/xe/xe_guc_ct.c
··· 54 54 CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */ 55 55 CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */ 56 56 CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */ 57 + CT_DEAD_CRASH, /* 0x8000 */ 57 58 }; 58 59 59 60 static void ct_dead_worker_func(struct work_struct *w); ··· 470 469 * after any existing dead state has been dumped. 471 470 */ 472 471 spin_lock_irq(&ct->dead.lock); 473 - if (ct->dead.reason) 472 + if (ct->dead.reason) { 474 473 ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); 474 + queue_work(system_unbound_wq, &ct->dead.worker); 475 + } 475 476 spin_unlock_irq(&ct->dead.lock); 476 477 #endif 477 478 ··· 1020 1017 } 1021 1018 1022 1019 ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); 1023 - 1024 1020 if (!ret) { 1025 1021 LNL_FLUSH_WORK(&ct->g2h_worker); 1026 1022 if (g2h_fence.done) { ··· 1119 1117 case XE_GUC_ACTION_TLB_INVALIDATION_DONE: 1120 1118 g2h_release_space(ct, len); 1121 1119 } 1120 + 1121 + return 0; 1122 + } 1123 + 1124 + static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) 1125 + { 1126 + struct xe_gt *gt = ct_to_gt(ct); 1127 + 1128 + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) 1129 + xe_gt_err(gt, "GuC Crash dump notification\n"); 1130 + else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION) 1131 + xe_gt_err(gt, "GuC Exception notification\n"); 1132 + else 1133 + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action); 1134 + 1135 + CT_DEAD(ct, NULL, CRASH); 1136 + 1137 + kick_reset(ct); 1122 1138 1123 1139 return 0; 1124 1140 } ··· 1315 1295 case GUC_ACTION_GUC2PF_ADVERSE_EVENT: 1316 1296 ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); 1317 1297 break; 1298 + case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: 1299 + case XE_GUC_ACTION_NOTIFY_EXCEPTION: 1300 + ret = guc_crash_process_msg(ct, action); 1301 + break; 1318 1302 default: 1319 1303 xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); 1320 1304 } 1321 1305 1322 1306 if (ret) { 1323 - xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", 1324 - 
action, ERR_PTR(ret)); 1307 + xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n", 1308 + action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg); 1325 1309 CT_DEAD(ct, NULL, PROCESS_FAILED); 1326 1310 } 1327 1311
+1
drivers/gpu/drm/xe/xe_guc_fwif.h
··· 17 17 #define G2H_LEN_DW_TLB_INVALIDATE 3 18 18 19 19 #define GUC_ID_MAX 65535 20 + #define GUC_ID_UNKNOWN 0xffffffff 20 21 21 22 #define GUC_CONTEXT_DISABLE 0 22 23 #define GUC_CONTEXT_ENABLE 1
+2
drivers/gpu/drm/xe/xe_guc_klv_helpers.c
··· 49 49 return "begin_db_id"; 50 50 case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: 51 51 return "begin_ctx_id"; 52 + case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY: 53 + return "sched_priority"; 52 54 53 55 /* VF CFG threshold keys */ 54 56 #define define_threshold_key_to_string_case(TAG, NAME, ...) \
+78 -73
drivers/gpu/drm/xe/xe_guc_submit.c
··· 412 412 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) 413 413 { 414 414 struct exec_queue_policy policy; 415 - struct xe_device *xe = guc_to_xe(guc); 416 415 enum xe_exec_queue_priority prio = q->sched_props.priority; 417 416 u32 timeslice_us = q->sched_props.timeslice_us; 418 417 u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; 419 418 420 - xe_assert(xe, exec_queue_registered(q)); 419 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 421 420 422 421 __guc_exec_queue_policy_start_klv(&policy, q->guc->id); 423 422 __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); ··· 450 451 struct guc_ctxt_registration_info *info) 451 452 { 452 453 #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) 453 - struct xe_device *xe = guc_to_xe(guc); 454 454 u32 action[MAX_MLRC_REG_SIZE]; 455 455 int len = 0; 456 456 int i; 457 457 458 - xe_assert(xe, xe_exec_queue_is_parallel(q)); 458 + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); 459 459 460 460 action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; 461 461 action[len++] = info->flags; ··· 477 479 action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); 478 480 } 479 481 480 - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); 482 + xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); 481 483 #undef MAX_MLRC_REG_SIZE 482 484 483 485 xe_guc_ct_send(&guc->ct, action, len, 0, 0); ··· 511 513 struct xe_lrc *lrc = q->lrc[0]; 512 514 struct guc_ctxt_registration_info info; 513 515 514 - xe_assert(xe, !exec_queue_registered(q)); 516 + xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); 515 517 516 518 memset(&info, 0, sizeof(info)); 517 519 info.context_idx = q->guc->id; ··· 601 603 if (wq_wait_for_space(q, wq_space_until_wrap(q))) 602 604 return -ENODEV; 603 605 604 - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); 606 + xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); 605 607 606 608 parallel_write(xe, map, wq[q->guc->wqi_tail 
/ sizeof(u32)], 607 609 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | ··· 641 643 wqi[i++] = lrc->ring.tail / sizeof(u64); 642 644 } 643 645 644 - xe_assert(xe, i == wqi_size / sizeof(u32)); 646 + xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); 645 647 646 648 iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, 647 649 wq[q->guc->wqi_tail / sizeof(u32)])); 648 650 xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); 649 651 q->guc->wqi_tail += wqi_size; 650 - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); 652 + xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); 651 653 652 654 xe_device_wmb(xe); 653 655 ··· 659 661 static void submit_exec_queue(struct xe_exec_queue *q) 660 662 { 661 663 struct xe_guc *guc = exec_queue_to_guc(q); 662 - struct xe_device *xe = guc_to_xe(guc); 663 664 struct xe_lrc *lrc = q->lrc[0]; 664 665 u32 action[3]; 665 666 u32 g2h_len = 0; ··· 666 669 int len = 0; 667 670 bool extra_submit = false; 668 671 669 - xe_assert(xe, exec_queue_registered(q)); 672 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 670 673 671 674 if (xe_exec_queue_is_parallel(q)) 672 675 wq_item_append(q); ··· 713 716 struct xe_sched_job *job = to_xe_sched_job(drm_job); 714 717 struct xe_exec_queue *q = job->q; 715 718 struct xe_guc *guc = exec_queue_to_guc(q); 716 - struct xe_device *xe = guc_to_xe(guc); 717 719 struct dma_fence *fence = NULL; 718 720 bool lr = xe_exec_queue_is_lr(q); 719 721 720 - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 721 - exec_queue_banned(q) || exec_queue_suspended(q)); 722 + xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || 723 + exec_queue_banned(q) || exec_queue_suspended(q)); 722 724 723 725 trace_xe_sched_job_run(job); 724 726 ··· 819 823 */ 820 824 void xe_guc_submit_wedge(struct xe_guc *guc) 821 825 { 822 - struct xe_device *xe = guc_to_xe(guc); 826 + struct xe_gt *gt = guc_to_gt(guc); 823 827 struct xe_exec_queue *q; 824 828 unsigned 
long index; 825 829 int err; ··· 829 833 err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, 830 834 guc_submit_wedged_fini, guc); 831 835 if (err) { 832 - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); 836 + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " 837 + "Although device is wedged.\n"); 833 838 return; 834 839 } 835 840 ··· 862 865 container_of(w, struct xe_guc_exec_queue, lr_tdr); 863 866 struct xe_exec_queue *q = ge->q; 864 867 struct xe_guc *guc = exec_queue_to_guc(q); 865 - struct xe_device *xe = guc_to_xe(guc); 866 868 struct xe_gpu_scheduler *sched = &ge->sched; 867 869 bool wedged; 868 870 869 - xe_assert(xe, xe_exec_queue_is_lr(q)); 871 + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); 870 872 trace_xe_exec_queue_lr_cleanup(q); 871 873 872 874 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); ··· 899 903 !exec_queue_pending_disable(q) || 900 904 xe_guc_read_stopped(guc), HZ * 5); 901 905 if (!ret) { 902 - drm_warn(&xe->drm, "Schedule disable failed to respond"); 906 + xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", 907 + q->guc->id); 908 + xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n", 909 + q->guc->id); 903 910 xe_sched_submission_start(sched); 904 911 xe_gt_reset_async(q->gt); 905 912 return; 906 913 } 907 914 } 915 + 916 + if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) 917 + xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id); 908 918 909 919 xe_sched_submission_start(sched); 910 920 } ··· 1070 1068 * do manual capture first and decide later if we need to use it 1071 1069 */ 1072 1070 if (!exec_queue_killed(q) && !xe->devcoredump.captured && 1073 - !xe_guc_capture_get_matching_and_lock(job)) { 1071 + !xe_guc_capture_get_matching_and_lock(q)) { 1074 1072 /* take force wake before engine register manual capture */ 1075 1073 fw_ref = xe_force_wake_get(gt_to_fw(q->gt), 
XE_FORCEWAKE_ALL); 1076 1074 if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) 1077 1075 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); 1078 1076 1079 - xe_engine_snapshot_capture_for_job(job); 1077 + xe_engine_snapshot_capture_for_queue(q); 1080 1078 1081 1079 xe_force_wake_put(gt_to_fw(q->gt), fw_ref); 1082 1080 } ··· 1134 1132 if (!ret || xe_guc_read_stopped(guc)) { 1135 1133 trigger_reset: 1136 1134 if (!ret) 1137 - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); 1135 + xe_gt_warn(guc_to_gt(guc), 1136 + "Schedule disable failed to respond, guc_id=%d", 1137 + q->guc->id); 1138 + xe_devcoredump(q, job, 1139 + "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", 1140 + q->guc->id, ret, xe_guc_read_stopped(guc)); 1138 1141 set_exec_queue_extra_ref(q); 1139 1142 xe_exec_queue_get(q); /* GT reset owns this */ 1140 1143 set_exec_queue_banned(q); ··· 1169 1162 trace_xe_sched_job_timedout(job); 1170 1163 1171 1164 if (!exec_queue_killed(q)) 1172 - xe_devcoredump(job); 1165 + xe_devcoredump(q, job, 1166 + "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1167 + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1168 + q->guc->id, q->flags); 1173 1169 1174 1170 /* 1175 1171 * Kernel jobs should never fail, nor should VM jobs if they do ··· 1287 1277 { 1288 1278 struct xe_exec_queue *q = msg->private_data; 1289 1279 struct xe_guc *guc = exec_queue_to_guc(q); 1290 - struct xe_device *xe = guc_to_xe(guc); 1291 1280 1292 - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1281 + xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); 1293 1282 trace_xe_exec_queue_cleanup_entity(q); 1294 1283 1295 1284 if (exec_queue_registered(q)) ··· 1324 1315 static void suspend_fence_signal(struct xe_exec_queue *q) 1325 1316 { 1326 1317 struct xe_guc *guc = exec_queue_to_guc(q); 1327 - struct xe_device *xe = guc_to_xe(guc); 1328 1318 1329 - xe_assert(xe, exec_queue_suspended(q) || 
exec_queue_killed(q) || 1330 - xe_guc_read_stopped(guc)); 1331 - xe_assert(xe, q->guc->suspend_pending); 1319 + xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || 1320 + xe_guc_read_stopped(guc)); 1321 + xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); 1332 1322 1333 1323 __suspend_fence_signal(q); 1334 1324 } ··· 1423 1415 { 1424 1416 struct xe_gpu_scheduler *sched; 1425 1417 struct xe_guc *guc = exec_queue_to_guc(q); 1426 - struct xe_device *xe = guc_to_xe(guc); 1427 1418 struct xe_guc_exec_queue *ge; 1428 1419 long timeout; 1429 1420 int err, i; 1430 1421 1431 - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); 1422 + xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); 1432 1423 1433 1424 ge = kzalloc(sizeof(*ge), GFP_KERNEL); 1434 1425 if (!ge) ··· 1640 1633 struct xe_gpu_scheduler *sched = &q->guc->sched; 1641 1634 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; 1642 1635 struct xe_guc *guc = exec_queue_to_guc(q); 1643 - struct xe_device *xe = guc_to_xe(guc); 1644 1636 1645 - xe_assert(xe, !q->guc->suspend_pending); 1637 + xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); 1646 1638 1647 1639 xe_sched_msg_lock(sched); 1648 1640 guc_exec_queue_try_add_msg(q, msg, RESUME); ··· 1714 1708 ban = true; 1715 1709 } 1716 1710 } else if (xe_exec_queue_is_lr(q) && 1717 - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { 1711 + !xe_lrc_ring_is_idle(q->lrc[0])) { 1718 1712 ban = true; 1719 1713 } 1720 1714 ··· 1753 1747 { 1754 1748 struct xe_exec_queue *q; 1755 1749 unsigned long index; 1756 - struct xe_device *xe = guc_to_xe(guc); 1757 1750 1758 - xe_assert(xe, xe_guc_read_stopped(guc) == 1); 1751 + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 1759 1752 1760 1753 mutex_lock(&guc->submission_state.lock); 1761 1754 ··· 1796 1791 { 1797 1792 struct xe_exec_queue *q; 1798 1793 unsigned long index; 1799 - struct xe_device *xe = guc_to_xe(guc); 1800 1794 1801 - 
xe_assert(xe, xe_guc_read_stopped(guc) == 1); 1795 + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); 1802 1796 1803 1797 mutex_lock(&guc->submission_state.lock); 1804 1798 atomic_dec(&guc->submission_state.stopped); ··· 1818 1814 static struct xe_exec_queue * 1819 1815 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) 1820 1816 { 1821 - struct xe_device *xe = guc_to_xe(guc); 1817 + struct xe_gt *gt = guc_to_gt(guc); 1822 1818 struct xe_exec_queue *q; 1823 1819 1824 1820 if (unlikely(guc_id >= GUC_ID_MAX)) { 1825 - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); 1821 + xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); 1826 1822 return NULL; 1827 1823 } 1828 1824 1829 1825 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); 1830 1826 if (unlikely(!q)) { 1831 - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); 1827 + xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); 1832 1828 return NULL; 1833 1829 } 1834 1830 1835 - xe_assert(xe, guc_id >= q->guc->id); 1836 - xe_assert(xe, guc_id < (q->guc->id + q->width)); 1831 + xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); 1832 + xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); 1837 1833 1838 1834 return q; 1839 1835 } ··· 1902 1898 1903 1899 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1904 1900 { 1905 - struct xe_device *xe = guc_to_xe(guc); 1906 1901 struct xe_exec_queue *q; 1907 - u32 guc_id = msg[0]; 1908 - u32 runnable_state = msg[1]; 1902 + u32 guc_id, runnable_state; 1909 1903 1910 - if (unlikely(len < 2)) { 1911 - drm_err(&xe->drm, "Invalid length %u", len); 1904 + if (unlikely(len < 2)) 1912 1905 return -EPROTO; 1913 - } 1906 + 1907 + guc_id = msg[0]; 1908 + runnable_state = msg[1]; 1914 1909 1915 1910 q = g2h_exec_queue_lookup(guc, guc_id); 1916 1911 if (unlikely(!q)) ··· 1943 1940 1944 1941 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 1945 1942 { 1946 - struct xe_device *xe = guc_to_xe(guc); 1947 
1943 struct xe_exec_queue *q; 1948 - u32 guc_id = msg[0]; 1944 + u32 guc_id; 1949 1945 1950 - if (unlikely(len < 1)) { 1951 - drm_err(&xe->drm, "Invalid length %u", len); 1946 + if (unlikely(len < 1)) 1952 1947 return -EPROTO; 1953 - } 1948 + 1949 + guc_id = msg[0]; 1954 1950 1955 1951 q = g2h_exec_queue_lookup(guc, guc_id); 1956 1952 if (unlikely(!q)) ··· 1971 1969 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) 1972 1970 { 1973 1971 struct xe_gt *gt = guc_to_gt(guc); 1974 - struct xe_device *xe = guc_to_xe(guc); 1975 1972 struct xe_exec_queue *q; 1976 - u32 guc_id = msg[0]; 1973 + u32 guc_id; 1977 1974 1978 - if (unlikely(len < 1)) { 1979 - drm_err(&xe->drm, "Invalid length %u", len); 1975 + if (unlikely(len < 1)) 1980 1976 return -EPROTO; 1981 - } 1977 + 1978 + guc_id = msg[0]; 1982 1979 1983 1980 q = g2h_exec_queue_lookup(guc, guc_id); 1984 1981 if (unlikely(!q)) ··· 2017 2016 { 2018 2017 u32 status; 2019 2018 2020 - if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) { 2021 - xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len); 2019 + if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) 2022 2020 return -EPROTO; 2023 - } 2024 2021 2025 2022 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; 2026 2023 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) ··· 2033 2034 u32 len) 2034 2035 { 2035 2036 struct xe_gt *gt = guc_to_gt(guc); 2036 - struct xe_device *xe = guc_to_xe(guc); 2037 2037 struct xe_exec_queue *q; 2038 - u32 guc_id = msg[0]; 2038 + u32 guc_id; 2039 2039 2040 - if (unlikely(len < 1)) { 2041 - drm_err(&xe->drm, "Invalid length %u", len); 2040 + if (unlikely(len < 1)) 2042 2041 return -EPROTO; 2042 + 2043 + guc_id = msg[0]; 2044 + 2045 + if (guc_id == GUC_ID_UNKNOWN) { 2046 + /* 2047 + * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF 2048 + * context. In such case only PF will be notified about that fault. 
2049 + */ 2050 + xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); 2051 + return 0; 2043 2052 } 2044 2053 2045 2054 q = g2h_exec_queue_lookup(guc, guc_id); ··· 2069 2062 2070 2063 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) 2071 2064 { 2072 - struct xe_device *xe = guc_to_xe(guc); 2065 + struct xe_gt *gt = guc_to_gt(guc); 2073 2066 u8 guc_class, instance; 2074 2067 u32 reason; 2075 2068 2076 - if (unlikely(len != 3)) { 2077 - drm_err(&xe->drm, "Invalid length %u", len); 2069 + if (unlikely(len != 3)) 2078 2070 return -EPROTO; 2079 - } 2080 2071 2081 2072 guc_class = msg[0]; 2082 2073 instance = msg[1]; 2083 2074 reason = msg[2]; 2084 2075 2085 2076 /* Unexpected failure of a hardware feature, log an actual error */ 2086 - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", 2087 - guc_class, instance, reason); 2077 + xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", 2078 + guc_class, instance, reason); 2088 2079 2089 - xe_gt_reset_async(guc_to_gt(guc)); 2080 + xe_gt_reset_async(gt); 2090 2081 2091 2082 return 0; 2092 2083 }
+10
drivers/gpu/drm/xe/xe_guc_types.h
··· 64 64 struct xe_guc_pc pc; 65 65 /** @dbm: GuC Doorbell Manager */ 66 66 struct xe_guc_db_mgr dbm; 67 + 68 + /** @g2g: GuC to GuC communication state */ 69 + struct { 70 + /** @g2g.bo: Storage for GuC to GuC communication channels */ 71 + struct xe_bo *bo; 72 + /** @g2g.owned: Is the BO owned by this GT or just mapped in */ 73 + bool owned; 74 + } g2g; 75 + 67 76 /** @submission_state: GuC submission state */ 68 77 struct { 69 78 /** @submission_state.idm: GuC context ID Manager */ ··· 88 79 /** @submission_state.fini_wq: submit fini wait queue */ 89 80 wait_queue_head_t fini_wq; 90 81 } submission_state; 82 + 91 83 /** @hwconfig: Hardware config state */ 92 84 struct { 93 85 /** @hwconfig.bo: buffer object of the hardware config */
+4 -4
drivers/gpu/drm/xe/xe_heci_gsc.c
··· 92 92 { 93 93 struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; 94 94 95 - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) 95 + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) 96 96 return; 97 97 98 98 if (heci_gsc->adev) { ··· 177 177 const struct heci_gsc_def *def; 178 178 int ret; 179 179 180 - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) 180 + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) 181 181 return; 182 182 183 183 heci_gsc->irq = -1; ··· 222 222 if ((iir & GSC_IRQ_INTF(1)) == 0) 223 223 return; 224 224 225 - if (!HAS_HECI_GSCFI(xe)) { 225 + if (!xe->info.has_heci_gscfi) { 226 226 drm_warn_once(&xe->drm, "GSC irq: not supported"); 227 227 return; 228 228 } ··· 242 242 if ((iir & CSC_IRQ_INTF(1)) == 0) 243 243 return; 244 244 245 - if (!HAS_HECI_CSCFI(xe)) { 245 + if (!xe->info.has_heci_cscfi) { 246 246 drm_warn_once(&xe->drm, "CSC irq: not supported"); 247 247 return; 248 248 }
+4 -7
drivers/gpu/drm/xe/xe_hw_engine.c
··· 574 574 xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); 575 575 576 576 xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); 577 - xe_reg_sr_apply_whitelist(hwe); 578 577 579 578 hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, 580 579 XE_BO_FLAG_VRAM_IF_DGFX(tile) | ··· 828 829 /** 829 830 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. 830 831 * @hwe: Xe HW Engine. 831 - * @job: The job object. 832 + * @q: The exec queue object. 832 833 * 833 834 * This can be printed out in a later stage like during dev_coredump 834 835 * analysis. ··· 837 838 * caller, using `xe_hw_engine_snapshot_free`. 838 839 */ 839 840 struct xe_hw_engine_snapshot * 840 - xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) 841 + xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) 841 842 { 842 843 struct xe_hw_engine_snapshot *snapshot; 843 844 struct __guc_capture_parsed_output *node; ··· 863 864 if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) 864 865 return snapshot; 865 866 866 - if (job) { 867 + if (q) { 867 868 /* If got guc capture, set source to GuC */ 868 - node = xe_guc_capture_get_matching_and_lock(job); 869 + node = xe_guc_capture_get_matching_and_lock(q); 869 870 if (node) { 870 871 struct xe_device *xe = gt_to_xe(hwe->gt); 871 872 struct xe_devcoredump *coredump = &xe->devcoredump; 872 873 873 874 coredump->snapshot.matched_node = node; 874 - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; 875 875 xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); 876 876 return snapshot; 877 877 } ··· 878 880 879 881 /* otherwise, do manual capture */ 880 882 xe_engine_manual_capture(hwe, snapshot); 881 - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; 882 883 xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); 883 884 884 885 return snapshot;
+2 -2
drivers/gpu/drm/xe/xe_hw_engine.h
··· 11 11 struct drm_printer; 12 12 struct drm_xe_engine_class_instance; 13 13 struct xe_device; 14 - struct xe_sched_job; 14 + struct xe_exec_queue; 15 15 16 16 #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN 17 17 #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN ··· 56 56 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, 57 57 enum xe_engine_class engine_class); 58 58 struct xe_hw_engine_snapshot * 59 - xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job); 59 + xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q); 60 60 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); 61 61 void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); 62 62 void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe);
-2
drivers/gpu/drm/xe/xe_hw_engine_types.h
··· 165 165 struct xe_hw_engine_snapshot { 166 166 /** @name: name of the hw engine */ 167 167 char *name; 168 - /** @source: Data source, either manual or GuC */ 169 - enum xe_hw_engine_snapshot_source_id source; 170 168 /** @hwe: hw engine */ 171 169 struct xe_hw_engine *hwe; 172 170 /** @logical_instance: logical instance of this hw engine */
+11 -26
drivers/gpu/drm/xe/xe_irq.c
··· 192 192 if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { 193 193 gsc_mask = irqs | GSC_ER_COMPLETE; 194 194 heci_mask = GSC_IRQ_INTF(1); 195 - } else if (HAS_HECI_GSCFI(xe)) { 195 + } else if (xe->info.has_heci_gscfi) { 196 196 gsc_mask = GSC_IRQ_INTF(1); 197 197 } 198 198 ··· 325 325 326 326 if (class == XE_ENGINE_CLASS_OTHER) { 327 327 /* HECI GSCFI interrupts come from outside of GT */ 328 - if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE) 328 + if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE) 329 329 xe_heci_gsc_irq_handler(xe, intr_vec); 330 330 else 331 331 gt_other_irq_handler(engine_gt, instance, intr_vec); ··· 348 348 unsigned long intr_dw[2]; 349 349 u32 identity[32]; 350 350 351 - spin_lock(&xe->irq.lock); 352 - if (!xe->irq.enabled) { 353 - spin_unlock(&xe->irq.lock); 351 + if (!atomic_read(&xe->irq.enabled)) 354 352 return IRQ_NONE; 355 - } 356 - spin_unlock(&xe->irq.lock); 357 353 358 354 master_ctl = xelp_intr_disable(xe); 359 355 if (!master_ctl) { ··· 413 417 414 418 /* TODO: This really shouldn't be copied+pasted */ 415 419 416 - spin_lock(&xe->irq.lock); 417 - if (!xe->irq.enabled) { 418 - spin_unlock(&xe->irq.lock); 420 + if (!atomic_read(&xe->irq.enabled)) 419 421 return IRQ_NONE; 420 - } 421 - spin_unlock(&xe->irq.lock); 422 422 423 423 master_tile_ctl = dg1_intr_disable(xe); 424 424 if (!master_tile_ctl) { ··· 451 459 * the primary tile. 
452 460 */ 453 461 if (id == 0) { 454 - if (HAS_HECI_CSCFI(xe)) 462 + if (xe->info.has_heci_cscfi) 455 463 xe_heci_csc_irq_handler(xe, master_ctl); 456 464 xe_display_irq_handler(xe, master_ctl); 457 465 gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); ··· 500 508 501 509 if ((tile->media_gt && 502 510 xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || 503 - HAS_HECI_GSCFI(tile_to_xe(tile))) { 511 + tile_to_xe(tile)->info.has_heci_gscfi) { 504 512 xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); 505 513 xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); 506 514 xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); ··· 636 644 struct xe_tile *tile; 637 645 unsigned int id; 638 646 639 - spin_lock(&xe->irq.lock); 640 - if (!xe->irq.enabled) { 641 - spin_unlock(&xe->irq.lock); 647 + if (!atomic_read(&xe->irq.enabled)) 642 648 return IRQ_NONE; 643 - } 644 - spin_unlock(&xe->irq.lock); 645 649 646 650 for_each_tile(tile, xe, id) 647 651 xe_memirq_handler(&tile->memirq); ··· 662 674 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 663 675 int irq; 664 676 665 - if (!xe->irq.enabled) 677 + if (!atomic_xchg(&xe->irq.enabled, 0)) 666 678 return; 667 679 668 - xe->irq.enabled = false; 669 680 xe_irq_reset(xe); 670 681 671 682 irq = pci_irq_vector(pdev, 0); ··· 711 724 return err; 712 725 } 713 726 714 - xe->irq.enabled = true; 727 + atomic_set(&xe->irq.enabled, 1); 715 728 716 729 xe_irq_postinstall(xe); 717 730 ··· 731 744 { 732 745 int irq = to_pci_dev(xe->drm.dev)->irq; 733 746 734 - spin_lock_irq(&xe->irq.lock); 735 - xe->irq.enabled = false; /* no new irqs */ 736 - spin_unlock_irq(&xe->irq.lock); 747 + atomic_set(&xe->irq.enabled, 0); /* no new irqs */ 737 748 738 749 synchronize_irq(irq); /* flush irqs */ 739 750 xe_irq_reset(xe); /* turn irqs off */ ··· 747 762 * 1. no irq will arrive before the postinstall 748 763 * 2. 
display is not yet resumed 749 764 */ 750 - xe->irq.enabled = true; 765 + atomic_set(&xe->irq.enabled, 1); 751 766 xe_irq_reset(xe); 752 767 xe_irq_postinstall(xe); /* turn irqs on */ 753 768
+29
drivers/gpu/drm/xe/xe_lrc.c
··· 25 25 #include "xe_map.h" 26 26 #include "xe_memirq.h" 27 27 #include "xe_sriov.h" 28 + #include "xe_trace_lrc.h" 28 29 #include "xe_vm.h" 29 30 #include "xe_wa.h" 30 31 ··· 1061 1060 return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; 1062 1061 } 1063 1062 1063 + static u32 xe_lrc_ring_start(struct xe_lrc *lrc) 1064 + { 1065 + if (xe_lrc_has_indirect_ring_state(lrc)) 1066 + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); 1067 + else 1068 + return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); 1069 + } 1070 + 1064 1071 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) 1065 1072 { 1066 1073 if (xe_lrc_has_indirect_ring_state(lrc)) ··· 1644 1635 xe_vm_get(lrc->bo->vm); 1645 1636 1646 1637 snapshot->context_desc = xe_lrc_ggtt_addr(lrc); 1638 + snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); 1647 1639 snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); 1648 1640 snapshot->head = xe_lrc_ring_head(lrc); 1649 1641 snapshot->tail.internal = lrc->ring.tail; 1650 1642 snapshot->tail.memory = xe_lrc_ring_tail(lrc); 1643 + snapshot->start = xe_lrc_ring_start(lrc); 1651 1644 snapshot->start_seqno = xe_lrc_start_seqno(lrc); 1652 1645 snapshot->seqno = xe_lrc_seqno(lrc); 1653 1646 snapshot->lrc_bo = xe_bo_get(lrc->bo); ··· 1703 1692 return; 1704 1693 1705 1694 drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); 1695 + drm_printf(p, "\tHW Ring address: 0x%08x\n", 1696 + snapshot->ring_addr); 1706 1697 drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", 1707 1698 snapshot->indirect_context_desc); 1708 1699 drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); 1709 1700 drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", 1710 1701 snapshot->tail.internal, snapshot->tail.memory); 1702 + drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); 1711 1703 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); 1712 1704 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); 
1713 1705 drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); ··· 1772 1758 1773 1759 lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1774 1760 1761 + trace_xe_lrc_update_timestamp(lrc, *old_ts); 1762 + 1775 1763 return lrc->ctx_timestamp; 1764 + } 1765 + 1766 + /** 1767 + * xe_lrc_ring_is_idle() - LRC is idle 1768 + * @lrc: Pointer to the lrc. 1769 + * 1770 + * Compare LRC ring head and tail to determine if idle. 1771 + * 1772 + * Return: True is ring is idle, False otherwise 1773 + */ 1774 + bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) 1775 + { 1776 + return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); 1776 1777 }
+4
drivers/gpu/drm/xe/xe_lrc.h
··· 25 25 unsigned long lrc_size, lrc_offset; 26 26 27 27 u32 context_desc; 28 + u32 ring_addr; 28 29 u32 indirect_context_desc; 29 30 u32 head; 31 + u32 start; 30 32 struct { 31 33 u32 internal; 32 34 u32 memory; ··· 79 77 u32 xe_lrc_ring_head(struct xe_lrc *lrc); 80 78 u32 xe_lrc_ring_space(struct xe_lrc *lrc); 81 79 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); 80 + 81 + bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); 82 82 83 83 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); 84 84 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
+8 -4
drivers/gpu/drm/xe/xe_macros.h
··· 10 10 11 11 #define XE_WARN_ON WARN_ON 12 12 13 - #define XE_IOCTL_DBG(xe, cond) \ 14 - ((cond) && (drm_dbg(&(xe)->drm, \ 15 - "Ioctl argument check failed at %s:%d: %s", \ 16 - __FILE__, __LINE__, #cond), 1)) 13 + #define XE_IOCTL_DBG(xe, cond) ({ \ 14 + int cond__ = !!(cond); \ 15 + if (cond__) \ 16 + drm_dbg(&(xe)->drm, \ 17 + "Ioctl argument check failed at %s:%d: %s", \ 18 + __FILE__, __LINE__, #cond); \ 19 + cond__; \ 20 + }) 17 21 18 22 #endif
+10 -16
drivers/gpu/drm/xe/xe_memirq.c
··· 155 155 * 156 156 */ 157 157 158 - static void __release_xe_bo(struct drm_device *drm, void *arg) 159 - { 160 - struct xe_bo *bo = arg; 161 - 162 - xe_bo_unpin_map_no_vm(bo); 163 - } 164 - 165 158 static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq) 166 159 { 167 160 /* ··· 177 184 BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64)); 178 185 BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K)); 179 186 180 - /* XXX: convert to managed bo */ 181 - bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, 182 - ttm_bo_type_kernel, 183 - XE_BO_FLAG_SYSTEM | 184 - XE_BO_FLAG_GGTT | 185 - XE_BO_FLAG_GGTT_INVALIDATE | 186 - XE_BO_FLAG_NEEDS_UC | 187 - XE_BO_FLAG_NEEDS_CPU_ACCESS); 187 + bo = xe_managed_bo_create_pin_map(xe, tile, bo_size, 188 + XE_BO_FLAG_SYSTEM | 189 + XE_BO_FLAG_GGTT | 190 + XE_BO_FLAG_GGTT_INVALIDATE | 191 + XE_BO_FLAG_NEEDS_UC | 192 + XE_BO_FLAG_NEEDS_CPU_ACCESS); 188 193 if (IS_ERR(bo)) { 189 194 err = PTR_ERR(bo); 190 195 goto out; ··· 206 215 xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0), 207 216 XE_MEMIRQ_STATUS_OFFSET(0)); 208 217 209 - return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); 218 + return 0; 210 219 211 220 out: 212 221 memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); ··· 433 442 434 443 if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) 435 444 xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); 445 + 446 + if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) 447 + xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); 436 448 } 437 449 438 450 /**
+1 -1
drivers/gpu/drm/xe/xe_module.c
··· 19 19 20 20 struct xe_modparam xe_modparam = { 21 21 .probe_display = true, 22 - .guc_log_level = 5, 22 + .guc_log_level = 3, 23 23 .force_probe = CONFIG_DRM_XE_FORCE_PROBE, 24 24 .wedged_mode = 1, 25 25 /* the rest are 0 by default */
+43 -12
drivers/gpu/drm/xe/xe_oa.c
··· 96 96 struct drm_xe_sync __user *syncs_user; 97 97 int num_syncs; 98 98 struct xe_sync_entry *syncs; 99 + size_t oa_buffer_size; 99 100 }; 100 101 101 102 struct xe_oa_config_bo { ··· 404 403 405 404 static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) 406 405 { 407 - struct xe_mmio *mmio = &stream->gt->mmio; 408 406 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 409 - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; 407 + int size_exponent = __ffs(stream->oa_buffer.bo->size); 408 + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; 409 + struct xe_mmio *mmio = &stream->gt->mmio; 410 410 unsigned long flags; 411 + 412 + /* 413 + * If oa buffer size is more than 16MB (exponent greater than 24), the 414 + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. 415 + */ 416 + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, 417 + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); 411 418 412 419 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 413 420 ··· 910 901 xe_file_put(stream->xef); 911 902 } 912 903 913 - static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) 904 + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) 914 905 { 915 906 struct xe_bo *bo; 916 907 917 - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); 918 - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); 919 - 920 908 bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, 921 - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, 909 + size, ttm_bo_type_kernel, 922 910 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); 923 911 if (IS_ERR(bo)) 924 912 return PTR_ERR(bo); ··· 1093 1087 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); 1094 1088 } 1095 1089 1090 + static u32 oag_buf_size_select(const struct xe_oa_stream *stream) 1091 + { 1092 + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, 1093 + stream->oa_buffer.bo->size > SZ_16M ? 
1094 + OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); 1095 + } 1096 + 1096 1097 static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) 1097 1098 { 1098 1099 struct xe_mmio *mmio = &stream->gt->mmio; ··· 1132 1119 xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, 1133 1120 _MASKED_BIT_ENABLE(oa_debug) | 1134 1121 oag_report_ctx_switches(stream) | 1122 + oag_buf_size_select(stream) | 1135 1123 oag_configure_mmio_trigger(stream, true)); 1136 1124 1137 1125 xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? ··· 1274 1260 return 0; 1275 1261 } 1276 1262 1263 + static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, 1264 + struct xe_oa_open_param *param) 1265 + { 1266 + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { 1267 + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); 1268 + return -EINVAL; 1269 + } 1270 + param->oa_buffer_size = value; 1271 + return 0; 1272 + } 1273 + 1277 1274 static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, 1278 1275 struct xe_oa_open_param *param) 1279 1276 { ··· 1305 1280 [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, 1306 1281 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1307 1282 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1283 + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, 1308 1284 }; 1309 1285 1310 1286 static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { ··· 1320 1294 [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, 1321 1295 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1322 1296 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1297 + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, 1323 1298 }; 1324 1299 1325 1300 static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, ··· 1580 1553 1581 1554 static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) 1582 1555 { 1583 - struct 
drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; 1556 + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; 1584 1557 void __user *uaddr = (void __user *)arg; 1585 1558 1586 1559 if (copy_to_user(uaddr, &info, sizeof(info))) ··· 1666 1639 } 1667 1640 1668 1641 /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ 1669 - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { 1642 + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { 1670 1643 drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); 1671 1644 return -EINVAL; 1672 1645 } ··· 1810 1783 if (GRAPHICS_VER(stream->oa->xe) >= 20 && 1811 1784 stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) 1812 1785 stream->oa_buffer.circ_size = 1813 - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; 1786 + param->oa_buffer_size - 1787 + param->oa_buffer_size % stream->oa_buffer.format->size; 1814 1788 else 1815 - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; 1789 + stream->oa_buffer.circ_size = param->oa_buffer_size; 1816 1790 1817 1791 if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { 1818 1792 /* If we don't find the context offset, just return error */ ··· 1856 1828 goto err_fw_put; 1857 1829 } 1858 1830 1859 - ret = xe_oa_alloc_oa_buffer(stream); 1831 + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); 1860 1832 if (ret) 1861 1833 goto err_fw_put; 1862 1834 ··· 2152 2124 oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); 2153 2125 drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); 2154 2126 } 2127 + 2128 + if (!param.oa_buffer_size) 2129 + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; 2155 2130 2156 2131 ret = xe_oa_parse_syncs(oa, &param); 2157 2132 if (ret)
+1 -1
drivers/gpu/drm/xe/xe_oa_types.h
··· 15 15 #include "regs/xe_reg_defs.h" 16 16 #include "xe_hw_engine_types.h" 17 17 18 - #define XE_OA_BUFFER_SIZE SZ_16M 18 + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M 19 19 20 20 enum xe_oa_report_header { 21 21 HDR_32_BIT = 0,
-3
drivers/gpu/drm/xe/xe_pm.c
··· 738 738 xe->d3cold.allowed = false; 739 739 740 740 mutex_unlock(&xe->d3cold.lock); 741 - 742 - drm_dbg(&xe->drm, 743 - "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); 744 741 } 745 742 746 743 /**
+4 -2
drivers/gpu/drm/xe/xe_pt.c
··· 136 136 xe_pt_free(pt); 137 137 return ERR_PTR(err); 138 138 } 139 + ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO); 139 140 140 141 /** 141 142 * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero ··· 1334 1333 queue_work(system_wq, &ifence->work); 1335 1334 } else { 1336 1335 ifence->base.base.error = ifence->fence->error; 1337 - dma_fence_signal(&ifence->base.base); 1338 - dma_fence_put(&ifence->base.base); 1336 + xe_gt_tlb_invalidation_fence_signal(&ifence->base); 1339 1337 } 1340 1338 dma_fence_put(ifence->fence); 1341 1339 } ··· 1851 1851 1852 1852 return 0; 1853 1853 } 1854 + ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); 1854 1855 1855 1856 static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, 1856 1857 struct xe_vm_pgtable_update_ops *pt_update_ops, ··· 2132 2131 2133 2132 return ERR_PTR(err); 2134 2133 } 2134 + ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO); 2135 2135 2136 2136 /** 2137 2137 * xe_pt_update_ops_fini() - Finish PT update operations
+3 -1
drivers/gpu/drm/xe/xe_query.c
··· 23 23 #include "xe_guc_hwconfig.h" 24 24 #include "xe_macros.h" 25 25 #include "xe_mmio.h" 26 + #include "xe_oa.h" 26 27 #include "xe_ttm_vram_mgr.h" 27 28 #include "xe_wa.h" 28 29 ··· 671 670 du->oa_unit_id = u->oa_unit_id; 672 671 du->oa_unit_type = u->type; 673 672 du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 674 - du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; 673 + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | 674 + DRM_XE_OA_CAPS_OA_BUFFER_SIZE; 675 675 676 676 j = 0; 677 677 for_each_hw_engine(hwe, gt, hwe_id) {
+6 -78
drivers/gpu/drm/xe/xe_reg_sr.c
···
 #include "xe_hw_engine_types.h"
 #include "xe_macros.h"
 #include "xe_mmio.h"
-#include "xe_reg_whitelist.h"
 #include "xe_rtp_types.h"
-
-#define XE_REG_SR_GROW_STEP_DEFAULT 16
 
 static void reg_sr_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_reg_sr *sr = arg;
+	struct xe_reg_sr_entry *entry;
+	unsigned long reg;
+
+	xa_for_each(&sr->xa, reg, entry)
+		kfree(entry);
 
 	xa_destroy(&sr->xa);
-	kfree(sr->pool.arr);
-	memset(&sr->pool, 0, sizeof(sr->pool));
 }
 
 int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe)
 {
 	xa_init(&sr->xa);
-	memset(&sr->pool, 0, sizeof(sr->pool));
-	sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT;
 	sr->name = name;
 
 	return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init);
-
-static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr)
-{
-	if (sr->pool.used == sr->pool.allocated) {
-		struct xe_reg_sr_entry *arr;
-
-		arr = krealloc_array(sr->pool.arr,
-				     ALIGN(sr->pool.allocated + 1, sr->pool.grow_step),
-				     sizeof(*arr), GFP_KERNEL);
-		if (!arr)
-			return NULL;
-
-		sr->pool.arr = arr;
-		sr->pool.allocated += sr->pool.grow_step;
-	}
-
-	return &sr->pool.arr[sr->pool.used++];
-}
 
 static bool compatible_entries(const struct xe_reg_sr_entry *e1,
 			       const struct xe_reg_sr_entry *e2)
···
 		return 0;
 	}
 
-	pentry = alloc_entry(sr);
+	pentry = kmalloc(sizeof(*pentry), GFP_KERNEL);
 	if (!pentry) {
 		ret = -ENOMEM;
 		goto fail;
···
 err_force_wake:
 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
 	xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n");
 }
-
-void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
-{
-	struct xe_reg_sr *sr = &hwe->reg_whitelist;
-	struct xe_gt *gt = hwe->gt;
-	struct xe_device *xe = gt_to_xe(gt);
-	struct xe_reg_sr_entry *entry;
-	struct drm_printer p;
-	u32 mmio_base = hwe->mmio_base;
-	unsigned long reg;
-	unsigned int slot = 0;
-	unsigned int fw_ref;
-
-	if (xa_empty(&sr->xa))
-		return;
-
-	drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name);
-
-	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
-		goto err_force_wake;
-
-	p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
-	xa_for_each(&sr->xa, reg, entry) {
-		if (slot == RING_MAX_NONPRIV_SLOTS) {
-			xe_gt_err(gt,
-				  "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n",
-				  hwe->name, RING_MAX_NONPRIV_SLOTS);
-			break;
-		}
-
-		xe_reg_whitelist_print_entry(&p, 0, reg, entry);
-		xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot),
-				reg | entry->set_bits);
-		slot++;
-	}
-
-	/* And clear the rest just in case of garbage */
-	for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) {
-		u32 addr = RING_NOPID(mmio_base).addr;
-
-		xe_mmio_write32(&gt->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr);
-	}
-
-	xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
-	return;
-
-err_force_wake:
-	xe_force_wake_put(gt_to_fw(gt), fw_ref);
-	drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n");
-}
 
 /**
-6
drivers/gpu/drm/xe/xe_reg_sr_types.h
··· 20 20 }; 21 21 22 22 struct xe_reg_sr { 23 - struct { 24 - struct xe_reg_sr_entry *arr; 25 - unsigned int used; 26 - unsigned int allocated; 27 - unsigned int grow_step; 28 - } pool; 29 23 struct xarray xa; 30 24 const char *name; 31 25
+37
drivers/gpu/drm/xe/xe_reg_whitelist.c
··· 10 10 #include "regs/xe_oa_regs.h" 11 11 #include "regs/xe_regs.h" 12 12 #include "xe_gt_types.h" 13 + #include "xe_gt_printk.h" 13 14 #include "xe_platform_types.h" 15 + #include "xe_reg_sr.h" 14 16 #include "xe_rtp.h" 15 17 #include "xe_step.h" 16 18 ··· 91 89 {} 92 90 }; 93 91 92 + static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) 93 + { 94 + struct xe_reg_sr *sr = &hwe->reg_whitelist; 95 + struct xe_reg_sr_entry *entry; 96 + struct drm_printer p; 97 + unsigned long reg; 98 + unsigned int slot; 99 + 100 + xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name); 101 + p = xe_gt_dbg_printer(hwe->gt); 102 + 103 + slot = 0; 104 + xa_for_each(&sr->xa, reg, entry) { 105 + struct xe_reg_sr_entry hwe_entry = { 106 + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot), 107 + .set_bits = entry->reg.addr | entry->set_bits, 108 + .clr_bits = ~0u, 109 + .read_mask = entry->read_mask, 110 + }; 111 + 112 + if (slot == RING_MAX_NONPRIV_SLOTS) { 113 + xe_gt_err(hwe->gt, 114 + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", 115 + hwe->name, RING_MAX_NONPRIV_SLOTS); 116 + break; 117 + } 118 + 119 + xe_reg_whitelist_print_entry(&p, 0, reg, entry); 120 + xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt); 121 + 122 + slot++; 123 + } 124 + } 125 + 94 126 /** 95 127 * xe_reg_whitelist_process_engine - process table of registers to whitelist 96 128 * @hwe: engine instance to process whitelist for ··· 138 102 struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); 139 103 140 104 xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); 105 + whitelist_apply_to_hwe(hwe); 141 106 } 142 107 143 108 /**
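With whitelisting funneled through the regular save/restore list, each `RING_FORCE_TO_NONPRIV` slot value is the whitelisted register address OR'd with the entry's access-flag bits, and entries past the hardware slot count are refused. A hedged C sketch of just that packing and capping logic (the slot count of 12 and the addresses/flags in the test are illustrative assumptions, not the hardware encoding):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Assumed engine NONPRIV slot count for this sketch. */
#define RING_MAX_NONPRIV_SLOTS 12

/* One slot value: register address OR'd with the entry's access bits,
 * as composed in whitelist_apply_to_hwe(). */
static uint32_t nonpriv_slot_value(uint32_t reg_addr, uint32_t set_bits)
{
	return reg_addr | set_bits;
}

/* Slot-capped application: entries beyond the slot count are refused.
 * Returns the number of slots actually filled. */
static size_t apply_whitelist(const uint32_t *addrs, const uint32_t *bits,
			      size_t n, uint32_t *slots)
{
	size_t i;

	for (i = 0; i < n && i < RING_MAX_NONPRIV_SLOTS; i++)
		slots[i] = nonpriv_slot_value(addrs[i], bits[i]);

	return i;
}
```

The actual patch additionally sets `clr_bits = ~0u` so the composed value fully replaces the slot's previous contents when the save/restore list is applied.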
+4
drivers/gpu/drm/xe/xe_sriov.c
··· 14 14 #include "xe_mmio.h" 15 15 #include "xe_sriov.h" 16 16 #include "xe_sriov_pf.h" 17 + #include "xe_sriov_vf.h" 17 18 18 19 /** 19 20 * xe_sriov_mode_to_string - Convert enum value to string. ··· 114 113 if (err) 115 114 return err; 116 115 } 116 + 117 + if (IS_SRIOV_VF(xe)) 118 + xe_sriov_vf_init_early(xe); 117 119 118 120 xe_assert(xe, !xe->sriov.wq); 119 121 xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
+1 -1
drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
··· 20 20 * is within a range of supported VF numbers (up to maximum number of VFs that 21 21 * driver can support, including VF0 that represents the PF itself). 22 22 * 23 - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. 23 + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. 24 24 */ 25 25 #define xe_sriov_pf_assert_vfid(xe, vfid) \ 26 26 xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe))
+17
drivers/gpu/drm/xe/xe_sriov_types.h
··· 9 9 #include <linux/build_bug.h> 10 10 #include <linux/mutex.h> 11 11 #include <linux/types.h> 12 + #include <linux/workqueue_types.h> 12 13 13 14 /** 14 15 * VFID - Virtual Function Identifier ··· 55 54 56 55 /** @master_lock: protects all VFs configurations across GTs */ 57 56 struct mutex master_lock; 57 + }; 58 + 59 + /** 60 + * struct xe_device_vf - Xe Virtual Function related data 61 + * 62 + * The data in this structure is valid only if driver is running in the 63 + * @XE_SRIOV_MODE_VF mode. 64 + */ 65 + struct xe_device_vf { 66 + /** @migration: VF Migration state data */ 67 + struct { 68 + /** @migration.worker: VF migration recovery worker */ 69 + struct work_struct worker; 70 + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ 71 + unsigned long gt_flags; 72 + } migration; 58 73 }; 59 74 60 75 #endif
+263
drivers/gpu/drm/xe/xe_sriov_vf.c
···
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
+
+/**
+ * DOC: VF restore procedure in PF KMD and VF KMD
+ *
+ * Restoring previously saved state of a VF is one of core features of
+ * SR-IOV. All major VM Management applications allow saving and restoring
+ * the VM state, and doing that to a VM which uses SRIOV VF as one of
+ * the accessible devices requires support from KMD on both PF and VF side.
+ * VMM initiates all required operations through VFIO module, which then
+ * translates them into PF KMD calls. This description will focus on these
+ * calls, leaving out the module which initiates these steps (VFIO).
+ *
+ * In order to start the restore procedure, GuC needs to keep the VF in
+ * proper state. The PF driver can ensure GuC set it to VF_READY state
+ * by provisioning the VF, which in turn can be done after Function Level
+ * Reset of said VF (or after it was freshly created - in that case FLR
+ * is not needed). The FLR procedure ends with GuC sending message
+ * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC.
+ * After the provisioning is completed, the VF needs to be paused, and
+ * at that point the actual restore can begin.
+ *
+ * During VF Restore, state of several resources is restored. These may
+ * include local memory content (system memory is restored by VMM itself),
+ * values of MMIO registers, stateless compression metadata and others.
+ * The final resource which also needs restoring is state of the VF
+ * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE`
+ * message is used, with reference to the state blob to be consumed by
+ * GuC.
+ *
+ * Next, when VFIO is asked to set the VM into running state, the PF driver
+ * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this
+ * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the
+ * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform
+ * the VF KMD within the VM that it was migrated.
+ *
+ * As soon as Virtual GPU of the VM starts, the VF driver within receives
+ * the MIGRATED interrupt and schedules post-migration recovery worker.
+ * That worker queries GuC for new provisioning (using MMIO communication),
+ * and applies fixups to any non-virtualized resources used by the VF.
+ *
+ * When the VF driver is ready to continue operation on the newly connected
+ * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to
+ * enter the long awaited `VF_RUNNING` state, and therefore start handling
+ * CTB messages and scheduling workloads from the VF::
+ *
+ *      PF                             GuC                             VF
+ *     [ ]                              |                               |
+ *     [ ] PF2GUC_VF_CONTROL(pause)     |                               |
+ *     [ ]---------------------------> [ ]                              |
+ *     [ ]                             [ ] GuC sets new VF state to     |
+ *     [ ]                             [ ]------- VF_READY_PAUSED       |
+ *     [ ]                             [ ]      |                       |
+ *     [ ]                             [ ] <-----                       |
+ *     [ ]                   success   [ ]                              |
+ *     [ ] <---------------------------[ ]                              |
+ *     [ ]                              |                               |
+ *     [ ] PF loads resources from the  |                               |
+ *     [ ]------- saved image supplied  |                               |
+ *     [ ]      |                       |                               |
+ *     [ ] <-----                       |                               |
+ *     [ ]                              |                               |
+ *     [ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
+ *     [ ]---------------------------> [ ]                              |
+ *     [ ]                             [ ] GuC loads contexts and CTB   |
+ *     [ ]                             [ ]------- state from image      |
+ *     [ ]                             [ ]      |                       |
+ *     [ ]                             [ ] <-----                       |
+ *     [ ]                             [ ]                              |
+ *     [ ]                             [ ] GuC sets new VF state to     |
+ *     [ ]                             [ ]------- VF_RESFIX_PAUSED      |
+ *     [ ]                             [ ]      |                       |
+ *     [ ]                   success   [ ] <-----                       |
+ *     [ ] <---------------------------[ ]                              |
+ *     [ ]                              |                               |
+ *     [ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
+ *     [ ]---------------------------> [ ]                              |
+ *     [ ]                             [ ] GuC sets new VF state to     |
+ *     [ ]                             [ ]------- VF_RESFIX_BLOCKED     |
+ *     [ ]                             [ ]      |                       |
+ *     [ ]                             [ ] <-----                       |
+ *     [ ]                             [ ]                              |
+ *     [ ]                             [ ] GUC_INTR_SW_INT_0            |
+ *     [ ]                   success   [ ]---------------------------> [ ]
+ *     [ ] <---------------------------[ ]                             [ ]
+ *      |                               |      VF2GUC_QUERY_SINGLE_KLV [ ]
+ *      |                              [ ] <---------------------------[ ]
+ *      |                              [ ]                             [ ]
+ *      |                              [ ]         new VF provisioning [ ]
+ *      |                              [ ]---------------------------> [ ]
+ *      |                               |                              [ ]
+ *      |                               |      VF driver applies post  [ ]
+ *      |                               |      migration fixups -------[ ]
+ *      |                               |                      |       [ ]
+ *      |                               |                       -----> [ ]
+ *      |                               |                              [ ]
+ *      |                               |    VF2GUC_NOTIFY_RESFIX_DONE [ ]
+ *      |                              [ ] <---------------------------[ ]
+ *      |                              [ ]                             [ ]
+ *      |                              [ ] GuC sets new VF state to    [ ]
+ *      |                              [ ]------- VF_RUNNING           [ ]
+ *      |                              [ ]      |                      [ ]
+ *      |                              [ ] <-----                      [ ]
+ *      |                              [ ]                   success   [ ]
+ *      |                              [ ]---------------------------> [ ]
+ *      |                               |                               |
+ *      |                               |                               |
+ */
+
+static void migration_worker_func(struct work_struct *w);
+
+/**
+ * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
+ * @xe: the &xe_device to initialize
+ */
+void xe_sriov_vf_init_early(struct xe_device *xe)
+{
+	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+}
+
+/**
+ * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+ * @xe: the &xe_device struct instance
+ *
+ * After migration, we need to re-query all VF configuration to make sure
+ * they match previous provisioning. Note that most of VF provisioning
+ * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
+ *
+ * Returns: 0 if the operation completed successfully, or a negative error
+ * code otherwise.
+ */
+static int vf_post_migration_requery_guc(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int err, ret = 0;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_sriov_vf_query_config(gt);
+		ret = ret ?: err;
+	}
+
+	return ret;
+}
+
+/*
+ * vf_post_migration_imminent - Check if post-restore recovery is coming.
+ * @xe: the &xe_device struct instance
+ *
+ * Return: True if migration recovery worker will soon be running. Any worker currently
+ * executing does not affect the result.
+ */
+static bool vf_post_migration_imminent(struct xe_device *xe)
+{
+	return xe->sriov.vf.migration.gt_flags != 0 ||
+	       work_pending(&xe->sriov.vf.migration.worker);
+}
+
+/*
+ * Notify all GuCs about resource fixups apply finished.
+ */
+static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+
+	for_each_gt(gt, xe, id) {
+		if (vf_post_migration_imminent(xe))
+			goto skip;
+		xe_gt_sriov_vf_notify_resfix_done(gt);
+	}
+	return;
+
+skip:
+	drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
+}
+
+static void vf_post_migration_recovery(struct xe_device *xe)
+{
+	int err;
+
+	drm_dbg(&xe->drm, "migration recovery in progress\n");
+	xe_pm_runtime_get(xe);
+	err = vf_post_migration_requery_guc(xe);
+	if (vf_post_migration_imminent(xe))
+		goto defer;
+	if (unlikely(err))
+		goto fail;
+
+	/* FIXME: add the recovery steps */
+	vf_post_migration_notify_resfix_done(xe);
+	xe_pm_runtime_put(xe);
+	drm_notice(&xe->drm, "migration recovery ended\n");
+	return;
+defer:
+	xe_pm_runtime_put(xe);
+	drm_dbg(&xe->drm, "migration recovery deferred\n");
+	return;
+fail:
+	xe_pm_runtime_put(xe);
+	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
+	xe_device_declare_wedged(xe);
+}
+
+static void migration_worker_func(struct work_struct *w)
+{
+	struct xe_device *xe = container_of(w, struct xe_device,
+					    sriov.vf.migration.worker);
+
+	vf_post_migration_recovery(xe);
+}
+
+static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+
+	for_each_gt(gt, xe, id) {
+		if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
+			xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
+			return false;
+		}
+	}
+	return true;
+}
+
+/**
+ * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
+ * @xe: the &xe_device to start recovery on
+ *
+ * This function shall be called only by VF.
+ */
+void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
+{
+	bool started;
+
+	xe_assert(xe, IS_SRIOV_VF(xe));
+
+	if (!vf_ready_to_recovery_on_all_gts(xe))
+		return;
+
+	WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
+	/* Ensure other threads see that no flags are set now. */
+	smp_mb();
+
+	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
+	drm_info(&xe->drm, "VF migration recovery %s\n", started ?
+		 "scheduled" : "already in progress");
+}
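Recovery only starts once every GT has flagged readiness in `migration.gt_flags`, where the bit position corresponds to the GT id. A minimal plain-C sketch of that all-bits-set gate (an illustration of the check in `vf_ready_to_recovery_on_all_gts()`, not the driver code itself):

```c
#include <assert.h>
#include <stdbool.h>

/* Sketch of vf_ready_to_recovery_on_all_gts(): recovery may begin only
 * when every GT (bit position == GT id) has set its readiness flag. */
static bool all_gts_ready(unsigned long gt_flags, unsigned int num_gt)
{
	unsigned int id;

	for (id = 0; id < num_gt; id++)
		if (!(gt_flags & (1ul << id)))
			return false;

	return true;
}
```

Once the gate passes, the real code clears the flags with `WRITE_ONCE()` plus a memory barrier before queuing the worker, so a MIGRATED interrupt arriving during recovery re-arms the flags and schedules another pass.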
+14
drivers/gpu/drm/xe/xe_sriov_vf.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_SRIOV_VF_H_ 7 + #define _XE_SRIOV_VF_H_ 8 + 9 + struct xe_device; 10 + 11 + void xe_sriov_vf_init_early(struct xe_device *xe); 12 + void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); 13 + 14 + #endif
+8 -3
drivers/gpu/drm/xe/xe_trace.h
··· 211 211 __string(dev, __dev_name_eq(job->q)) 212 212 __field(u32, seqno) 213 213 __field(u32, lrc_seqno) 214 + __field(u8, gt_id) 214 215 __field(u16, guc_id) 215 216 __field(u32, guc_state) 216 217 __field(u32, flags) ··· 224 223 __assign_str(dev); 225 224 __entry->seqno = xe_sched_job_seqno(job); 226 225 __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); 226 + __entry->gt_id = job->q->gt->info.id; 227 227 __entry->guc_id = job->q->guc->id; 228 228 __entry->guc_state = 229 229 atomic_read(&job->q->guc->state); ··· 234 232 __entry->batch_addr = (u64)job->ptrs[0].batch_addr; 235 233 ), 236 234 237 - TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", 235 + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", 238 236 __get_str(dev), __entry->fence, __entry->seqno, 239 - __entry->lrc_seqno, __entry->guc_id, 237 + __entry->lrc_seqno, __entry->gt_id, __entry->guc_id, 240 238 __entry->batch_addr, __entry->guc_state, 241 239 __entry->flags, __entry->error) 242 240 ); ··· 284 282 __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) 285 283 __field(u32, opcode) 286 284 __field(u16, guc_id) 285 + __field(u8, gt_id) 287 286 ), 288 287 289 288 TP_fast_assign( ··· 292 289 __entry->opcode = msg->opcode; 293 290 __entry->guc_id = 294 291 ((struct xe_exec_queue *)msg->private_data)->guc->id; 292 + __entry->gt_id = 293 + ((struct xe_exec_queue *)msg->private_data)->gt->info.id; 295 294 ), 296 295 297 - TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, 296 + TP_printk("dev=%s, gt=%u guc_id=%d, opcode=%u", __get_str(dev), __entry->gt_id, __entry->guc_id, 298 297 __entry->opcode) 299 298 ); 300 299
+5
drivers/gpu/drm/xe/xe_trace_bo.h
··· 48 48 TP_ARGS(bo) 49 49 ); 50 50 51 + DEFINE_EVENT(xe_bo, xe_bo_validate, 52 + TP_PROTO(struct xe_bo *bo), 53 + TP_ARGS(bo) 54 + ); 55 + 51 56 TRACE_EVENT(xe_bo_move, 52 57 TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, 53 58 bool move_lacks_source),
+9
drivers/gpu/drm/xe/xe_trace_lrc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef __CHECKER__ 7 + #define CREATE_TRACE_POINTS 8 + #include "xe_trace_lrc.h" 9 + #endif
+52
drivers/gpu/drm/xe/xe_trace_lrc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM xe 8 + 9 + #if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ) 10 + #define _XE_TRACE_LRC_H_ 11 + 12 + #include <linux/tracepoint.h> 13 + #include <linux/types.h> 14 + 15 + #include "xe_gt_types.h" 16 + #include "xe_lrc.h" 17 + #include "xe_lrc_types.h" 18 + 19 + #define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) 20 + 21 + TRACE_EVENT(xe_lrc_update_timestamp, 22 + TP_PROTO(struct xe_lrc *lrc, uint32_t old), 23 + TP_ARGS(lrc, old), 24 + TP_STRUCT__entry( 25 + __field(struct xe_lrc *, lrc) 26 + __field(u32, old) 27 + __field(u32, new) 28 + __string(name, lrc->fence_ctx.name) 29 + __string(device_id, __dev_name_lrc(lrc)) 30 + ), 31 + 32 + TP_fast_assign( 33 + __entry->lrc = lrc; 34 + __entry->old = old; 35 + __entry->new = lrc->ctx_timestamp; 36 + __assign_str(name); 37 + __assign_str(device_id); 38 + ), 39 + TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", 40 + __entry->lrc, __get_str(name), 41 + __entry->old, __entry->new, 42 + __get_str(device_id)) 43 + ); 44 + 45 + #endif 46 + 47 + /* This part must be outside protection */ 48 + #undef TRACE_INCLUDE_PATH 49 + #undef TRACE_INCLUDE_FILE 50 + #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe 51 + #define TRACE_INCLUDE_FILE xe_trace_lrc 52 + #include <trace/define_trace.h>
+13 -40
drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
···
 	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
 	struct xe_ttm_vram_mgr_resource *vres;
 	struct drm_buddy *mm = &mgr->mm;
-	u64 size, remaining_size, min_page_size;
+	u64 size, min_page_size;
 	unsigned long lpfn;
 	int err;
···
 		goto error_fini;
 	}
 
-	if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */
-		err = -EINVAL;
-		goto error_fini;
-	}
-
-	if (WARN_ON((size > SZ_2G &&
-		     (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) {
-		err = -EINVAL;
-		goto error_fini;
-	}
-
 	if (WARN_ON(!IS_ALIGNED(size, min_page_size))) {
 		err = -EINVAL;
 		goto error_fini;
···
 	mutex_lock(&mgr->lock);
 	if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) {
-		mutex_unlock(&mgr->lock);
 		err = -ENOSPC;
-		goto error_fini;
+		goto error_unlock;
 	}
 
-	if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn &&
+	if (place->fpfn + (size >> PAGE_SHIFT) != lpfn &&
 	    place->flags & TTM_PL_FLAG_CONTIGUOUS) {
 		size = roundup_pow_of_two(size);
 		min_page_size = size;
 
 		lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn);
 	}
 
-	remaining_size = size;
-	do {
-		/*
-		 * Limit maximum size to 2GiB due to SG table limitations.
-		 * FIXME: Should maybe be handled as part of sg construction.
-		 */
-		u64 alloc_size = min_t(u64, remaining_size, SZ_2G);
-
-		err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
-					     (u64)lpfn << PAGE_SHIFT,
-					     alloc_size,
-					     min_page_size,
-					     &vres->blocks,
-					     vres->flags);
-		if (err)
-			goto error_free_blocks;
-
-		remaining_size -= alloc_size;
-	} while (remaining_size);
+	err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
+				     (u64)lpfn << PAGE_SHIFT, size,
+				     min_page_size, &vres->blocks, vres->flags);
+	if (err)
+		goto error_unlock;
 
 	if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
 		if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks))
···
 
 	*res = &vres->base;
 	return 0;
-
-error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks, 0);
+error_unlock:
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
···
 	xe_res_first(res, offset, length, &cursor);
 	while (cursor.remaining) {
 		num_entries++;
-		xe_res_next(&cursor, cursor.size);
+		/* Limit maximum size to 2GiB due to SG table limitations. */
+		xe_res_next(&cursor, min_t(u64, cursor.size, SZ_2G));
 	}
 
 	r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
···
 	xe_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
 		phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
-		size_t size = cursor.size;
+		size_t size = min_t(u64, cursor.size, SZ_2G);
 		dma_addr_t addr;
 
 		addr = dma_map_resource(dev, phys, size, dir,
···
 		sg_dma_address(sg) = addr;
 		sg_dma_len(sg) = size;
 
-		xe_res_next(&cursor, size);
+		xe_res_next(&cursor, size);
 	}
 
 	return 0;
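With the 2G restriction dropped from the buddy allocation path, the SG-table limitation is handled where it belongs, at table construction: each cursor step is clamped to 2GiB, so a larger contiguous block simply becomes several sg entries. A hedged C sketch of that entry counting (plain C standing in for the `xe_res` cursor iteration):

```c
#include <assert.h>
#include <stdint.h>

#define SZ_2G (2ull << 30)

/* Mirrors the new sg-entry counting in xe_ttm_vram_mgr: a contiguous
 * region larger than 2GiB is emitted as multiple <= 2GiB entries. */
static unsigned int sg_entries_for(uint64_t length)
{
	unsigned int n = 0;

	while (length) {
		uint64_t chunk = length < SZ_2G ? length : SZ_2G;

		length -= chunk;
		n++;
	}

	return n;
}
```

The same `min_t(u64, cursor.size, SZ_2G)` clamp is applied again when the entries are dma-mapped, keeping the count and fill loops in agreement.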
+16 -16
drivers/gpu/drm/xe/xe_vm.c
···
 		vops->pt_update_ops[i].ops =
 			kmalloc_array(vops->pt_update_ops[i].num_ops,
 				      sizeof(*vops->pt_update_ops[i].ops),
-				      GFP_KERNEL);
+				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 		if (!vops->pt_update_ops[i].ops)
 			return array_of_binds ? -ENOBUFS : -ENOMEM;
 	}
 
 	return 0;
 }
+ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
 
 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
 {
···
 
 	return 0;
 }
+ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
 
 static void xe_vm_free_scratch(struct xe_vm *vm)
 {
···
 
 	return ops;
 }
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
 			      u16 pat_index, unsigned int flags)
···
 			    bool validate)
 {
 	struct xe_bo *bo = xe_vma_bo(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
 	int err = 0;
 
 	if (bo) {
 		if (!bo->vm)
 			err = drm_exec_lock_obj(exec, &bo->ttm.base);
 		if (!err && validate)
-			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
+			err = xe_bo_validate(bo, vm,
+					     !xe_vm_in_preempt_fence_mode(vm));
 	}
 
 	return err;
···
 	drm_exec_fini(&exec);
 	return err;
 }
+ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
 
 #define SUPPORTED_FLAGS_STUB \
 	(DRM_XE_VM_BIND_FLAG_READONLY | \
···
 
 	*bind_ops = kvmalloc_array(args->num_binds,
 				   sizeof(struct drm_xe_vm_bind_op),
-				   GFP_KERNEL | __GFP_ACCOUNT);
+				   GFP_KERNEL | __GFP_ACCOUNT |
+				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (!*bind_ops)
 		return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
···
 
 	if (args->num_binds) {
 		bos = kvcalloc(args->num_binds, sizeof(*bos),
-			       GFP_KERNEL | __GFP_ACCOUNT);
+			       GFP_KERNEL | __GFP_ACCOUNT |
+			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 		if (!bos) {
 			err = -ENOMEM;
 			goto release_vm_lock;
 		}
 
 		ops = kvcalloc(args->num_binds, sizeof(*ops),
-			       GFP_KERNEL | __GFP_ACCOUNT);
+			       GFP_KERNEL | __GFP_ACCOUNT |
+			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 		if (!ops) {
 			err = -ENOMEM;
 			goto release_vm_lock;
···
 
 	for (int i = 0; i < snap->num_snaps; i++) {
 		struct xe_bo *bo = snap->snap[i].bo;
-		struct iosys_map src;
 		int err;
 
 		if (IS_ERR(snap->snap[i].data))
···
 		}
 
 		if (bo) {
-			xe_bo_lock(bo, false);
-			err = ttm_bo_vmap(&bo->ttm, &src);
-			if (!err) {
-				xe_map_memcpy_from(xe_bo_device(bo),
-						   snap->snap[i].data,
-						   &src, snap->snap[i].bo_ofs,
-						   snap->snap[i].len);
-				ttm_bo_vunmap(&bo->ttm, &src);
-			}
-			xe_bo_unlock(bo);
+			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
+					 snap->snap[i].data, snap->snap[i].len);
 		} else {
 			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
 
+11 -11
drivers/gpu/drm/xe/xe_vm_doc.h
···
  *	update page level 2 PDE[1] to page level 3b phys address (GPU)
  *
  *	bind BO2 0x1ff000-0x201000
- *	update page level 3a PTE[511] to BO2 phys addres (GPU)
- *	update page level 3b PTE[0] to BO2 phys addres + 0x1000 (GPU)
+ *	update page level 3a PTE[511] to BO2 phys address (GPU)
+ *	update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU)
  *
  * GPU bypass
  * ~~~~~~~~~~
···
  *
  * If a VM is in fault mode (TODO: link to fault mode), new bind operations that
  * create mappings are by default deferred to the page fault handler (first
- * use). This behavior can be overriden by setting the flag
+ * use). This behavior can be overridden by setting the flag
  * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping
  * immediately.
  *
···
  *
  * Since this a core kernel managed memory the kernel can move this memory
  * whenever it wants. We register an invalidation MMU notifier to alert XE when
- * a user poiter is about to move. The invalidation notifier needs to block
+ * a user pointer is about to move. The invalidation notifier needs to block
  * until all pending users (jobs or compute mode engines) of the userptr are
  * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
  *
···
  * Rebind worker
  * -------------
  *
- * The rebind worker is very similar to an exec. It is resposible for rebinding
+ * The rebind worker is very similar to an exec. It is responsible for rebinding
  * evicted BOs or userptrs, waiting on those operations, installing new preempt
  * fences, and finally resuming executing of engines in the VM.
  *
···
  * are not allowed, only long running workloads and ULLS are enabled on a faulting
  * VM.
  *
- * Defered VM binds
+ * Deferred VM binds
  * ----------------
  *
  * By default, on a faulting VM binds just allocate the VMA and the actual
- * updating of the page tables is defered to the page fault handler. This
+ * updating of the page tables is deferred to the page fault handler. This
  * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in
  * the VM bind which will then do the bind immediately.
  *
···
  * Slot waiting
  * ------------
  *
- * 1. The exection of all jobs from kernel ops shall wait on all slots
+ * 1. The execution of all jobs from kernel ops shall wait on all slots
  * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if
  * kernel op is operating on external or private BO)
  *
- * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall
+ * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall
  * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM
  * (depends on if the rebind is operatiing on an external or private BO)
  *
- * 3. In non-compute mode, the exection of all jobs from execs shall wait on the
+ * 3. In non-compute mode, the execution of all jobs from execs shall wait on the
  * last rebind job
  *
- * 4. In compute mode, the exection of all jobs from rebinds in the rebind
+ * 4. In compute mode, the execution of all jobs from rebinds in the rebind
  * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO
  * or VM (depends on if rebind is operating on external or private BO)
  *
+233
drivers/gpu/drm/xe/xe_vsec.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright © 2024 Intel Corporation */ 3 + #include <linux/bitfield.h> 4 + #include <linux/bits.h> 5 + #include <linux/cleanup.h> 6 + #include <linux/errno.h> 7 + #include <linux/intel_vsec.h> 8 + #include <linux/module.h> 9 + #include <linux/mutex.h> 10 + #include <linux/pci.h> 11 + #include <linux/types.h> 12 + 13 + #include "xe_device.h" 14 + #include "xe_device_types.h" 15 + #include "xe_drv.h" 16 + #include "xe_mmio.h" 17 + #include "xe_platform_types.h" 18 + #include "xe_pm.h" 19 + #include "xe_vsec.h" 20 + 21 + #include "regs/xe_pmt.h" 22 + 23 + /* PMT GUID value for BMG devices. NOTE: this is NOT a PCI id */ 24 + #define BMG_DEVICE_ID 0xE2F8 25 + 26 + static struct intel_vsec_header bmg_telemetry = { 27 + .length = 0x10, 28 + .id = VSEC_ID_TELEMETRY, 29 + .num_entries = 2, 30 + .entry_size = 4, 31 + .tbir = 0, 32 + .offset = BMG_DISCOVERY_OFFSET, 33 + }; 34 + 35 + static struct intel_vsec_header bmg_punit_crashlog = { 36 + .length = 0x10, 37 + .id = VSEC_ID_CRASHLOG, 38 + .num_entries = 1, 39 + .entry_size = 4, 40 + .tbir = 0, 41 + .offset = BMG_DISCOVERY_OFFSET + 0x60, 42 + }; 43 + 44 + static struct intel_vsec_header bmg_oobmsm_crashlog = { 45 + .length = 0x10, 46 + .id = VSEC_ID_CRASHLOG, 47 + .num_entries = 1, 48 + .entry_size = 4, 49 + .tbir = 0, 50 + .offset = BMG_DISCOVERY_OFFSET + 0x78, 51 + }; 52 + 53 + static struct intel_vsec_header *bmg_capabilities[] = { 54 + &bmg_telemetry, 55 + &bmg_punit_crashlog, 56 + &bmg_oobmsm_crashlog, 57 + NULL 58 + }; 59 + 60 + enum xe_vsec { 61 + XE_VSEC_UNKNOWN = 0, 62 + XE_VSEC_BMG, 63 + }; 64 + 65 + static struct intel_vsec_platform_info xe_vsec_info[] = { 66 + [XE_VSEC_BMG] = { 67 + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG, 68 + .headers = bmg_capabilities, 69 + }, 70 + { } 71 + }; 72 + 73 + /* 74 + * The GUID will have the following bits to decode: 75 + * [0:3] - {Telemetry space iteration number (0,1,..)} 76 + * [4:7] - Segment (SEGMENT_INDEPENDENT-0, 
Client-1, Server-2) 77 + * [8:11] - SOC_SKU 78 + * [12:27] – Device ID – changes for each down bin SKU’s 79 + * [28:29] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2) 80 + * [30:31] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1) 81 + */ 82 + #define GUID_TELEM_ITERATION GENMASK(3, 0) 83 + #define GUID_SEGMENT GENMASK(7, 4) 84 + #define GUID_SOC_SKU GENMASK(11, 8) 85 + #define GUID_DEVICE_ID GENMASK(27, 12) 86 + #define GUID_CAP_TYPE GENMASK(29, 28) 87 + #define GUID_RECORD_ID GENMASK(31, 30) 88 + 89 + #define PUNIT_TELEMETRY_OFFSET 0x0200 90 + #define PUNIT_WATCHER_OFFSET 0x14A0 91 + #define OOBMSM_0_WATCHER_OFFSET 0x18D8 92 + #define OOBMSM_1_TELEMETRY_OFFSET 0x1000 93 + 94 + enum record_id { 95 + PUNIT, 96 + OOBMSM_0, 97 + OOBMSM_1, 98 + }; 99 + 100 + enum capability { 101 + CRASHLOG, 102 + TELEMETRY, 103 + WATCHER, 104 + }; 105 + 106 + static int xe_guid_decode(u32 guid, int *index, u32 *offset) 107 + { 108 + u32 record_id = FIELD_GET(GUID_RECORD_ID, guid); 109 + u32 cap_type = FIELD_GET(GUID_CAP_TYPE, guid); 110 + u32 device_id = FIELD_GET(GUID_DEVICE_ID, guid); 111 + 112 + if (device_id != BMG_DEVICE_ID) 113 + return -ENODEV; 114 + 115 + if (cap_type > WATCHER) 116 + return -EINVAL; 117 + 118 + *offset = 0; 119 + 120 + if (cap_type == CRASHLOG) { 121 + *index = record_id == PUNIT ? 
2 : 4; 122 + return 0; 123 + } 124 + 125 + switch (record_id) { 126 + case PUNIT: 127 + *index = 0; 128 + if (cap_type == TELEMETRY) 129 + *offset = PUNIT_TELEMETRY_OFFSET; 130 + else 131 + *offset = PUNIT_WATCHER_OFFSET; 132 + break; 133 + 134 + case OOBMSM_0: 135 + *index = 1; 136 + if (cap_type == WATCHER) 137 + *offset = OOBMSM_0_WATCHER_OFFSET; 138 + break; 139 + 140 + case OOBMSM_1: 141 + *index = 1; 142 + if (cap_type == TELEMETRY) 143 + *offset = OOBMSM_1_TELEMETRY_OFFSET; 144 + break; 145 + default: 146 + return -EINVAL; 147 + } 148 + 149 + return 0; 150 + } 151 + 152 + static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, 153 + u32 count) 154 + { 155 + struct xe_device *xe = pdev_to_xe_device(pdev); 156 + void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; 157 + u32 mem_region; 158 + u32 offset; 159 + int ret; 160 + 161 + ret = xe_guid_decode(guid, &mem_region, &offset); 162 + if (ret) 163 + return ret; 164 + 165 + telem_addr += offset + user_offset; 166 + 167 + guard(mutex)(&xe->pmt.lock); 168 + 169 + /* indicate that we are not at an appropriate power level */ 170 + if (!xe_pm_runtime_get_if_active(xe)) 171 + return -ENODATA; 172 + 173 + /* set SoC re-mapper index register based on GUID memory region */ 174 + xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS, 175 + REG_FIELD_PREP(SG_REMAP_BITS, mem_region)); 176 + 177 + memcpy_fromio(data, telem_addr, count); 178 + xe_pm_runtime_put(xe); 179 + 180 + return count; 181 + } 182 + 183 + static struct pmt_callbacks xe_pmt_cb = { 184 + .read_telem = xe_pmt_telem_read, 185 + }; 186 + 187 + static const int vsec_platforms[] = { 188 + [XE_BATTLEMAGE] = XE_VSEC_BMG, 189 + }; 190 + 191 + static enum xe_vsec get_platform_info(struct xe_device *xe) 192 + { 193 + if (xe->info.platform > XE_BATTLEMAGE) 194 + return XE_VSEC_UNKNOWN; 195 + 196 + return vsec_platforms[xe->info.platform]; 197 + } 198 + 199 + /** 200 + * xe_vsec_init - Initialize 
resources and add intel_vsec auxiliary 201 + * interface 202 + * @xe: valid xe instance 203 + */ 204 + void xe_vsec_init(struct xe_device *xe) 205 + { 206 + struct intel_vsec_platform_info *info; 207 + struct device *dev = xe->drm.dev; 208 + struct pci_dev *pdev = to_pci_dev(dev); 209 + enum xe_vsec platform; 210 + 211 + platform = get_platform_info(xe); 212 + if (platform == XE_VSEC_UNKNOWN) 213 + return; 214 + 215 + info = &xe_vsec_info[platform]; 216 + if (!info->headers) 217 + return; 218 + 219 + switch (platform) { 220 + case XE_VSEC_BMG: 221 + info->priv_data = &xe_pmt_cb; 222 + break; 223 + default: 224 + break; 225 + } 226 + 227 + /* 228 + * Register a VSEC. Cleanup is handled using device managed 229 + * resources. 230 + */ 231 + intel_vsec_register(pdev, info); 232 + } 233 + MODULE_IMPORT_NS("INTEL_VSEC");
+11
drivers/gpu/drm/xe/xe_vsec.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright © 2024 Intel Corporation */ 3 + 4 + #ifndef _XE_VSEC_H_ 5 + #define _XE_VSEC_H_ 6 + 7 + struct xe_device; 8 + 9 + void xe_vsec_init(struct xe_device *xe); 10 + 11 + #endif
+6
drivers/gpu/drm/xe/xe_wa.c
··· 607 607 FUNC(xe_rtp_match_first_render_or_compute)), 608 608 XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) 609 609 }, 610 + { XE_RTP_NAME("16024792527"), 611 + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), 612 + FUNC(xe_rtp_match_first_render_or_compute)), 613 + XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, 614 + SMP_FORCE_128B_OVERFETCH)) 615 + }, 610 616 611 617 {} 612 618 };
+1
drivers/gpu/drm/xe/xe_wa_oob.rules
··· 1 + 1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) 1 2 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 2 3 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) 3 4 PLATFORM(DG2)
+235
include/drm/intel/xe_pciids.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2022 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PCIIDS_H_ 7 + #define _XE_PCIIDS_H_ 8 + 9 + /* 10 + * Lists below can be turned into initializers for a struct pci_device_id 11 + * by defining INTEL_VGA_DEVICE: 12 + * 13 + * #define INTEL_VGA_DEVICE(id, info) { \ 14 + * 0x8086, id, \ 15 + * ~0, ~0, \ 16 + * 0x030000, 0xff0000, \ 17 + * (unsigned long) info } 18 + * 19 + * And then calling like: 20 + * 21 + * XE_TGL_12_GT1_IDS(INTEL_VGA_DEVICE, ## __VA_ARGS__) 22 + * 23 + * To turn them into something else, just provide a different macro passed as 24 + * first argument. 25 + */ 26 + 27 + /* TGL */ 28 + #define XE_TGL_GT1_IDS(MACRO__, ...) \ 29 + MACRO__(0x9A60, ## __VA_ARGS__), \ 30 + MACRO__(0x9A68, ## __VA_ARGS__), \ 31 + MACRO__(0x9A70, ## __VA_ARGS__) 32 + 33 + #define XE_TGL_GT2_IDS(MACRO__, ...) \ 34 + MACRO__(0x9A40, ## __VA_ARGS__), \ 35 + MACRO__(0x9A49, ## __VA_ARGS__), \ 36 + MACRO__(0x9A59, ## __VA_ARGS__), \ 37 + MACRO__(0x9A78, ## __VA_ARGS__), \ 38 + MACRO__(0x9AC0, ## __VA_ARGS__), \ 39 + MACRO__(0x9AC9, ## __VA_ARGS__), \ 40 + MACRO__(0x9AD9, ## __VA_ARGS__), \ 41 + MACRO__(0x9AF8, ## __VA_ARGS__) 42 + 43 + #define XE_TGL_IDS(MACRO__, ...) \ 44 + XE_TGL_GT1_IDS(MACRO__, ## __VA_ARGS__),\ 45 + XE_TGL_GT2_IDS(MACRO__, ## __VA_ARGS__) 46 + 47 + /* RKL */ 48 + #define XE_RKL_IDS(MACRO__, ...) \ 49 + MACRO__(0x4C80, ## __VA_ARGS__), \ 50 + MACRO__(0x4C8A, ## __VA_ARGS__), \ 51 + MACRO__(0x4C8B, ## __VA_ARGS__), \ 52 + MACRO__(0x4C8C, ## __VA_ARGS__), \ 53 + MACRO__(0x4C90, ## __VA_ARGS__), \ 54 + MACRO__(0x4C9A, ## __VA_ARGS__) 55 + 56 + /* DG1 */ 57 + #define XE_DG1_IDS(MACRO__, ...) \ 58 + MACRO__(0x4905, ## __VA_ARGS__), \ 59 + MACRO__(0x4906, ## __VA_ARGS__), \ 60 + MACRO__(0x4907, ## __VA_ARGS__), \ 61 + MACRO__(0x4908, ## __VA_ARGS__), \ 62 + MACRO__(0x4909, ## __VA_ARGS__) 63 + 64 + /* ADL-S */ 65 + #define XE_ADLS_IDS(MACRO__, ...) 
\ 66 + MACRO__(0x4680, ## __VA_ARGS__), \ 67 + MACRO__(0x4682, ## __VA_ARGS__), \ 68 + MACRO__(0x4688, ## __VA_ARGS__), \ 69 + MACRO__(0x468A, ## __VA_ARGS__), \ 70 + MACRO__(0x468B, ## __VA_ARGS__), \ 71 + MACRO__(0x4690, ## __VA_ARGS__), \ 72 + MACRO__(0x4692, ## __VA_ARGS__), \ 73 + MACRO__(0x4693, ## __VA_ARGS__) 74 + 75 + /* ADL-P */ 76 + #define XE_ADLP_IDS(MACRO__, ...) \ 77 + MACRO__(0x46A0, ## __VA_ARGS__), \ 78 + MACRO__(0x46A1, ## __VA_ARGS__), \ 79 + MACRO__(0x46A2, ## __VA_ARGS__), \ 80 + MACRO__(0x46A3, ## __VA_ARGS__), \ 81 + MACRO__(0x46A6, ## __VA_ARGS__), \ 82 + MACRO__(0x46A8, ## __VA_ARGS__), \ 83 + MACRO__(0x46AA, ## __VA_ARGS__), \ 84 + MACRO__(0x462A, ## __VA_ARGS__), \ 85 + MACRO__(0x4626, ## __VA_ARGS__), \ 86 + MACRO__(0x4628, ## __VA_ARGS__), \ 87 + MACRO__(0x46B0, ## __VA_ARGS__), \ 88 + MACRO__(0x46B1, ## __VA_ARGS__), \ 89 + MACRO__(0x46B2, ## __VA_ARGS__), \ 90 + MACRO__(0x46B3, ## __VA_ARGS__), \ 91 + MACRO__(0x46C0, ## __VA_ARGS__), \ 92 + MACRO__(0x46C1, ## __VA_ARGS__), \ 93 + MACRO__(0x46C2, ## __VA_ARGS__), \ 94 + MACRO__(0x46C3, ## __VA_ARGS__) 95 + 96 + /* ADL-N */ 97 + #define XE_ADLN_IDS(MACRO__, ...) \ 98 + MACRO__(0x46D0, ## __VA_ARGS__), \ 99 + MACRO__(0x46D1, ## __VA_ARGS__), \ 100 + MACRO__(0x46D2, ## __VA_ARGS__), \ 101 + MACRO__(0x46D3, ## __VA_ARGS__), \ 102 + MACRO__(0x46D4, ## __VA_ARGS__) 103 + 104 + /* RPL-S */ 105 + #define XE_RPLS_IDS(MACRO__, ...) \ 106 + MACRO__(0xA780, ## __VA_ARGS__), \ 107 + MACRO__(0xA781, ## __VA_ARGS__), \ 108 + MACRO__(0xA782, ## __VA_ARGS__), \ 109 + MACRO__(0xA783, ## __VA_ARGS__), \ 110 + MACRO__(0xA788, ## __VA_ARGS__), \ 111 + MACRO__(0xA789, ## __VA_ARGS__), \ 112 + MACRO__(0xA78A, ## __VA_ARGS__), \ 113 + MACRO__(0xA78B, ## __VA_ARGS__) 114 + 115 + /* RPL-U */ 116 + #define XE_RPLU_IDS(MACRO__, ...) 
\ 117 + MACRO__(0xA721, ## __VA_ARGS__), \ 118 + MACRO__(0xA7A1, ## __VA_ARGS__), \ 119 + MACRO__(0xA7A9, ## __VA_ARGS__), \ 120 + MACRO__(0xA7AC, ## __VA_ARGS__), \ 121 + MACRO__(0xA7AD, ## __VA_ARGS__) 122 + 123 + /* RPL-P */ 124 + #define XE_RPLP_IDS(MACRO__, ...) \ 125 + MACRO__(0xA720, ## __VA_ARGS__), \ 126 + MACRO__(0xA7A0, ## __VA_ARGS__), \ 127 + MACRO__(0xA7A8, ## __VA_ARGS__), \ 128 + MACRO__(0xA7AA, ## __VA_ARGS__), \ 129 + MACRO__(0xA7AB, ## __VA_ARGS__) 130 + 131 + /* DG2 */ 132 + #define XE_DG2_G10_IDS(MACRO__, ...) \ 133 + MACRO__(0x5690, ## __VA_ARGS__), \ 134 + MACRO__(0x5691, ## __VA_ARGS__), \ 135 + MACRO__(0x5692, ## __VA_ARGS__), \ 136 + MACRO__(0x56A0, ## __VA_ARGS__), \ 137 + MACRO__(0x56A1, ## __VA_ARGS__), \ 138 + MACRO__(0x56A2, ## __VA_ARGS__), \ 139 + MACRO__(0x56BE, ## __VA_ARGS__), \ 140 + MACRO__(0x56BF, ## __VA_ARGS__) 141 + 142 + #define XE_DG2_G11_IDS(MACRO__, ...) \ 143 + MACRO__(0x5693, ## __VA_ARGS__), \ 144 + MACRO__(0x5694, ## __VA_ARGS__), \ 145 + MACRO__(0x5695, ## __VA_ARGS__), \ 146 + MACRO__(0x56A5, ## __VA_ARGS__), \ 147 + MACRO__(0x56A6, ## __VA_ARGS__), \ 148 + MACRO__(0x56B0, ## __VA_ARGS__), \ 149 + MACRO__(0x56B1, ## __VA_ARGS__), \ 150 + MACRO__(0x56BA, ## __VA_ARGS__), \ 151 + MACRO__(0x56BB, ## __VA_ARGS__), \ 152 + MACRO__(0x56BC, ## __VA_ARGS__), \ 153 + MACRO__(0x56BD, ## __VA_ARGS__) 154 + 155 + #define XE_DG2_G12_IDS(MACRO__, ...) \ 156 + MACRO__(0x5696, ## __VA_ARGS__), \ 157 + MACRO__(0x5697, ## __VA_ARGS__), \ 158 + MACRO__(0x56A3, ## __VA_ARGS__), \ 159 + MACRO__(0x56A4, ## __VA_ARGS__), \ 160 + MACRO__(0x56B2, ## __VA_ARGS__), \ 161 + MACRO__(0x56B3, ## __VA_ARGS__) 162 + 163 + #define XE_DG2_IDS(MACRO__, ...) \ 164 + XE_DG2_G10_IDS(MACRO__, ## __VA_ARGS__),\ 165 + XE_DG2_G11_IDS(MACRO__, ## __VA_ARGS__),\ 166 + XE_DG2_G12_IDS(MACRO__, ## __VA_ARGS__) 167 + 168 + #define XE_ATS_M150_IDS(MACRO__, ...) 
\ 169 + MACRO__(0x56C0, ## __VA_ARGS__), \ 170 + MACRO__(0x56C2, ## __VA_ARGS__) 171 + 172 + #define XE_ATS_M75_IDS(MACRO__, ...) \ 173 + MACRO__(0x56C1, ## __VA_ARGS__) 174 + 175 + #define XE_ATS_M_IDS(MACRO__, ...) \ 176 + XE_ATS_M150_IDS(MACRO__, ## __VA_ARGS__),\ 177 + XE_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) 178 + 179 + /* ARL */ 180 + #define XE_ARL_IDS(MACRO__, ...) \ 181 + MACRO__(0x7D41, ## __VA_ARGS__), \ 182 + MACRO__(0x7D51, ## __VA_ARGS__), \ 183 + MACRO__(0x7D67, ## __VA_ARGS__), \ 184 + MACRO__(0x7DD1, ## __VA_ARGS__), \ 185 + MACRO__(0xB640, ## __VA_ARGS__) 186 + 187 + /* MTL */ 188 + #define XE_MTL_IDS(MACRO__, ...) \ 189 + MACRO__(0x7D40, ## __VA_ARGS__), \ 190 + MACRO__(0x7D45, ## __VA_ARGS__), \ 191 + MACRO__(0x7D55, ## __VA_ARGS__), \ 192 + MACRO__(0x7D60, ## __VA_ARGS__), \ 193 + MACRO__(0x7DD5, ## __VA_ARGS__) 194 + 195 + /* PVC */ 196 + #define XE_PVC_IDS(MACRO__, ...) \ 197 + MACRO__(0x0B69, ## __VA_ARGS__), \ 198 + MACRO__(0x0B6E, ## __VA_ARGS__), \ 199 + MACRO__(0x0BD4, ## __VA_ARGS__), \ 200 + MACRO__(0x0BD5, ## __VA_ARGS__), \ 201 + MACRO__(0x0BD6, ## __VA_ARGS__), \ 202 + MACRO__(0x0BD7, ## __VA_ARGS__), \ 203 + MACRO__(0x0BD8, ## __VA_ARGS__), \ 204 + MACRO__(0x0BD9, ## __VA_ARGS__), \ 205 + MACRO__(0x0BDA, ## __VA_ARGS__), \ 206 + MACRO__(0x0BDB, ## __VA_ARGS__), \ 207 + MACRO__(0x0BE0, ## __VA_ARGS__), \ 208 + MACRO__(0x0BE1, ## __VA_ARGS__), \ 209 + MACRO__(0x0BE5, ## __VA_ARGS__) 210 + 211 + #define XE_LNL_IDS(MACRO__, ...) \ 212 + MACRO__(0x6420, ## __VA_ARGS__), \ 213 + MACRO__(0x64A0, ## __VA_ARGS__), \ 214 + MACRO__(0x64B0, ## __VA_ARGS__) 215 + 216 + #define XE_BMG_IDS(MACRO__, ...) \ 217 + MACRO__(0xE202, ## __VA_ARGS__), \ 218 + MACRO__(0xE20B, ## __VA_ARGS__), \ 219 + MACRO__(0xE20C, ## __VA_ARGS__), \ 220 + MACRO__(0xE20D, ## __VA_ARGS__), \ 221 + MACRO__(0xE212, ## __VA_ARGS__) 222 + 223 + #define XE_PTL_IDS(MACRO__, ...) 
\ 224 + MACRO__(0xB080, ## __VA_ARGS__), \ 225 + MACRO__(0xB081, ## __VA_ARGS__), \ 226 + MACRO__(0xB082, ## __VA_ARGS__), \ 227 + MACRO__(0xB090, ## __VA_ARGS__), \ 228 + MACRO__(0xB091, ## __VA_ARGS__), \ 229 + MACRO__(0xB092, ## __VA_ARGS__), \ 230 + MACRO__(0xB0A0, ## __VA_ARGS__), \ 231 + MACRO__(0xB0A1, ## __VA_ARGS__), \ 232 + MACRO__(0xB0A2, ## __VA_ARGS__), \ 233 + MACRO__(0xB0B0, ## __VA_ARGS__) 234 + 235 + #endif
+2
include/drm/ttm/ttm_bo.h
··· 421 421 int ttm_bo_evict_first(struct ttm_device *bdev, 422 422 struct ttm_resource_manager *man, 423 423 struct ttm_operation_ctx *ctx); 424 + int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, 425 + void *buf, int len, int write); 424 426 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 425 427 struct vm_fault *vmf); 426 428 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+9
include/uapi/drm/xe_drm.h
··· 1486 1486 __u64 capabilities; 1487 1487 #define DRM_XE_OA_CAPS_BASE (1 << 0) 1488 1488 #define DRM_XE_OA_CAPS_SYNCS (1 << 1) 1489 + #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) 1489 1490 1490 1491 /** @oa_timestamp_freq: OA timestamp freq */ 1491 1492 __u64 oa_timestamp_freq; ··· 1652 1651 * to the VM bind case. 1653 1652 */ 1654 1653 DRM_XE_OA_PROPERTY_SYNCS, 1654 + 1655 + /** 1656 + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be 1657 + * allocated by the driver in bytes. Supported sizes are powers of 1658 + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA 1659 + * buffer is allocated by default. 1660 + */ 1661 + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, 1655 1662 }; 1656 1663 1657 1664 /**