Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-xe-next-2024-06-26' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

UAPI Changes:
- New uapi adding OA functionality to Xe (Ashutosh)

Cross-subsystem Changes:
- devcoredump: Add dev_coredumpm_timeout (Jose)

Driver Changes:
- More SRIOV preparation, including GuC communication improvements (Michal)
- Kconfig update: do not select ACPI_BUTTON (Jani)
- Rework GPU page fault handling (Brost)
- Forcewake clean-up and fixes (Himal, Michal)
- Drop EXEC_QUEUE_FLAG_BANNED (Brost)
- Xe/Xe2 Workarounds fixes and additions (Tejas, Akshata, Sai, Vinay)
- Xe devcoredump changes (Jose)
- Tracing cleanup and add mmio tracing (RK)
- Add BMG PCI IDs (Roper)
- Scheduler fixes and improvements (Brost)
- Some overall driver clean-up around headers and print macros (Michal)
- Rename xe_exec_queue::compute to xe_exec_queue::lr (Francois)
- Improve RTP rules to allow easier 'OR' conditions in WA declaration (Lucas)
- Use ttm_uncached for BO with NEEDS_UC flag (Michal)
- Other OA related work and fixes (Ashutosh, Michal, Jose)
- Simplify locking in new_vma (Brost)
- Remove xe_irq_shutdown (Ilia)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZnyW9RdC_aWSla_q@intel.com

+5359 -833
+12 -11
drivers/base/devcoredump.c
··· 18 18 /* global disable flag, for security purposes */ 19 19 static bool devcd_disabled; 20 20 21 - /* if data isn't read by userspace after 5 minutes then delete it */ 22 - #define DEVCD_TIMEOUT (HZ * 60 * 5) 23 - 24 21 struct devcd_entry { 25 22 struct device devcd_dev; 26 23 void *data; ··· 325 328 EXPORT_SYMBOL_GPL(dev_coredump_put); 326 329 327 330 /** 328 - * dev_coredumpm - create device coredump with read/free methods 331 + * dev_coredumpm_timeout - create device coredump with read/free methods with a 332 + * custom timeout. 329 333 * @dev: the struct device for the crashed device 330 334 * @owner: the module that contains the read/free functions, use %THIS_MODULE 331 335 * @data: data cookie for the @read/@free functions ··· 334 336 * @gfp: allocation flags 335 337 * @read: function to read from the given buffer 336 338 * @free: function to free the given buffer 339 + * @timeout: time in jiffies to remove coredump 337 340 * 338 341 * Creates a new device coredump for the given device. If a previous one hasn't 339 342 * been read yet, the new coredump is discarded. The data lifetime is determined 340 343 * by the device coredump framework and when it is no longer needed the @free 341 344 * function will be called to free the data. 
342 345 */ 343 - void dev_coredumpm(struct device *dev, struct module *owner, 344 - void *data, size_t datalen, gfp_t gfp, 345 - ssize_t (*read)(char *buffer, loff_t offset, size_t count, 346 - void *data, size_t datalen), 347 - void (*free)(void *data)) 346 + void dev_coredumpm_timeout(struct device *dev, struct module *owner, 347 + void *data, size_t datalen, gfp_t gfp, 348 + ssize_t (*read)(char *buffer, loff_t offset, 349 + size_t count, void *data, 350 + size_t datalen), 351 + void (*free)(void *data), 352 + unsigned long timeout) 348 353 { 349 354 static atomic_t devcd_count = ATOMIC_INIT(0); 350 355 struct devcd_entry *devcd; ··· 404 403 dev_set_uevent_suppress(&devcd->devcd_dev, false); 405 404 kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); 406 405 INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); 407 - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); 406 + schedule_delayed_work(&devcd->del_wk, timeout); 408 407 mutex_unlock(&devcd->mutex); 409 408 return; 410 409 put_device: ··· 415 414 free: 416 415 free(data); 417 416 } 418 - EXPORT_SYMBOL_GPL(dev_coredumpm); 417 + EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); 419 418 420 419 /** 421 420 * dev_coredumpsg - create device coredump that uses scatterlist as data
-1
drivers/gpu/drm/xe/Kconfig
··· 25 25 select BACKLIGHT_CLASS_DEVICE if ACPI 26 26 select INPUT if ACPI 27 27 select ACPI_VIDEO if X86 && ACPI 28 - select ACPI_BUTTON if ACPI 29 28 select X86_PLATFORM_DEVICES if X86 && ACPI 30 29 select ACPI_WMI if X86 && ACPI 31 30 select SYNC_FILE
+7
drivers/gpu/drm/xe/Makefile
··· 24 24 $(call cmd,wa_oob) 25 25 26 26 uses_generated_oob := \ 27 + $(obj)/xe_ggtt.o \ 27 28 $(obj)/xe_gsc.o \ 29 + $(obj)/xe_gt.o \ 28 30 $(obj)/xe_guc.o \ 29 31 $(obj)/xe_guc_ads.o \ 32 + $(obj)/xe_guc_pc.o \ 30 33 $(obj)/xe_migrate.o \ 31 34 $(obj)/xe_ring_ops.o \ 32 35 $(obj)/xe_vm.o \ ··· 95 92 xe_mmio.o \ 96 93 xe_mocs.o \ 97 94 xe_module.o \ 95 + xe_oa.o \ 98 96 xe_pat.o \ 99 97 xe_pci.o \ 100 98 xe_pcode.o \ 99 + xe_perf.o \ 101 100 xe_pm.o \ 102 101 xe_preempt_fence.o \ 103 102 xe_pt.o \ ··· 117 112 xe_tile.o \ 118 113 xe_tile_sysfs.o \ 119 114 xe_trace.o \ 115 + xe_trace_bo.o \ 116 + xe_trace_guc.o \ 120 117 xe_ttm_sys_mgr.o \ 121 118 xe_ttm_stolen_mgr.o \ 122 119 xe_ttm_vram_mgr.o \
-6
drivers/gpu/drm/xe/abi/guc_actions_abi.h
··· 128 128 XE_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, 129 129 XE_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, 130 130 XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, 131 - XE_GUC_ACTION_SETUP_PC_GUCRC = 0x3004, 132 131 XE_GUC_ACTION_AUTHENTICATE_HUC = 0x4000, 133 132 XE_GUC_ACTION_GET_HWCONFIG = 0x4100, 134 133 XE_GUC_ACTION_REGISTER_CONTEXT = 0x4502, ··· 150 151 XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, 151 152 XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005, 152 153 XE_GUC_ACTION_LIMIT 153 - }; 154 - 155 - enum xe_guc_rc_options { 156 - XE_GUCRC_HOST_CONTROL, 157 - XE_GUCRC_FIRMWARE_CONTROL, 158 154 }; 159 155 160 156 enum xe_guc_preempt_options {
+22
drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
··· 246 246 #define HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC (0xffu << 0) 247 247 #define HOST2GUC_PC_SLPC_REQUEST_MSG_N_EVENT_DATA_N GUC_HXG_REQUEST_MSG_n_DATAn 248 248 249 + /** 250 + * DOC: SETUP_PC_GUCRC 251 + * 252 + * +---+-------+--------------------------------------------------------------+ 253 + * | | Bits | Description | 254 + * +===+=======+==============================================================+ 255 + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | 256 + * | +-------+--------------------------------------------------------------+ 257 + * | | 30:28 | TYPE = GUC_HXG_TYPE_FAST_REQUEST_ | 258 + * | +-------+--------------------------------------------------------------+ 259 + * | | 27:16 | DATA0 = MBZ | 260 + * | +-------+--------------------------------------------------------------+ 261 + * | | 15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC` = 0x3004 | 262 + * +---+-------+--------------------------------------------------------------+ 263 + * | 1 | 31:0 | **MODE** = GUCRC_HOST_CONTROL(0), GUCRC_FIRMWARE_CONTROL(1) | 264 + * +---+-------+--------------------------------------------------------------+ 265 + */ 266 + 267 + #define GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC 0x3004u 268 + #define GUCRC_HOST_CONTROL 0u 269 + #define GUCRC_FIRMWARE_CONTROL 1u 270 + 249 271 #endif
+31
drivers/gpu/drm/xe/abi/guc_errors_abi.h
··· 8 8 9 9 enum xe_guc_response_status { 10 10 XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0, 11 + XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04, 12 + XE_GUC_RESPONSE_INVALID_STATE = 0x0A, 13 + XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B, 14 + XE_GUC_RESPONSE_INVALID_VFID = 0x0C, 15 + XE_GUC_RESPONSE_UNPROVISIONED_VF = 0x0D, 16 + XE_GUC_RESPONSE_INVALID_EVENT = 0x0E, 11 17 XE_GUC_RESPONSE_NOT_SUPPORTED = 0x20, 18 + XE_GUC_RESPONSE_UNKNOWN_ACTION = 0x30, 19 + XE_GUC_RESPONSE_ACTION_ABORTED = 0x31, 20 + XE_GUC_RESPONSE_NO_PERMISSION = 0x40, 21 + XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41, 22 + XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50, 23 + XE_GUC_RESPONSE_INVALID_PARAMS = 0x60, 24 + XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70, 25 + XE_GUC_RESPONSE_INVALID_BUFFER = 0x71, 26 + XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80, 27 + XE_GUC_RESPONSE_PENDING_ACTION = 0x90, 28 + XE_GUC_RESPONSE_INVALID_SIZE = 0x102, 29 + XE_GUC_RESPONSE_MALFORMED_KLV = 0x103, 30 + XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105, 31 + XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106, 32 + XE_GUC_RESPONSE_VF_MIGRATED = 0x107, 12 33 XE_GUC_RESPONSE_NO_ATTRIBUTE_TABLE = 0x201, 13 34 XE_GUC_RESPONSE_NO_DECRYPTION_KEY = 0x202, 14 35 XE_GUC_RESPONSE_DECRYPTION_FAILED = 0x204, 36 + XE_GUC_RESPONSE_VGT_DISABLED = 0x300, 37 + XE_GUC_RESPONSE_CTB_FULL = 0x301, 38 + XE_GUC_RESPONSE_VGT_UNAUTHORIZED_REQUEST = 0x302, 39 + XE_GUC_RESPONSE_CTB_INVALID = 0x303, 40 + XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304, 41 + XE_GUC_RESPONSE_CTB_IN_USE = 0x305, 42 + XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306, 43 + XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D, 44 + XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E, 45 + XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F, 15 46 XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, 16 47 }; 17 48
+28 -13
drivers/gpu/drm/xe/abi/guc_messages_abi.h
··· 92 92 #define GUC_HXG_REQUEST_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD 93 93 94 94 /** 95 + * DOC: HXG Fast Request 96 + * 97 + * The `HXG Request`_ message should be used to initiate asynchronous activity 98 + * for which confirmation or return data is not expected. 99 + * 100 + * If confirmation is required then `HXG Request`_ shall be used instead. 101 + * 102 + * The recipient of this message may only use `HXG Failure`_ message if it was 103 + * unable to accept this request (like invalid data). 104 + * 105 + * Format of `HXG Fast Request`_ message is same as `HXG Request`_ except @TYPE. 106 + * 107 + * +---+-------+--------------------------------------------------------------+ 108 + * | | Bits | Description | 109 + * +===+=======+==============================================================+ 110 + * | 0 | 31 | ORIGIN - see `HXG Message`_ | 111 + * | +-------+--------------------------------------------------------------+ 112 + * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ | 113 + * | +-------+--------------------------------------------------------------+ 114 + * | | 27:16 | DATA0 - see `HXG Request`_ | 115 + * | +-------+--------------------------------------------------------------+ 116 + * | | 15:0 | ACTION - see `HXG Request`_ | 117 + * +---+-------+--------------------------------------------------------------+ 118 + * |...| | DATAn - see `HXG Request`_ | 119 + * +---+-------+--------------------------------------------------------------+ 120 + */ 121 + 122 + /** 95 123 * DOC: HXG Event 96 124 * 97 125 * The `HXG Event`_ message should be used to initiate asynchronous activity ··· 247 219 #define GUC_HXG_RESPONSE_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN 248 220 #define GUC_HXG_RESPONSE_MSG_0_DATA0 GUC_HXG_MSG_0_AUX 249 221 #define GUC_HXG_RESPONSE_MSG_n_DATAn GUC_HXG_MSG_n_PAYLOAD 250 - 251 - /* deprecated */ 252 - #define INTEL_GUC_MSG_TYPE_SHIFT 28 253 - #define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) 254 - #define INTEL_GUC_MSG_DATA_SHIFT 
16 255 - #define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) 256 - #define INTEL_GUC_MSG_CODE_SHIFT 0 257 - #define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) 258 - 259 - enum intel_guc_msg_type { 260 - INTEL_GUC_MSG_TYPE_REQUEST = 0x0, 261 - INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, 262 - }; 263 222 264 223 #endif
+2 -2
drivers/gpu/drm/xe/display/xe_fb_pin.c
··· 171 171 u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, 172 172 xe->pat.idx[XE_CACHE_NONE]); 173 173 174 - xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); 174 + ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte); 175 175 *ggtt_ofs += XE_PAGE_SIZE; 176 176 src_idx -= src_stride; 177 177 } ··· 217 217 u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, 218 218 xe->pat.idx[XE_CACHE_NONE]); 219 219 220 - xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); 220 + ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node.start + x, pte); 221 221 } 222 222 } else { 223 223 u32 i, ggtt_ofs;
+5
drivers/gpu/drm/xe/instructions/xe_mi_commands.h
··· 45 45 #define MI_LRI_MMIO_REMAP_EN REG_BIT(17) 46 46 #define MI_LRI_NUM_REGS(x) XE_INSTR_NUM_DW(2 * (x) + 1) 47 47 #define MI_LRI_FORCE_POSTED REG_BIT(12) 48 + #define MI_LRI_LEN(x) (((x) & 0xff) + 1) 48 49 49 50 #define MI_FLUSH_DW __MI_INSTR(0x26) 50 51 #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) ··· 59 58 60 59 #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4)) 61 60 #define MI_LRM_USE_GGTT REG_BIT(22) 61 + 62 + #define MI_COPY_MEM_MEM (__MI_INSTR(0x2e) | XE_INSTR_NUM_DW(5)) 63 + #define MI_COPY_MEM_MEM_SRC_GGTT REG_BIT(22) 64 + #define MI_COPY_MEM_MEM_DST_GGTT REG_BIT(21) 62 65 63 66 #define MI_BATCH_BUFFER_START __MI_INSTR(0x31) 64 67
+2
drivers/gpu/drm/xe/regs/xe_engine_regs.h
··· 129 129 #define RING_EXECLIST_STATUS_HI(base) XE_REG((base) + 0x234 + 4) 130 130 131 131 #define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED) 132 + #define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8) 133 + #define CTX_CTRL_RUN_ALONE REG_BIT(7) 132 134 #define CTX_CTRL_INDIRECT_RING_STATE_ENABLE REG_BIT(4) 133 135 #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) 134 136 #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
+8 -3
drivers/gpu/drm/xe/regs/xe_gt_regs.h
··· 170 170 171 171 #define SQCNT1 XE_REG_MCR(0x8718) 172 172 #define XELPMP_SQCNT1 XE_REG(0x8718) 173 + #define SQCNT1_PMON_ENABLE REG_BIT(30) 174 + #define SQCNT1_OABPC REG_BIT(29) 173 175 #define ENFORCE_RAR REG_BIT(23) 174 176 175 177 #define XEHP_SQCM XE_REG_MCR(0x8724) ··· 434 432 #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) 435 433 #define UGM_BACKUP_MODE REG_BIT(13) 436 434 #define MDQ_ARBITRATION_MODE REG_BIT(12) 435 + #define STALL_DOP_GATING_DISABLE REG_BIT(5) 437 436 #define EARLY_EOT_DIS REG_BIT(1) 438 437 439 438 #define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) ··· 493 490 ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) 494 491 495 492 #define FORCEWAKE_ACK_GT XE_REG(0x130044) 496 - #define FORCEWAKE_KERNEL BIT(0) 497 - #define FORCEWAKE_USER BIT(1) 498 - #define FORCEWAKE_KERNEL_FALLBACK BIT(15) 493 + 494 + /* Applicable for all FORCEWAKE_DOMAIN and FORCEWAKE_ACK_DOMAIN regs */ 495 + #define FORCEWAKE_KERNEL 0 496 + #define FORCEWAKE_MT(bit) BIT(bit) 497 + #define FORCEWAKE_MT_MASK(bit) BIT((bit) + 16) 499 498 500 499 #define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) 501 500 #define MTL_MEDIA_MC6 XE_REG(0x138048)
+100
drivers/gpu/drm/xe/regs/xe_oa_regs.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023 Intel Corporation 4 + */ 5 + 6 + #ifndef __XE_OA_REGS__ 7 + #define __XE_OA_REGS__ 8 + 9 + #define RPM_CONFIG1 XE_REG(0xd04) 10 + #define GT_NOA_ENABLE REG_BIT(9) 11 + 12 + #define EU_PERF_CNTL0 XE_REG(0xe458) 13 + #define EU_PERF_CNTL4 XE_REG(0xe45c) 14 + #define EU_PERF_CNTL1 XE_REG(0xe558) 15 + #define EU_PERF_CNTL5 XE_REG(0xe55c) 16 + #define EU_PERF_CNTL2 XE_REG(0xe658) 17 + #define EU_PERF_CNTL6 XE_REG(0xe65c) 18 + #define EU_PERF_CNTL3 XE_REG(0xe758) 19 + 20 + #define OA_TLB_INV_CR XE_REG(0xceec) 21 + 22 + /* OAR unit */ 23 + #define OAR_OACONTROL XE_REG(0x2960) 24 + #define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) 25 + #define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) 26 + 27 + #define OACTXCONTROL(base) XE_REG((base) + 0x360) 28 + #define OAR_OASTATUS XE_REG(0x2968) 29 + #define OA_COUNTER_RESUME REG_BIT(0) 30 + 31 + /* OAG unit */ 32 + #define OAG_OAGLBCTXCTRL XE_REG(0x2b28) 33 + #define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) 34 + #define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) 35 + #define OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) 36 + 37 + #define OAG_OAHEADPTR XE_REG(0xdb00) 38 + #define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) 39 + #define OAG_OATAILPTR XE_REG(0xdb04) 40 + #define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) 41 + 42 + #define OAG_OABUFFER XE_REG(0xdb08) 43 + #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) 44 + #define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) 45 + #define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) 46 + #define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) 47 + #define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) 48 + #define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) 49 + #define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) 50 + #define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) 51 + #define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) 52 + 
#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ 53 + 54 + #define OAG_OACONTROL XE_REG(0xdaf4) 55 + #define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) 56 + #define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) 57 + #define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) 58 + /* Common to all OA units */ 59 + #define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) 60 + #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) 61 + 62 + #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) 63 + #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) 64 + #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) 65 + #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) 66 + #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) 67 + #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) 68 + #define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) 69 + #define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) 70 + 71 + #define OAG_OASTATUS XE_REG(0xdafc) 72 + #define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) 73 + #define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) 74 + #define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) 75 + #define OASTATUS_REPORT_LOST REG_BIT(0) 76 + #define OAG_MMIOTRIGGER XE_REG(0xdb1c) 77 + /* OAC unit */ 78 + #define OAC_OACONTROL XE_REG(0x15114) 79 + 80 + /* OAM unit */ 81 + #define OAM_HEAD_POINTER_OFFSET (0x1a0) 82 + #define OAM_TAIL_POINTER_OFFSET (0x1a4) 83 + #define OAM_BUFFER_OFFSET (0x1a8) 84 + #define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) 85 + #define OAM_CONTROL_OFFSET (0x194) 86 + #define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) 87 + #define OAM_DEBUG_OFFSET (0x198) 88 + #define OAM_STATUS_OFFSET (0x19c) 89 + #define OAM_MMIO_TRG_OFFSET (0x1d0) 90 + 91 + #define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) 92 + #define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) 93 + #define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) 94 + #define OAM_CONTEXT_CONTROL(base) 
XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) 95 + #define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) 96 + #define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) 97 + #define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) 98 + #define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) 99 + 100 + #endif
+61 -3
drivers/gpu/drm/xe/tests/xe_rtp_test.c
··· 91 91 }, 92 92 }, 93 93 { 94 + .name = "match-or", 95 + .expected_reg = REGULAR_REG1, 96 + .expected_set_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), 97 + .expected_clr_bits = REG_BIT(0) | REG_BIT(1) | REG_BIT(2), 98 + .expected_count = 1, 99 + .entries = (const struct xe_rtp_entry_sr[]) { 100 + { XE_RTP_NAME("first"), 101 + XE_RTP_RULES(FUNC(match_yes), OR, FUNC(match_no)), 102 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) 103 + }, 104 + { XE_RTP_NAME("middle"), 105 + XE_RTP_RULES(FUNC(match_no), FUNC(match_no), OR, 106 + FUNC(match_yes), OR, 107 + FUNC(match_no)), 108 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) 109 + }, 110 + { XE_RTP_NAME("last"), 111 + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_yes)), 112 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) 113 + }, 114 + { XE_RTP_NAME("no-match"), 115 + XE_RTP_RULES(FUNC(match_no), OR, FUNC(match_no)), 116 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(3))) 117 + }, 118 + {} 119 + }, 120 + }, 121 + { 122 + .name = "match-or-xfail", 123 + .expected_reg = REGULAR_REG1, 124 + .expected_count = 0, 125 + .entries = (const struct xe_rtp_entry_sr[]) { 126 + { XE_RTP_NAME("leading-or"), 127 + XE_RTP_RULES(OR, FUNC(match_yes)), 128 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(0))) 129 + }, 130 + { XE_RTP_NAME("trailing-or"), 131 + /* 132 + * First condition is match_no, otherwise the failure 133 + * wouldn't really trigger as RTP stops processing as 134 + * soon as it has a matching set of rules 135 + */ 136 + XE_RTP_RULES(FUNC(match_no), OR), 137 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(1))) 138 + }, 139 + { XE_RTP_NAME("no-or-or-yes"), 140 + XE_RTP_RULES(FUNC(match_no), OR, OR, FUNC(match_yes)), 141 + XE_RTP_ACTIONS(SET(REGULAR_REG1, REG_BIT(2))) 142 + }, 143 + {} 144 + }, 145 + }, 146 + { 94 147 .name = "no-match-no-add-multiple-rules", 95 148 .expected_reg = REGULAR_REG1, 96 149 .expected_set_bits = REG_BIT(0), ··· 308 255 } 309 256 310 257 KUNIT_EXPECT_EQ(test, count, param->expected_count); 311 - 
KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); 312 - KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); 313 - KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); 258 + if (count) { 259 + KUNIT_EXPECT_EQ(test, sr_entry->clr_bits, param->expected_clr_bits); 260 + KUNIT_EXPECT_EQ(test, sr_entry->set_bits, param->expected_set_bits); 261 + KUNIT_EXPECT_EQ(test, sr_entry->reg.raw, param->expected_reg.raw); 262 + } else { 263 + KUNIT_EXPECT_NULL(test, sr_entry); 264 + } 265 + 314 266 KUNIT_EXPECT_EQ(test, reg_sr->errors, param->expected_sr_errors); 315 267 } 316 268
+10 -1
drivers/gpu/drm/xe/xe_bo.c
··· 25 25 #include "xe_pm.h" 26 26 #include "xe_preempt_fence.h" 27 27 #include "xe_res_cursor.h" 28 - #include "xe_trace.h" 28 + #include "xe_trace_bo.h" 29 29 #include "xe_ttm_stolen_mgr.h" 30 30 #include "xe_vm.h" 31 31 ··· 377 377 if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || 378 378 (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) 379 379 caching = ttm_write_combined; 380 + 381 + if (bo->flags & XE_BO_FLAG_NEEDS_UC) { 382 + /* 383 + * Valid only for internally-created buffers only, for 384 + * which cpu_caching is never initialized. 385 + */ 386 + xe_assert(xe, bo->cpu_caching == 0); 387 + caching = ttm_uncached; 388 + } 380 389 381 390 err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); 382 391 if (err) {
+6 -2
drivers/gpu/drm/xe/xe_devcoredump.c
··· 53 53 54 54 #ifdef CONFIG_DEV_COREDUMP 55 55 56 + /* 1 hour timeout */ 57 + #define XE_COREDUMP_TIMEOUT_JIFFIES (60 * 60 * HZ) 58 + 56 59 static struct xe_device *coredump_to_xe(const struct xe_devcoredump *coredump) 57 60 { 58 61 return container_of(coredump, struct xe_device, devcoredump); ··· 250 247 drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", 251 248 xe->drm.primary->index); 252 249 253 - dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, 254 - xe_devcoredump_read, xe_devcoredump_free); 250 + dev_coredumpm_timeout(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, 251 + xe_devcoredump_read, xe_devcoredump_free, 252 + XE_COREDUMP_TIMEOUT_JIFFIES); 255 253 } 256 254 257 255 static void xe_driver_devcoredump_fini(void *arg)
+36 -9
drivers/gpu/drm/xe/xe_device.c
··· 44 44 #include "xe_module.h" 45 45 #include "xe_pat.h" 46 46 #include "xe_pcode.h" 47 + #include "xe_perf.h" 47 48 #include "xe_pm.h" 48 49 #include "xe_query.h" 49 50 #include "xe_sriov.h" ··· 142 141 DRM_RENDER_ALLOW), 143 142 DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, 144 143 DRM_RENDER_ALLOW), 144 + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), 145 145 }; 146 146 147 147 static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ··· 486 484 return 0; 487 485 } 488 486 487 + static void update_device_info(struct xe_device *xe) 488 + { 489 + /* disable features that are not available/applicable to VFs */ 490 + if (IS_SRIOV_VF(xe)) { 491 + xe->info.enable_display = 0; 492 + xe->info.has_heci_gscfi = 0; 493 + xe->info.skip_guc_pc = 1; 494 + xe->info.skip_pcode = 1; 495 + } 496 + } 497 + 489 498 /** 490 499 * xe_device_probe_early: Device early probe 491 500 * @xe: xe device instance ··· 516 503 return err; 517 504 518 505 xe_sriov_probe_early(xe); 506 + 507 + update_device_info(xe); 519 508 520 509 err = xe_pcode_probe_early(xe); 521 510 if (err) ··· 634 619 635 620 err = xe_device_set_has_flat_ccs(xe); 636 621 if (err) 637 - goto err_irq_shutdown; 622 + goto err; 638 623 639 624 err = xe_vram_probe(xe); 640 625 if (err) 641 - goto err_irq_shutdown; 626 + goto err; 642 627 643 628 for_each_tile(tile, xe, id) { 644 629 err = xe_tile_init_noalloc(tile); 645 630 if (err) 646 - goto err_irq_shutdown; 631 + goto err; 647 632 } 648 633 649 634 /* Allocate and map stolen after potential VRAM resize */ ··· 657 642 */ 658 643 err = xe_display_init_noaccel(xe); 659 644 if (err) 660 - goto err_irq_shutdown; 645 + goto err; 661 646 662 647 for_each_gt(gt, xe, id) { 663 648 last_gt = id; ··· 669 654 670 655 xe_heci_gsc_init(xe); 671 656 672 - err = xe_display_init(xe); 657 + err = xe_oa_init(xe); 673 658 if (err) 674 659 goto err_fini_gt; 660 + 661 + err = xe_display_init(xe); 662 + if (err) 663 + goto err_fini_oa; 
675 664 676 665 err = drm_dev_register(&xe->drm, 0); 677 666 if (err) ··· 683 664 684 665 xe_display_register(xe); 685 666 667 + xe_oa_register(xe); 668 + 686 669 xe_debugfs_register(xe); 687 670 688 671 xe_hwmon_register(xe); 672 + 673 + for_each_gt(gt, xe, id) 674 + xe_gt_sanitize_freq(gt); 689 675 690 676 return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); 691 677 692 678 err_fini_display: 693 679 xe_display_driver_remove(xe); 680 + 681 + err_fini_oa: 682 + xe_oa_fini(xe); 694 683 695 684 err_fini_gt: 696 685 for_each_gt(gt, xe, id) { ··· 708 681 break; 709 682 } 710 683 711 - err_irq_shutdown: 712 - xe_irq_shutdown(xe); 713 684 err: 714 685 xe_display_fini(xe); 715 686 return err; ··· 726 701 struct xe_gt *gt; 727 702 u8 id; 728 703 704 + xe_oa_unregister(xe); 705 + 729 706 xe_device_remove_display(xe); 730 707 731 708 xe_display_fini(xe); 709 + 710 + xe_oa_fini(xe); 732 711 733 712 xe_heci_gsc_fini(xe); 734 713 735 714 for_each_gt(gt, xe, id) 736 715 xe_gt_remove(gt); 737 - 738 - xe_irq_shutdown(xe); 739 716 } 740 717 741 718 void xe_device_shutdown(struct xe_device *xe)
+4
drivers/gpu/drm/xe/xe_device_types.h
··· 17 17 #include "xe_gt_types.h" 18 18 #include "xe_lmtt_types.h" 19 19 #include "xe_memirq_types.h" 20 + #include "xe_oa.h" 20 21 #include "xe_platform_types.h" 21 22 #include "xe_pt_types.h" 22 23 #include "xe_sriov_types.h" ··· 462 461 463 462 /** @heci_gsc: graphics security controller */ 464 463 struct xe_heci_gsc heci_gsc; 464 + 465 + /** @oa: oa perf counter subsystem */ 466 + struct xe_oa oa; 465 467 466 468 /** @needs_flr_on_fini: requests function-reset on fini */ 467 469 bool needs_flr_on_fini;
+4 -4
drivers/gpu/drm/xe/xe_exec.c
··· 141 141 q->width != args->num_batch_buffer)) 142 142 return -EINVAL; 143 143 144 - if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) { 144 + if (XE_IOCTL_DBG(xe, q->ops->reset_status(q))) { 145 145 err = -ECANCELED; 146 146 goto err_exec_queue; 147 147 } ··· 259 259 260 260 /* Wait behind rebinds */ 261 261 if (!xe_vm_in_lr_mode(vm)) { 262 - err = drm_sched_job_add_resv_dependencies(&job->drm, 263 - xe_vm_resv(vm), 264 - DMA_RESV_USAGE_KERNEL); 262 + err = xe_sched_job_add_deps(job, 263 + xe_vm_resv(vm), 264 + DMA_RESV_USAGE_KERNEL); 265 265 if (err) 266 266 goto err_put_job; 267 267 }
+4 -4
drivers/gpu/drm/xe/xe_exec_queue.c
··· 67 67 q->fence_irq = &gt->fence_irq[hwe->class]; 68 68 q->ring_ops = gt->ring_ops[hwe->class]; 69 69 q->ops = gt->exec_queue_ops; 70 - INIT_LIST_HEAD(&q->compute.link); 70 + INIT_LIST_HEAD(&q->lr.link); 71 71 INIT_LIST_HEAD(&q->multi_gt_link); 72 72 73 73 q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; ··· 633 633 return PTR_ERR(q); 634 634 635 635 if (xe_vm_in_preempt_fence_mode(vm)) { 636 - q->compute.context = dma_fence_context_alloc(1); 637 - spin_lock_init(&q->compute.lock); 636 + q->lr.context = dma_fence_context_alloc(1); 637 + spin_lock_init(&q->lr.lock); 638 638 639 639 err = xe_vm_add_compute_exec_queue(vm, q); 640 640 if (XE_IOCTL_DBG(xe, err)) ··· 677 677 678 678 switch (args->property) { 679 679 case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: 680 - args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED); 680 + args->value = q->ops->reset_status(q); 681 681 ret = 0; 682 682 break; 683 683 default:
+12 -14
drivers/gpu/drm/xe/xe_exec_queue_types.h
··· 70 70 */ 71 71 struct dma_fence *last_fence; 72 72 73 - /* queue no longer allowed to submit */ 74 - #define EXEC_QUEUE_FLAG_BANNED BIT(0) 75 73 /* queue used for kernel submission only */ 76 - #define EXEC_QUEUE_FLAG_KERNEL BIT(1) 74 + #define EXEC_QUEUE_FLAG_KERNEL BIT(0) 77 75 /* kernel engine only destroyed at driver unload */ 78 - #define EXEC_QUEUE_FLAG_PERMANENT BIT(2) 76 + #define EXEC_QUEUE_FLAG_PERMANENT BIT(1) 79 77 /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */ 80 - #define EXEC_QUEUE_FLAG_VM BIT(3) 78 + #define EXEC_QUEUE_FLAG_VM BIT(2) 81 79 /* child of VM queue for multi-tile VM jobs */ 82 - #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4) 80 + #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(3) 83 81 /* kernel exec_queue only, set priority to highest level */ 84 - #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5) 82 + #define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4) 85 83 86 84 /** 87 85 * @flags: flags for this exec queue, should statically setup aside from ban ··· 113 115 enum xe_exec_queue_priority priority; 114 116 } sched_props; 115 117 116 - /** @compute: compute exec queue state */ 118 + /** @lr: long-running exec queue state */ 117 119 struct { 118 - /** @compute.pfence: preemption fence */ 120 + /** @lr.pfence: preemption fence */ 119 121 struct dma_fence *pfence; 120 - /** @compute.context: preemption fence context */ 122 + /** @lr.context: preemption fence context */ 121 123 u64 context; 122 - /** @compute.seqno: preemption fence seqno */ 124 + /** @lr.seqno: preemption fence seqno */ 123 125 u32 seqno; 124 - /** @compute.link: link into VM's list of exec queues */ 126 + /** @lr.link: link into VM's list of exec queues */ 125 127 struct list_head link; 126 - /** @compute.lock: preemption fences lock */ 128 + /** @lr.lock: preemption fences lock */ 127 129 spinlock_t lock; 128 - } compute; 130 + } lr; 129 131 130 132 /** @ops: submission backend exec queue operations */ 131 133 const struct xe_exec_queue_ops 
*ops;
+52 -50
drivers/gpu/drm/xe/xe_force_wake.c
··· 10 10 #include "regs/xe_gt_regs.h" 11 11 #include "regs/xe_reg_defs.h" 12 12 #include "xe_gt.h" 13 + #include "xe_gt_printk.h" 13 14 #include "xe_mmio.h" 15 + #include "xe_sriov.h" 14 16 15 17 #define XE_FORCE_WAKE_ACK_TIMEOUT_MS 50 16 18 17 - static struct xe_gt * 18 - fw_to_gt(struct xe_force_wake *fw) 19 + static const char *str_wake_sleep(bool wake) 19 20 { 20 - return fw->gt; 21 - } 22 - 23 - static struct xe_device * 24 - fw_to_xe(struct xe_force_wake *fw) 25 - { 26 - return gt_to_xe(fw_to_gt(fw)); 21 + return wake ? "wake" : "sleep"; 27 22 } 28 23 29 24 static void domain_init(struct xe_force_wake_domain *domain, 30 25 enum xe_force_wake_domain_id id, 31 - struct xe_reg reg, struct xe_reg ack, u32 val, u32 mask) 26 + struct xe_reg reg, struct xe_reg ack) 32 27 { 33 28 domain->id = id; 34 29 domain->reg_ctl = reg; 35 30 domain->reg_ack = ack; 36 - domain->val = val; 37 - domain->mask = mask; 31 + domain->val = FORCEWAKE_MT(FORCEWAKE_KERNEL); 32 + domain->mask = FORCEWAKE_MT_MASK(FORCEWAKE_KERNEL); 38 33 } 39 34 40 35 void xe_force_wake_init_gt(struct xe_gt *gt, struct xe_force_wake *fw) ··· 46 51 domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 47 52 XE_FW_DOMAIN_ID_GT, 48 53 FORCEWAKE_GT, 49 - FORCEWAKE_ACK_GT_MTL, 50 - BIT(0), BIT(16)); 54 + FORCEWAKE_ACK_GT_MTL); 51 55 } else { 52 56 domain_init(&fw->domains[XE_FW_DOMAIN_ID_GT], 53 57 XE_FW_DOMAIN_ID_GT, 54 58 FORCEWAKE_GT, 55 - FORCEWAKE_ACK_GT, 56 - BIT(0), BIT(16)); 59 + FORCEWAKE_ACK_GT); 57 60 } 58 61 } 59 62 ··· 66 73 domain_init(&fw->domains[XE_FW_DOMAIN_ID_RENDER], 67 74 XE_FW_DOMAIN_ID_RENDER, 68 75 FORCEWAKE_RENDER, 69 - FORCEWAKE_ACK_RENDER, 70 - BIT(0), BIT(16)); 76 + FORCEWAKE_ACK_RENDER); 71 77 72 78 for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) { 73 79 if (!(gt->info.engine_mask & BIT(i))) ··· 75 83 domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j], 76 84 XE_FW_DOMAIN_ID_MEDIA_VDBOX0 + j, 77 85 FORCEWAKE_MEDIA_VDBOX(j), 78 - FORCEWAKE_ACK_MEDIA_VDBOX(j), 79 - 
BIT(0), BIT(16)); 86 + FORCEWAKE_ACK_MEDIA_VDBOX(j)); 80 87 } 81 88 82 89 for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) { ··· 85 94 domain_init(&fw->domains[XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j], 86 95 XE_FW_DOMAIN_ID_MEDIA_VEBOX0 + j, 87 96 FORCEWAKE_MEDIA_VEBOX(j), 88 - FORCEWAKE_ACK_MEDIA_VEBOX(j), 89 - BIT(0), BIT(16)); 97 + FORCEWAKE_ACK_MEDIA_VEBOX(j)); 90 98 } 91 99 92 100 if (gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)) 93 101 domain_init(&fw->domains[XE_FW_DOMAIN_ID_GSC], 94 102 XE_FW_DOMAIN_ID_GSC, 95 103 FORCEWAKE_GSC, 96 - FORCEWAKE_ACK_GSC, 97 - BIT(0), BIT(16)); 104 + FORCEWAKE_ACK_GSC); 105 + } 106 + 107 + static void __domain_ctl(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) 108 + { 109 + if (IS_SRIOV_VF(gt_to_xe(gt))) 110 + return; 111 + 112 + xe_mmio_write32(gt, domain->reg_ctl, domain->mask | (wake ? domain->val : 0)); 113 + } 114 + 115 + static int __domain_wait(struct xe_gt *gt, struct xe_force_wake_domain *domain, bool wake) 116 + { 117 + u32 value; 118 + int ret; 119 + 120 + if (IS_SRIOV_VF(gt_to_xe(gt))) 121 + return 0; 122 + 123 + ret = xe_mmio_wait32(gt, domain->reg_ack, domain->val, wake ? 
domain->val : 0, 124 + XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, 125 + &value, true); 126 + if (ret) 127 + xe_gt_notice(gt, "Force wake domain %d failed to ack %s (%pe) reg[%#x] = %#x\n", 128 + domain->id, str_wake_sleep(wake), ERR_PTR(ret), 129 + domain->reg_ack.addr, value); 130 + 131 + return ret; 98 132 } 99 133 100 134 static void domain_wake(struct xe_gt *gt, struct xe_force_wake_domain *domain) 101 135 { 102 - xe_mmio_write32(gt, domain->reg_ctl, domain->mask | domain->val); 136 + __domain_ctl(gt, domain, true); 103 137 } 104 138 105 139 static int domain_wake_wait(struct xe_gt *gt, 106 140 struct xe_force_wake_domain *domain) 107 141 { 108 - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, domain->val, 109 - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, 110 - NULL, true); 142 + return __domain_wait(gt, domain, true); 111 143 } 112 144 113 145 static void domain_sleep(struct xe_gt *gt, struct xe_force_wake_domain *domain) 114 146 { 115 - xe_mmio_write32(gt, domain->reg_ctl, domain->mask); 147 + __domain_ctl(gt, domain, false); 116 148 } 117 149 118 150 static int domain_sleep_wait(struct xe_gt *gt, 119 151 struct xe_force_wake_domain *domain) 120 152 { 121 - return xe_mmio_wait32(gt, domain->reg_ack, domain->val, 0, 122 - XE_FORCE_WAKE_ACK_TIMEOUT_MS * USEC_PER_MSEC, 123 - NULL, true); 153 + return __domain_wait(gt, domain, false); 124 154 } 125 155 126 156 #define for_each_fw_domain_masked(domain__, mask__, fw__, tmp__) \ ··· 153 141 int xe_force_wake_get(struct xe_force_wake *fw, 154 142 enum xe_force_wake_domains domains) 155 143 { 156 - struct xe_device *xe = fw_to_xe(fw); 157 - struct xe_gt *gt = fw_to_gt(fw); 144 + struct xe_gt *gt = fw->gt; 158 145 struct xe_force_wake_domain *domain; 159 146 enum xe_force_wake_domains tmp, woken = 0; 160 147 unsigned long flags; 161 - int ret, ret2 = 0; 148 + int ret = 0; 162 149 163 150 spin_lock_irqsave(&fw->lock, flags); 164 151 for_each_fw_domain_masked(domain, domains, fw, tmp) { ··· 167 156 } 168 157 
} 169 158 for_each_fw_domain_masked(domain, woken, fw, tmp) { 170 - ret = domain_wake_wait(gt, domain); 171 - ret2 |= ret; 172 - if (ret) 173 - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack wake, ret=%d\n", 174 - domain->id, ret); 159 + ret |= domain_wake_wait(gt, domain); 175 160 } 176 161 fw->awake_domains |= woken; 177 162 spin_unlock_irqrestore(&fw->lock, flags); 178 163 179 - return ret2; 164 + return ret; 180 165 } 181 166 182 167 int xe_force_wake_put(struct xe_force_wake *fw, 183 168 enum xe_force_wake_domains domains) 184 169 { 185 - struct xe_device *xe = fw_to_xe(fw); 186 - struct xe_gt *gt = fw_to_gt(fw); 170 + struct xe_gt *gt = fw->gt; 187 171 struct xe_force_wake_domain *domain; 188 172 enum xe_force_wake_domains tmp, sleep = 0; 189 173 unsigned long flags; 190 - int ret, ret2 = 0; 174 + int ret = 0; 191 175 192 176 spin_lock_irqsave(&fw->lock, flags); 193 177 for_each_fw_domain_masked(domain, domains, fw, tmp) { ··· 192 186 } 193 187 } 194 188 for_each_fw_domain_masked(domain, sleep, fw, tmp) { 195 - ret = domain_sleep_wait(gt, domain); 196 - ret2 |= ret; 197 - if (ret) 198 - drm_notice(&xe->drm, "Force wake domain (%d) failed to ack sleep, ret=%d\n", 199 - domain->id, ret); 189 + ret |= domain_sleep_wait(gt, domain); 200 190 } 201 191 fw->awake_domains &= ~sleep; 202 192 spin_unlock_irqrestore(&fw->lock, flags); 203 193 204 - return ret2; 194 + return ret; 205 195 }
+12 -1
drivers/gpu/drm/xe/xe_force_wake.h
··· 24 24 xe_force_wake_ref(struct xe_force_wake *fw, 25 25 enum xe_force_wake_domains domain) 26 26 { 27 - xe_gt_assert(fw->gt, domain); 27 + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); 28 28 return fw->domains[ffs(domain) - 1].ref; 29 29 } 30 30 31 + /** 32 + * xe_force_wake_assert_held - asserts domain is awake 33 + * @fw : xe_force_wake structure 34 + * @domain: xe_force_wake_domains apart from XE_FORCEWAKE_ALL 35 + * 36 + * xe_force_wake_assert_held() is designed to confirm a particular 37 + * forcewake domain's wakefulness; it doesn't verify the wakefulness of 38 + * multiple domains. Make sure the caller doesn't input multiple 39 + * domains(XE_FORCEWAKE_ALL) as a parameter. 40 + */ 31 41 static inline void 32 42 xe_force_wake_assert_held(struct xe_force_wake *fw, 33 43 enum xe_force_wake_domains domain) 34 44 { 45 + xe_gt_assert(fw->gt, domain != XE_FORCEWAKE_ALL); 35 46 xe_gt_assert(fw->gt, fw->awake_domains & domain); 36 47 } 37 48
+37 -5
drivers/gpu/drm/xe/xe_ggtt.c
··· 11 11 #include <drm/drm_drv.h> 12 12 #include <drm/drm_managed.h> 13 13 #include <drm/intel/i915_drm.h> 14 + #include <generated/xe_wa_oob.h> 14 15 15 16 #include "regs/xe_gt_regs.h" 16 17 #include "regs/xe_gtt_defs.h" ··· 24 23 #include "xe_gt_sriov_vf.h" 25 24 #include "xe_gt_tlb_invalidation.h" 26 25 #include "xe_map.h" 26 + #include "xe_mmio.h" 27 27 #include "xe_pm.h" 28 28 #include "xe_sriov.h" 29 + #include "xe_wa.h" 29 30 #include "xe_wopcm.h" 30 31 31 32 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, ··· 72 69 return ggms ? SZ_1M << ggms : 0; 73 70 } 74 71 75 - void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) 72 + static void ggtt_update_access_counter(struct xe_ggtt *ggtt) 73 + { 74 + /* 75 + * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit 76 + * to wait for completion of prior GTT writes before letting this through. 77 + * This needs to be done for all GGTT writes originating from the CPU. 78 + */ 79 + lockdep_assert_held(&ggtt->lock); 80 + 81 + if ((++ggtt->access_count % 63) == 0) { 82 + xe_mmio_write32(ggtt->tile->media_gt, GMD_ID, 0x0); 83 + ggtt->access_count = 0; 84 + } 85 + } 86 + 87 + static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) 76 88 { 77 89 xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); 78 90 xe_tile_assert(ggtt->tile, addr < ggtt->size); 79 91 80 92 writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); 93 + } 94 + 95 + static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) 96 + { 97 + xe_ggtt_set_pte(ggtt, addr, pte); 98 + ggtt_update_access_counter(ggtt); 81 99 } 82 100 83 101 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) ··· 116 92 scratch_pte = 0; 117 93 118 94 while (start < end) { 119 - xe_ggtt_set_pte(ggtt, start, scratch_pte); 95 + ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte); 120 96 start += XE_PAGE_SIZE; 121 97 } 122 98 } ··· 148 124 149 125 static const struct xe_ggtt_pt_ops xelp_pt_ops = { 150 126 
.pte_encode_bo = xelp_ggtt_pte_encode_bo, 127 + .ggtt_set_pte = xe_ggtt_set_pte, 151 128 }; 152 129 153 130 static const struct xe_ggtt_pt_ops xelpg_pt_ops = { 154 131 .pte_encode_bo = xelpg_ggtt_pte_encode_bo, 132 + .ggtt_set_pte = xe_ggtt_set_pte, 133 + }; 134 + 135 + static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { 136 + .pte_encode_bo = xelpg_ggtt_pte_encode_bo, 137 + .ggtt_set_pte = xe_ggtt_set_pte_and_flush, 155 138 }; 156 139 157 140 /* ··· 218 187 ggtt->size = GUC_GGTT_TOP; 219 188 220 189 if (GRAPHICS_VERx100(xe) >= 1270) 221 - ggtt->pt_ops = &xelpg_pt_ops; 190 + ggtt->pt_ops = ggtt->tile->media_gt && XE_WA(ggtt->tile->media_gt, 22019338487) ? 191 + &xelpg_pt_wa_ops : &xelpg_pt_ops; 222 192 else 223 193 ggtt->pt_ops = &xelp_pt_ops; 224 194 ··· 426 394 427 395 for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { 428 396 pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); 429 - xe_ggtt_set_pte(ggtt, start + offset, pte); 397 + ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte); 430 398 } 431 399 } 432 400 ··· 534 502 return; 535 503 536 504 while (start < end) { 537 - xe_ggtt_set_pte(ggtt, start, pte); 505 + ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); 538 506 start += XE_PAGE_SIZE; 539 507 } 540 508
-1
drivers/gpu/drm/xe/xe_ggtt.h
··· 10 10 11 11 struct drm_printer; 12 12 13 - void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); 14 13 int xe_ggtt_init_early(struct xe_ggtt *ggtt); 15 14 int xe_ggtt_init(struct xe_ggtt *ggtt); 16 15 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+8 -4
drivers/gpu/drm/xe/xe_ggtt_types.h
··· 13 13 struct xe_bo; 14 14 struct xe_gt; 15 15 16 - struct xe_ggtt_pt_ops { 17 - u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); 18 - }; 19 - 20 16 struct xe_ggtt { 21 17 struct xe_tile *tile; 22 18 ··· 30 34 const struct xe_ggtt_pt_ops *pt_ops; 31 35 32 36 struct drm_mm mm; 37 + 38 + /** @access_count: counts GGTT writes */ 39 + unsigned int access_count; 40 + }; 41 + 42 + struct xe_ggtt_pt_ops { 43 + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); 44 + void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); 33 45 }; 34 46 35 47 #endif
+5
drivers/gpu/drm/xe/xe_gsc.c
··· 22 22 #include "xe_gt.h" 23 23 #include "xe_gt_mcr.h" 24 24 #include "xe_gt_printk.h" 25 + #include "xe_guc_pc.h" 25 26 #include "xe_huc.h" 26 27 #include "xe_map.h" 27 28 #include "xe_mmio.h" ··· 285 284 return ret; 286 285 287 286 xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); 287 + 288 + /* GSC load is done, restore expected GT frequencies */ 289 + xe_gt_sanitize_freq(gt); 290 + 288 291 xe_gt_dbg(gt, "GSC FW async load completed\n"); 289 292 290 293 /* HuC auth failure is not fatal */
+24
drivers/gpu/drm/xe/xe_gt.c
··· 9 9 10 10 #include <drm/drm_managed.h> 11 11 #include <drm/xe_drm.h> 12 + #include <generated/xe_wa_oob.h> 12 13 13 14 #include "instructions/xe_gfxpipe_commands.h" 14 15 #include "instructions/xe_mi_commands.h" ··· 55 54 #include "xe_sriov.h" 56 55 #include "xe_tuning.h" 57 56 #include "xe_uc.h" 57 + #include "xe_uc_fw.h" 58 58 #include "xe_vm.h" 59 59 #include "xe_wa.h" 60 60 #include "xe_wopcm.h" ··· 680 678 /* Get CCS mode in sync between sw/hw */ 681 679 xe_gt_apply_ccs_mode(gt); 682 680 681 + /* Restore GT freq to expected values */ 682 + xe_gt_sanitize_freq(gt); 683 + 683 684 return 0; 684 685 } 685 686 ··· 804 799 xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); 805 800 806 801 return err; 802 + } 803 + 804 + /** 805 + * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. 806 + * @gt: the GT object 807 + * 808 + * Called after driver init/GSC load completes to restore GT frequencies if we 809 + * limited them for any WAs. 810 + */ 811 + int xe_gt_sanitize_freq(struct xe_gt *gt) 812 + { 813 + int ret = 0; 814 + 815 + if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) || 816 + xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) && 817 + XE_WA(gt, 22019338487)) 818 + ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc); 819 + 820 + return ret; 807 821 } 808 822 809 823 int xe_gt_resume(struct xe_gt *gt)
+1
drivers/gpu/drm/xe/xe_gt.h
··· 56 56 int xe_gt_resume(struct xe_gt *gt); 57 57 void xe_gt_reset_async(struct xe_gt *gt); 58 58 void xe_gt_sanitize(struct xe_gt *gt); 59 + int xe_gt_sanitize_freq(struct xe_gt *gt); 59 60 void xe_gt_remove(struct xe_gt *gt); 60 61 61 62 /**
+8 -1
drivers/gpu/drm/xe/xe_gt_ccs_mode.c
··· 12 12 #include "xe_gt_printk.h" 13 13 #include "xe_gt_sysfs.h" 14 14 #include "xe_mmio.h" 15 + #include "xe_sriov.h" 15 16 16 17 static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines) 17 18 { ··· 76 75 77 76 void xe_gt_apply_ccs_mode(struct xe_gt *gt) 78 77 { 79 - if (!gt->ccs_mode) 78 + if (!gt->ccs_mode || IS_SRIOV_VF(gt_to_xe(gt))) 80 79 return; 81 80 82 81 __xe_gt_apply_ccs_mode(gt, gt->ccs_mode); ··· 110 109 struct xe_device *xe = gt_to_xe(gt); 111 110 u32 num_engines, num_slices; 112 111 int ret; 112 + 113 + if (IS_SRIOV(xe)) { 114 + xe_gt_dbg(gt, "Can't change compute mode when running as %s\n", 115 + xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); 116 + return -EOPNOTSUPP; 117 + } 113 118 114 119 ret = kstrtou32(buff, 0, &num_engines); 115 120 if (ret)
+20
drivers/gpu/drm/xe/xe_gt_clock.c
··· 3 3 * Copyright © 2022 Intel Corporation 4 4 */ 5 5 6 + #include <linux/math64.h> 7 + 6 8 #include "xe_gt_clock.h" 7 9 8 10 #include "regs/xe_gt_regs.h" ··· 80 78 81 79 gt->info.reference_clock = freq; 82 80 return 0; 81 + } 82 + 83 + static u64 div_u64_roundup(u64 n, u32 d) 84 + { 85 + return div_u64(n + d - 1, d); 86 + } 87 + 88 + /** 89 + * xe_gt_clock_interval_to_ms - Convert sampled GT clock ticks to msec 90 + * 91 + * @gt: the &xe_gt 92 + * @count: count of GT clock ticks 93 + * 94 + * Returns: time in msec 95 + */ 96 + u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count) 97 + { 98 + return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock); 83 99 }
+1
drivers/gpu/drm/xe/xe_gt_clock.h
··· 11 11 struct xe_gt; 12 12 13 13 int xe_gt_clock_init(struct xe_gt *gt); 14 + u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count); 14 15 15 16 #endif
+16
drivers/gpu/drm/xe/xe_gt_idle.c
··· 15 15 #include "xe_macros.h" 16 16 #include "xe_mmio.h" 17 17 #include "xe_pm.h" 18 + #include "xe_sriov.h" 18 19 19 20 /** 20 21 * DOC: Xe GT Idle ··· 101 100 u32 pg_enable; 102 101 int i, j; 103 102 103 + if (IS_SRIOV_VF(xe)) 104 + return; 105 + 104 106 /* Disable CPG for PVC */ 105 107 if (xe->info.platform == XE_PVC) 106 108 return; ··· 134 130 135 131 void xe_gt_idle_disable_pg(struct xe_gt *gt) 136 132 { 133 + if (IS_SRIOV_VF(gt_to_xe(gt))) 134 + return; 135 + 137 136 xe_device_assert_mem_access(gt_to_xe(gt)); 138 137 XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 139 138 ··· 221 214 struct kobject *kobj; 222 215 int err; 223 216 217 + if (IS_SRIOV_VF(xe)) 218 + return 0; 219 + 224 220 kobj = kobject_create_and_add("gtidle", gt->sysfs); 225 221 if (!kobj) 226 222 return -ENOMEM; ··· 256 246 xe_device_assert_mem_access(gt_to_xe(gt)); 257 247 xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 258 248 249 + if (IS_SRIOV_VF(gt_to_xe(gt))) 250 + return; 251 + 259 252 /* Units of 1280 ns for a total of 5s */ 260 253 xe_mmio_write32(gt, RC_IDLE_HYSTERSIS, 0x3B9ACA); 261 254 /* Enable RC6 */ ··· 270 257 { 271 258 xe_device_assert_mem_access(gt_to_xe(gt)); 272 259 xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 260 + 261 + if (IS_SRIOV_VF(gt_to_xe(gt))) 262 + return; 273 263 274 264 xe_mmio_write32(gt, RC_CONTROL, 0); 275 265 xe_mmio_write32(gt, RC_STATE, 0);
+68 -86
drivers/gpu/drm/xe/xe_gt_pagefault.c
··· 19 19 #include "xe_guc.h" 20 20 #include "xe_guc_ct.h" 21 21 #include "xe_migrate.h" 22 - #include "xe_trace.h" 22 + #include "xe_trace_bo.h" 23 23 #include "xe_vm.h" 24 24 25 25 struct pagefault { ··· 125 125 return 0; 126 126 } 127 127 128 + static int handle_vma_pagefault(struct xe_tile *tile, struct pagefault *pf, 129 + struct xe_vma *vma) 130 + { 131 + struct xe_vm *vm = xe_vma_vm(vma); 132 + struct drm_exec exec; 133 + struct dma_fence *fence; 134 + ktime_t end = 0; 135 + int err; 136 + bool atomic; 137 + 138 + trace_xe_vma_pagefault(vma); 139 + atomic = access_is_atomic(pf->access_type); 140 + 141 + /* Check if VMA is valid */ 142 + if (vma_is_valid(tile, vma) && !atomic) 143 + return 0; 144 + 145 + retry_userptr: 146 + if (xe_vma_is_userptr(vma) && 147 + xe_vma_userptr_check_repin(to_userptr_vma(vma))) { 148 + struct xe_userptr_vma *uvma = to_userptr_vma(vma); 149 + 150 + err = xe_vma_userptr_pin_pages(uvma); 151 + if (err) 152 + return err; 153 + } 154 + 155 + /* Lock VM and BOs dma-resv */ 156 + drm_exec_init(&exec, 0, 0); 157 + drm_exec_until_all_locked(&exec) { 158 + err = xe_pf_begin(&exec, vma, atomic, tile->id); 159 + drm_exec_retry_on_contention(&exec); 160 + if (xe_vm_validate_should_retry(&exec, err, &end)) 161 + err = -EAGAIN; 162 + if (err) 163 + goto unlock_dma_resv; 164 + 165 + /* Bind VMA only to the GT that has faulted */ 166 + trace_xe_vma_pf_bind(vma); 167 + fence = xe_vma_rebind(vm, vma, BIT(tile->id)); 168 + if (IS_ERR(fence)) { 169 + err = PTR_ERR(fence); 170 + if (xe_vm_validate_should_retry(&exec, err, &end)) 171 + err = -EAGAIN; 172 + goto unlock_dma_resv; 173 + } 174 + } 175 + 176 + dma_fence_wait(fence, false); 177 + dma_fence_put(fence); 178 + vma->tile_invalidated &= ~BIT(tile->id); 179 + 180 + unlock_dma_resv: 181 + drm_exec_fini(&exec); 182 + if (err == -EAGAIN) 183 + goto retry_userptr; 184 + 185 + return err; 186 + } 187 + 128 188 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) 129 189 { 130 190 struct 
xe_device *xe = gt_to_xe(gt); 131 191 struct xe_tile *tile = gt_to_tile(gt); 132 - struct drm_exec exec; 133 192 struct xe_vm *vm; 134 193 struct xe_vma *vma = NULL; 135 - struct dma_fence *fence; 136 - bool write_locked; 137 - int ret = 0; 138 - bool atomic; 194 + int err; 139 195 140 196 /* SW isn't expected to handle TRTT faults */ 141 197 if (pf->trva_fault) ··· 208 152 if (!vm) 209 153 return -EINVAL; 210 154 211 - retry_userptr: 212 155 /* 213 - * TODO: Avoid exclusive lock if VM doesn't have userptrs, or 214 - * start out read-locked? 156 + * TODO: Change to read lock? Using write lock for simplicity. 215 157 */ 216 158 down_write(&vm->lock); 217 - write_locked = true; 218 159 vma = lookup_vma(vm, pf->page_addr); 219 160 if (!vma) { 220 - ret = -EINVAL; 161 + err = -EINVAL; 221 162 goto unlock_vm; 222 163 } 223 164 224 - if (!xe_vma_is_userptr(vma) || 225 - !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { 226 - downgrade_write(&vm->lock); 227 - write_locked = false; 228 - } 165 + err = handle_vma_pagefault(tile, pf, vma); 229 166 230 - trace_xe_vma_pagefault(vma); 231 - 232 - atomic = access_is_atomic(pf->access_type); 233 - 234 - /* Check if VMA is valid */ 235 - if (vma_is_valid(tile, vma) && !atomic) 236 - goto unlock_vm; 237 - 238 - /* TODO: Validate fault */ 239 - 240 - if (xe_vma_is_userptr(vma) && write_locked) { 241 - struct xe_userptr_vma *uvma = to_userptr_vma(vma); 242 - 243 - spin_lock(&vm->userptr.invalidated_lock); 244 - list_del_init(&uvma->userptr.invalidate_link); 245 - spin_unlock(&vm->userptr.invalidated_lock); 246 - 247 - ret = xe_vma_userptr_pin_pages(uvma); 248 - if (ret) 249 - goto unlock_vm; 250 - 251 - downgrade_write(&vm->lock); 252 - write_locked = false; 253 - } 254 - 255 - /* Lock VM and BOs dma-resv */ 256 - drm_exec_init(&exec, 0, 0); 257 - drm_exec_until_all_locked(&exec) { 258 - ret = xe_pf_begin(&exec, vma, atomic, tile->id); 259 - drm_exec_retry_on_contention(&exec); 260 - if (ret) 261 - goto unlock_dma_resv; 262 - 263 - 
/* Bind VMA only to the GT that has faulted */ 264 - trace_xe_vma_pf_bind(vma); 265 - fence = xe_vma_rebind(vm, vma, BIT(tile->id)); 266 - if (IS_ERR(fence)) { 267 - ret = PTR_ERR(fence); 268 - goto unlock_dma_resv; 269 - } 270 - } 271 - 272 - /* 273 - * XXX: Should we drop the lock before waiting? This only helps if doing 274 - * GPU binds which is currently only done if we have to wait for more 275 - * than 10ms on a move. 276 - */ 277 - dma_fence_wait(fence, false); 278 - dma_fence_put(fence); 279 - 280 - if (xe_vma_is_userptr(vma)) 281 - ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); 282 - vma->tile_invalidated &= ~BIT(tile->id); 283 - 284 - unlock_dma_resv: 285 - drm_exec_fini(&exec); 286 167 unlock_vm: 287 - if (!ret) 168 + if (!err) 288 169 vm->usm.last_fault_vma = vma; 289 - if (write_locked) 290 - up_write(&vm->lock); 291 - else 292 - up_read(&vm->lock); 293 - if (ret == -EAGAIN) 294 - goto retry_userptr; 295 - 296 - if (!ret) { 297 - ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma); 298 - if (ret >= 0) 299 - ret = 0; 300 - } 170 + up_write(&vm->lock); 301 171 xe_vm_put(vm); 302 172 303 - return ret; 173 + return err; 304 174 } 305 175 306 176 static int send_pagefault_reply(struct xe_guc *guc,
+13 -2
drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
··· 1290 1290 struct xe_tile *tile; 1291 1291 unsigned int tid; 1292 1292 1293 + xe_assert(xe, IS_DGFX(xe)); 1294 + xe_assert(xe, IS_SRIOV_PF(xe)); 1295 + 1293 1296 for_each_tile(tile, xe, tid) { 1294 1297 lmtt = &tile->sriov.pf.lmtt; 1295 1298 xe_lmtt_drop_pages(lmtt, vfid); ··· 1310 1307 unsigned int gtid; 1311 1308 unsigned int tid; 1312 1309 int err; 1310 + 1311 + xe_assert(xe, IS_DGFX(xe)); 1312 + xe_assert(xe, IS_SRIOV_PF(xe)); 1313 1313 1314 1314 total = 0; 1315 1315 for_each_tile(tile, xe, tid) ··· 1359 1353 1360 1354 static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) 1361 1355 { 1356 + xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); 1362 1357 xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 1363 1358 lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); 1364 1359 ··· 1378 1371 int err; 1379 1372 1380 1373 xe_gt_assert(gt, vfid); 1374 + xe_gt_assert(gt, IS_DGFX(xe)); 1381 1375 xe_gt_assert(gt, !xe_gt_is_media_type(gt)); 1382 1376 1383 1377 size = round_up(size, pf_get_lmem_alignment(gt)); ··· 1846 1838 static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) 1847 1839 { 1848 1840 struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); 1841 + struct xe_device *xe = gt_to_xe(gt); 1849 1842 1850 1843 if (!xe_gt_is_media_type(gt)) { 1851 1844 pf_release_vf_config_ggtt(gt, config); 1852 - pf_release_vf_config_lmem(gt, config); 1853 - pf_update_vf_lmtt(gt_to_xe(gt), vfid); 1845 + if (IS_DGFX(xe)) { 1846 + pf_release_vf_config_lmem(gt, config); 1847 + pf_update_vf_lmtt(xe, vfid); 1848 + } 1854 1849 } 1855 1850 pf_release_config_ctxs(gt, config); 1856 1851 pf_release_config_dbs(gt, config);
+21
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
··· 130 130 } 131 131 132 132 /** 133 + * xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence. 134 + * @gt: the &xe_gt 135 + * @vfid: the VF identifier 136 + * 137 + * This function is for PF only. 138 + * 139 + * Return: 0 on success or a negative error code on failure. 140 + */ 141 + int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid) 142 + { 143 + int err; 144 + 145 + /* XXX pf_send_vf_flr_start() expects ct->lock */ 146 + mutex_lock(&gt->uc.guc.ct.lock); 147 + err = pf_send_vf_flr_start(gt, vfid); 148 + mutex_unlock(&gt->uc.guc.ct.lock); 149 + 150 + return err; 151 + } 152 + 153 + /** 133 154 * DOC: The VF FLR Flow with GuC 134 155 * 135 156 * PF GUC PCI
+1
drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
··· 14 14 int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); 15 15 int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); 16 16 int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); 17 + int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid); 17 18 18 19 #ifdef CONFIG_PCI_IOV 19 20 int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
+19 -12
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
··· 13 13 #include "xe_guc.h" 14 14 #include "xe_guc_ct.h" 15 15 #include "xe_mmio.h" 16 + #include "xe_sriov.h" 16 17 #include "xe_trace.h" 17 18 #include "regs/xe_guc_regs.h" 18 19 ··· 23 22 { 24 23 struct xe_gt *gt = container_of(work, struct xe_gt, 25 24 tlb_invalidation.fence_tdr.work); 25 + struct xe_device *xe = gt_to_xe(gt); 26 26 struct xe_gt_tlb_invalidation_fence *fence, *next; 27 27 28 28 spin_lock_irq(&gt->tlb_invalidation.pending_lock); ··· 35 33 if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT) 36 34 break; 37 35 38 - trace_xe_gt_tlb_invalidation_fence_timeout(fence); 36 + trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); 39 37 xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", 40 38 fence->seqno, gt->tlb_invalidation.seqno_recv); 41 39 ··· 73 71 } 74 72 75 73 static void 76 - __invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) 74 + __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 77 75 { 78 - trace_xe_gt_tlb_invalidation_fence_signal(fence); 76 + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); 79 77 dma_fence_signal(&fence->base); 80 78 dma_fence_put(&fence->base); 81 79 } 82 80 83 81 static void 84 - invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) 82 + invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) 85 83 { 86 84 list_del(&fence->link); 87 - __invalidation_fence_signal(fence); 85 + __invalidation_fence_signal(xe, fence); 88 86 } 89 87 90 88 /** ··· 123 121 124 122 list_for_each_entry_safe(fence, next, 125 123 &gt->tlb_invalidation.pending_fences, link) 126 - invalidation_fence_signal(fence); 124 + invalidation_fence_signal(gt_to_xe(gt), fence); 127 125 spin_unlock_irq(&gt->tlb_invalidation.pending_lock); 128 126 mutex_unlock(&gt->uc.guc.ct.lock); 129 127 } ··· 146 144 u32 *action, int len) 147 145 { 148 146 struct xe_gt *gt = guc_to_gt(guc); 147 + struct xe_device *xe = gt_to_xe(gt); 149 148 
int seqno; 150 149 int ret; 151 150 ··· 160 157 seqno = gt->tlb_invalidation.seqno; 161 158 if (fence) { 162 159 fence->seqno = seqno; 163 - trace_xe_gt_tlb_invalidation_fence_send(fence); 160 + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); 164 161 } 165 162 action[1] = seqno; 166 163 ret = xe_guc_ct_send_locked(&guc->ct, action, len, ··· 174 171 * we can just go ahead and signal the fence here. 175 172 */ 176 173 if (tlb_invalidation_seqno_past(gt, seqno)) { 177 - __invalidation_fence_signal(fence); 174 + __invalidation_fence_signal(xe, fence); 178 175 } else { 179 176 fence->invalidation_time = ktime_get(); 180 177 list_add_tail(&fence->link, ··· 187 184 } 188 185 spin_unlock_irq(&gt->tlb_invalidation.pending_lock); 189 186 } else if (ret < 0 && fence) { 190 - __invalidation_fence_signal(fence); 187 + __invalidation_fence_signal(xe, fence); 191 188 } 192 189 if (!ret) { 193 190 gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % ··· 250 247 251 248 xe_gt_tlb_invalidation_wait(gt, seqno); 252 249 } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { 250 + if (IS_SRIOV_VF(xe)) 251 + return 0; 252 + 253 253 xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); 254 254 if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { 255 255 xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, ··· 300 294 /* Execlists not supported */ 301 295 if (gt_to_xe(gt)->info.force_execlist) { 302 296 if (fence) 303 - __invalidation_fence_signal(fence); 297 + __invalidation_fence_signal(xe, fence); 304 298 305 299 return 0; 306 300 } ··· 438 432 int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) 439 433 { 440 434 struct xe_gt *gt = guc_to_gt(guc); 435 + struct xe_device *xe = gt_to_xe(gt); 441 436 struct xe_gt_tlb_invalidation_fence *fence, *next; 442 437 unsigned long flags; 443 438 ··· 475 468 476 469 list_for_each_entry_safe(fence, next, 477 470 &gt->tlb_invalidation.pending_fences, link) { 478 - 
trace_xe_gt_tlb_invalidation_fence_recv(fence); 471 + trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); 479 472 480 473 if (!tlb_invalidation_seqno_past(gt, fence->seqno)) 481 474 break; 482 475 483 - invalidation_fence_signal(fence); 476 + invalidation_fence_signal(xe, fence); 484 477 } 485 478 486 479 if (!list_empty(&gt->tlb_invalidation.pending_fences))
+4
drivers/gpu/drm/xe/xe_gt_types.h
··· 12 12 #include "xe_gt_sriov_vf_types.h" 13 13 #include "xe_hw_engine_types.h" 14 14 #include "xe_hw_fence_types.h" 15 + #include "xe_oa.h" 15 16 #include "xe_reg_sr_types.h" 16 17 #include "xe_sa_types.h" 17 18 #include "xe_uc_types.h" ··· 388 387 */ 389 388 u8 instances_per_class[XE_ENGINE_CLASS_MAX]; 390 389 } user_engines; 390 + 391 + /** @oa: oa perf counter subsystem per gt info */ 392 + struct xe_oa_gt oa; 391 393 }; 392 394 393 395 #endif
+22 -11
drivers/gpu/drm/xe/xe_guc.c
··· 476 476 xe_mmio_write32(gt, GUC_SHIM_CONTROL, shim_flags); 477 477 478 478 xe_mmio_write32(gt, GT_PM_CONFIG, GT_DOORBELL_ENABLE); 479 + 480 + /* Make sure GuC receives ARAT interrupts */ 481 + xe_mmio_rmw32(gt, PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); 479 482 } 480 483 481 484 /* ··· 702 699 { 703 700 int ret; 704 701 702 + /* Raise GT freq to speed up HuC/GuC load */ 703 + xe_guc_pc_raise_unslice(&guc->pc); 704 + 705 705 guc_write_params(guc); 706 706 guc_prepare_xfer(guc); 707 707 ··· 790 784 791 785 xe_guc_ads_populate_minimal(&guc->ads); 792 786 793 - /* Raise GT freq to speed up HuC/GuC load */ 794 787 xe_guc_pc_init_early(&guc->pc); 795 788 796 789 ret = __xe_guc_upload(guc); ··· 859 854 struct xe_device *xe = guc_to_xe(guc); 860 855 int err; 861 856 862 - guc_enable_irq(guc); 863 - 864 857 if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { 865 858 struct xe_gt *gt = guc_to_gt(guc); 866 859 struct xe_tile *tile = gt_to_tile(gt); ··· 866 863 err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); 867 864 if (err) 868 865 return err; 866 + } else { 867 + guc_enable_irq(guc); 869 868 } 870 - 871 - xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, 872 - ARAT_EXPIRED_INTRMSK, 0); 873 869 874 870 err = xe_guc_ct_enable(&guc->ct); 875 871 if (err) ··· 1096 1094 1097 1095 void xe_guc_sanitize(struct xe_guc *guc) 1098 1096 { 1099 - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); 1097 + xe_uc_fw_sanitize(&guc->fw); 1100 1098 xe_guc_ct_disable(&guc->ct); 1101 1099 guc->submission_state.enabled = false; 1102 1100 } ··· 1113 1111 1114 1112 void xe_guc_stop_prepare(struct xe_guc *guc) 1115 1113 { 1116 - XE_WARN_ON(xe_guc_pc_stop(&guc->pc)); 1114 + if (!IS_SRIOV_VF(guc_to_xe(guc))) { 1115 + int err; 1116 + 1117 + err = xe_guc_pc_stop(&guc->pc); 1118 + xe_gt_WARN(guc_to_gt(guc), err, "Failed to stop GuC PC: %pe\n", 1119 + ERR_PTR(err)); 1120 + } 1117 1121 } 1118 1122 1119 1123 void xe_guc_stop(struct xe_guc *guc) ··· 1131 1123 1132 1124 int xe_guc_start(struct xe_guc *guc) 
1133 1125 { 1134 - int ret; 1126 + if (!IS_SRIOV_VF(guc_to_xe(guc))) { 1127 + int err; 1135 1128 1136 - ret = xe_guc_pc_start(&guc->pc); 1137 - XE_WARN_ON(ret); 1129 + err = xe_guc_pc_start(&guc->pc); 1130 + xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n", 1131 + ERR_PTR(err)); 1132 + } 1138 1133 1139 1134 return xe_guc_submit_start(guc); 1140 1135 }
+61 -11
drivers/gpu/drm/xe/xe_guc_ct.c
··· 29 29 #include "xe_guc_submit.h" 30 30 #include "xe_map.h" 31 31 #include "xe_pm.h" 32 - #include "xe_trace.h" 32 + #include "xe_trace_guc.h" 33 33 34 34 /* Used when a CT send wants to block and / or receive data */ 35 35 struct g2h_fence { ··· 126 126 xa_destroy(&ct->fence_lookup); 127 127 } 128 128 129 + static void receive_g2h(struct xe_guc_ct *ct); 129 130 static void g2h_worker_func(struct work_struct *w); 131 + static void safe_mode_worker_func(struct work_struct *w); 130 132 131 133 static void primelockdep(struct xe_guc_ct *ct) 132 134 { ··· 157 155 spin_lock_init(&ct->fast_lock); 158 156 xa_init(&ct->fence_lookup); 159 157 INIT_WORK(&ct->g2h_worker, g2h_worker_func); 158 + INIT_DELAYED_WORK(&ct->safe_mode_worker, safe_mode_worker_func); 160 159 init_waitqueue_head(&ct->wq); 161 160 init_waitqueue_head(&ct->g2h_fence_wq); 162 161 ··· 324 321 mutex_unlock(&ct->lock); 325 322 } 326 323 324 + static bool ct_needs_safe_mode(struct xe_guc_ct *ct) 325 + { 326 + return !pci_dev_msi_enabled(to_pci_dev(ct_to_xe(ct)->drm.dev)); 327 + } 328 + 329 + static bool ct_restart_safe_mode_worker(struct xe_guc_ct *ct) 330 + { 331 + if (!ct_needs_safe_mode(ct)) 332 + return false; 333 + 334 + queue_delayed_work(ct->g2h_wq, &ct->safe_mode_worker, HZ / 10); 335 + return true; 336 + } 337 + 338 + static void safe_mode_worker_func(struct work_struct *w) 339 + { 340 + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, safe_mode_worker.work); 341 + 342 + receive_g2h(ct); 343 + 344 + if (!ct_restart_safe_mode_worker(ct)) 345 + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode canceled\n"); 346 + } 347 + 348 + static void ct_enter_safe_mode(struct xe_guc_ct *ct) 349 + { 350 + if (ct_restart_safe_mode_worker(ct)) 351 + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode enabled\n"); 352 + } 353 + 354 + static void ct_exit_safe_mode(struct xe_guc_ct *ct) 355 + { 356 + if (cancel_delayed_work_sync(&ct->safe_mode_worker)) 357 + xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n"); 358 + } 359 + 
327 360 int xe_guc_ct_enable(struct xe_guc_ct *ct) 328 361 { 329 362 struct xe_device *xe = ct_to_xe(ct); ··· 389 350 wake_up_all(&ct->wq); 390 351 xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); 391 352 353 + if (ct_needs_safe_mode(ct)) 354 + ct_enter_safe_mode(ct); 355 + 392 356 return 0; 393 357 394 358 err_out: ··· 415 373 void xe_guc_ct_disable(struct xe_guc_ct *ct) 416 374 { 417 375 xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); 376 + ct_exit_safe_mode(ct); 418 377 stop_g2h_handler(ct); 419 378 } 420 379 ··· 571 528 /* Update descriptor */ 572 529 desc_write(xe, h2g, tail, h2g->info.tail); 573 530 574 - trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len, 531 + trace_xe_guc_ctb_h2g(xe, gt->info.id, *(action - 1), full_len, 575 532 desc_read(xe, h2g, head), h2g->info.tail); 576 533 577 534 return 0; ··· 684 641 u32 g2h_len, u32 num_g2h, 685 642 struct g2h_fence *g2h_fence) 686 643 { 644 + struct xe_device *xe = ct_to_xe(ct); 687 645 struct xe_gt *gt = ct_to_gt(ct); 688 646 struct drm_printer p = xe_gt_info_printer(gt); 689 647 unsigned int sleep_period_ms = 1; ··· 712 668 if (sleep_period_ms == 1024) 713 669 goto broken; 714 670 715 - trace_xe_guc_ct_h2g_flow_control(h2g->info.head, h2g->info.tail, 671 + trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail, 716 672 h2g->info.size, 717 673 h2g->info.space, 718 674 len + GUC_CTB_HDR_LEN); ··· 724 680 struct xe_device *xe = ct_to_xe(ct); 725 681 struct guc_ctb *g2h = &ct->ctbs.g2h; 726 682 727 - trace_xe_guc_ct_g2h_flow_control(g2h->info.head, 683 + trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head, 728 684 desc_read(xe, g2h, tail), 729 685 g2h->info.size, 730 686 g2h->info.space, ··· 877 833 } 878 834 879 835 if (g2h_fence.retry) { 880 - xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u", 881 - action[0], g2h_fence.reason); 836 + xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", 837 + action[0], g2h_fence.reason); 882 838 goto retry; 883 839 } 884 840 if 
(g2h_fence.fail) { 885 - xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u", 841 + xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", 886 842 action[0], g2h_fence.error, g2h_fence.hint); 887 843 ret = -EIO; 888 844 } ··· 1214 1170 g2h->info.head = (head + avail) % g2h->info.size; 1215 1171 desc_write(xe, g2h, head, g2h->info.head); 1216 1172 1217 - trace_xe_guc_ctb_g2h(ct_to_gt(ct)->info.id, action, len, 1218 - g2h->info.head, tail); 1173 + trace_xe_guc_ctb_g2h(xe, ct_to_gt(ct)->info.id, 1174 + action, len, g2h->info.head, tail); 1219 1175 1220 1176 return len; 1221 1177 } ··· 1304 1260 return 1; 1305 1261 } 1306 1262 1307 - static void g2h_worker_func(struct work_struct *w) 1263 + static void receive_g2h(struct xe_guc_ct *ct) 1308 1264 { 1309 - struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); 1310 1265 struct xe_gt *gt = ct_to_gt(ct); 1311 1266 bool ongoing; 1312 1267 int ret; ··· 1352 1309 1353 1310 if (ongoing) 1354 1311 xe_pm_runtime_put(ct_to_xe(ct)); 1312 + } 1313 + 1314 + static void g2h_worker_func(struct work_struct *w) 1315 + { 1316 + struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); 1317 + 1318 + receive_g2h(ct); 1355 1319 } 1356 1320 1357 1321 static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
+2
drivers/gpu/drm/xe/xe_guc_ct_types.h
··· 110 110 u32 g2h_outstanding; 111 111 /** @g2h_worker: worker to process G2H messages */ 112 112 struct work_struct g2h_worker; 113 + /** @safe_mode_worker: worker to check G2H messages with IRQ disabled */ 114 + struct delayed_work safe_mode_worker; 113 115 /** @state: CT state */ 114 116 enum xe_guc_ct_state state; 115 117 /** @fence_seqno: G2H fence seqno - 16 bits used by CT */
+171 -35
drivers/gpu/drm/xe/xe_guc_pc.c
··· 8 8 #include <linux/delay.h> 9 9 10 10 #include <drm/drm_managed.h> 11 + #include <generated/xe_wa_oob.h> 11 12 12 - #include "abi/guc_actions_abi.h" 13 13 #include "abi/guc_actions_slpc_abi.h" 14 14 #include "regs/xe_gt_regs.h" 15 15 #include "regs/xe_regs.h" ··· 18 18 #include "xe_force_wake.h" 19 19 #include "xe_gt.h" 20 20 #include "xe_gt_idle.h" 21 - #include "xe_gt_sysfs.h" 21 + #include "xe_gt_printk.h" 22 22 #include "xe_gt_types.h" 23 + #include "xe_guc.h" 23 24 #include "xe_guc_ct.h" 24 25 #include "xe_map.h" 25 26 #include "xe_mmio.h" 26 27 #include "xe_pcode.h" 28 + #include "xe_pm.h" 29 + #include "xe_wa.h" 27 30 28 31 #define MCHBAR_MIRROR_BASE_SNB 0x140000 29 32 ··· 43 40 44 41 #define GT_FREQUENCY_MULTIPLIER 50 45 42 #define GT_FREQUENCY_SCALER 3 43 + 44 + #define LNL_MERT_FREQ_CAP 800 46 45 47 46 /** 48 47 * DOC: GuC Power Conservation (PC) ··· 72 67 * 73 68 */ 74 69 75 - static struct xe_guc * 76 - pc_to_guc(struct xe_guc_pc *pc) 70 + static struct xe_guc *pc_to_guc(struct xe_guc_pc *pc) 77 71 { 78 72 return container_of(pc, struct xe_guc, pc); 79 73 } 80 74 81 - static struct xe_device * 82 - pc_to_xe(struct xe_guc_pc *pc) 75 + static struct xe_guc_ct *pc_to_ct(struct xe_guc_pc *pc) 83 76 { 84 - struct xe_guc *guc = pc_to_guc(pc); 85 - struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); 86 - 87 - return gt_to_xe(gt); 77 + return &pc_to_guc(pc)->ct; 88 78 } 89 79 90 - static struct xe_gt * 91 - pc_to_gt(struct xe_guc_pc *pc) 80 + static struct xe_gt *pc_to_gt(struct xe_guc_pc *pc) 92 81 { 93 - return container_of(pc, struct xe_gt, uc.guc.pc); 82 + return guc_to_gt(pc_to_guc(pc)); 94 83 } 95 84 96 - static struct iosys_map * 97 - pc_to_maps(struct xe_guc_pc *pc) 85 + static struct xe_device *pc_to_xe(struct xe_guc_pc *pc) 86 + { 87 + return guc_to_xe(pc_to_guc(pc)); 88 + } 89 + 90 + static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc) 98 91 { 99 92 return &pc->bo->vmap; 100 93 } ··· 133 130 134 131 static int pc_action_reset(struct 
xe_guc_pc *pc) 135 132 { 136 - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; 137 - int ret; 133 + struct xe_guc_ct *ct = pc_to_ct(pc); 138 134 u32 action[] = { 139 135 GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, 140 136 SLPC_EVENT(SLPC_EVENT_RESET, 2), 141 137 xe_bo_ggtt_addr(pc->bo), 142 138 0, 143 139 }; 140 + int ret; 144 141 145 142 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 146 143 if (ret) 147 - drm_err(&pc_to_xe(pc)->drm, "GuC PC reset: %pe", ERR_PTR(ret)); 144 + xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n", 145 + ERR_PTR(ret)); 148 146 149 147 return ret; 150 148 } 151 149 152 150 static int pc_action_query_task_state(struct xe_guc_pc *pc) 153 151 { 154 - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; 155 - int ret; 152 + struct xe_guc_ct *ct = pc_to_ct(pc); 156 153 u32 action[] = { 157 154 GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, 158 155 SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2), 159 156 xe_bo_ggtt_addr(pc->bo), 160 157 0, 161 158 }; 159 + int ret; 162 160 163 161 if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) 164 162 return -EAGAIN; ··· 167 163 /* Blocking here to ensure the results are ready before reading them */ 168 164 ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action)); 169 165 if (ret) 170 - drm_err(&pc_to_xe(pc)->drm, 171 - "GuC PC query task state failed: %pe", ERR_PTR(ret)); 166 + xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n", 167 + ERR_PTR(ret)); 172 168 173 169 return ret; 174 170 } 175 171 176 172 static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value) 177 173 { 178 - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; 179 - int ret; 174 + struct xe_guc_ct *ct = pc_to_ct(pc); 180 175 u32 action[] = { 181 176 GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, 182 177 SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), 183 178 id, 184 179 value, 185 180 }; 181 + int ret; 186 182 187 183 if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) 188 184 return -EAGAIN; 189 185 190 186 ret = xe_guc_ct_send(ct, action, 
ARRAY_SIZE(action), 0, 0); 191 187 if (ret) 192 - drm_err(&pc_to_xe(pc)->drm, "GuC PC set param failed: %pe", 193 - ERR_PTR(ret)); 188 + xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n", 189 + id, value, ERR_PTR(ret)); 190 + 191 + return ret; 192 + } 193 + 194 + static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id) 195 + { 196 + u32 action[] = { 197 + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, 198 + SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), 199 + id, 200 + }; 201 + struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; 202 + int ret; 203 + 204 + if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) 205 + return -EAGAIN; 206 + 207 + ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 208 + if (ret) 209 + xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe", 210 + ERR_PTR(ret)); 194 211 195 212 return ret; 196 213 } 197 214 198 215 static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode) 199 216 { 200 - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; 217 + struct xe_guc_ct *ct = pc_to_ct(pc); 201 218 u32 action[] = { 202 - XE_GUC_ACTION_SETUP_PC_GUCRC, 219 + GUC_ACTION_HOST2GUC_SETUP_PC_GUCRC, 203 220 mode, 204 221 }; 205 222 int ret; 206 223 207 224 ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); 208 225 if (ret) 209 - drm_err(&pc_to_xe(pc)->drm, "GuC RC enable failed: %pe", 210 - ERR_PTR(ret)); 226 + xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n", 227 + mode, ERR_PTR(ret)); 211 228 return ret; 212 229 } 213 230 ··· 699 674 tgl_init_fused_rp_values(pc); 700 675 } 701 676 677 + static u32 pc_max_freq_cap(struct xe_guc_pc *pc) 678 + { 679 + struct xe_gt *gt = pc_to_gt(pc); 680 + 681 + if (XE_WA(gt, 22019338487)) 682 + return min(LNL_MERT_FREQ_CAP, pc->rp0_freq); 683 + else 684 + return pc->rp0_freq; 685 + } 686 + 702 687 /** 703 - * xe_guc_pc_init_early - Initialize RPx values and request a higher GT 688 + * xe_guc_pc_raise_unslice - Initialize RPx values and request a higher GT 704 689 * frequency to allow faster GuC 
load times 690 + * @pc: Xe_GuC_PC instance 691 + */ 692 + void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc) 693 + { 694 + struct xe_gt *gt = pc_to_gt(pc); 695 + 696 + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 697 + pc_set_cur_freq(pc, pc_max_freq_cap(pc)); 698 + } 699 + 700 + /** 701 + * xe_guc_pc_init_early - Initialize RPx values 705 702 * @pc: Xe_GuC_PC instance 706 703 */ 707 704 void xe_guc_pc_init_early(struct xe_guc_pc *pc) ··· 732 685 733 686 xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); 734 687 pc_init_fused_rp_values(pc); 735 - pc_set_cur_freq(pc, pc->rp0_freq); 736 688 } 737 689 738 690 static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) ··· 787 741 return ret; 788 742 } 789 743 744 + static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) 745 + { 746 + int ret = 0; 747 + 748 + if (XE_WA(pc_to_gt(pc), 22019338487)) { 749 + /* 750 + * Get updated min/max and stash them. 751 + */ 752 + ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); 753 + if (!ret) 754 + ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); 755 + if (ret) 756 + return ret; 757 + 758 + /* 759 + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
760 + */ 761 + mutex_lock(&pc->freq_lock); 762 + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); 763 + if (!ret) 764 + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); 765 + mutex_unlock(&pc->freq_lock); 766 + } 767 + 768 + return ret; 769 + } 770 + 771 + /** 772 + * xe_guc_pc_restore_stashed_freq - Set min/max back to stashed values 773 + * @pc: The GuC PC 774 + * 775 + * Returns: 0 on success, 776 + * error code on failure 777 + */ 778 + int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc) 779 + { 780 + int ret = 0; 781 + 782 + mutex_lock(&pc->freq_lock); 783 + ret = pc_set_max_freq(pc, pc->stashed_max_freq); 784 + if (!ret) 785 + ret = pc_set_min_freq(pc, pc->stashed_min_freq); 786 + mutex_unlock(&pc->freq_lock); 787 + 788 + return ret; 789 + } 790 + 790 791 /** 791 792 * xe_guc_pc_gucrc_disable - Disable GuC RC 792 793 * @pc: Xe_GuC_PC instance ··· 851 758 if (xe->info.skip_guc_pc) 852 759 return 0; 853 760 854 - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); 761 + ret = pc_action_setup_gucrc(pc, GUCRC_HOST_CONTROL); 855 762 if (ret) 856 763 return ret; 857 764 ··· 864 771 XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 865 772 866 773 return 0; 774 + } 775 + 776 + /** 777 + * xe_guc_pc_override_gucrc_mode - override GUCRC mode 778 + * @pc: Xe_GuC_PC instance 779 + * @mode: new value of the mode. 
780 + * 781 + * Return: 0 on success, negative error code on error 782 + */ 783 + int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode) 784 + { 785 + int ret; 786 + 787 + xe_pm_runtime_get(pc_to_xe(pc)); 788 + ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode); 789 + xe_pm_runtime_put(pc_to_xe(pc)); 790 + 791 + return ret; 792 + } 793 + 794 + /** 795 + * xe_guc_pc_unset_gucrc_mode - unset GUCRC mode override 796 + * @pc: Xe_GuC_PC instance 797 + * 798 + * Return: 0 on success, negative error code on error 799 + */ 800 + int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc) 801 + { 802 + int ret; 803 + 804 + xe_pm_runtime_get(pc_to_xe(pc)); 805 + ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE); 806 + xe_pm_runtime_put(pc_to_xe(pc)); 807 + 808 + return ret; 867 809 } 868 810 869 811 static void pc_init_pcode_freq(struct xe_guc_pc *pc) ··· 974 846 goto out; 975 847 976 848 if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_RUNNING)) { 977 - drm_err(&pc_to_xe(pc)->drm, "GuC PC Start failed\n"); 849 + xe_gt_err(gt, "GuC PC Start failed\n"); 978 850 ret = -EIO; 979 851 goto out; 980 852 } 981 853 982 854 ret = pc_init_freqs(pc); 855 + if (ret) 856 + goto out; 857 + 858 + ret = pc_set_mert_freq_cap(pc); 983 859 if (ret) 984 860 goto out; 985 861 ··· 993 861 goto out; 994 862 } 995 863 996 - ret = pc_action_setup_gucrc(pc, XE_GUCRC_FIRMWARE_CONTROL); 864 + ret = pc_action_setup_gucrc(pc, GUCRC_FIRMWARE_CONTROL); 997 865 998 866 out: 999 867 XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); ··· 1035 903 XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); 1036 904 XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); 1037 905 XE_WARN_ON(xe_guc_pc_stop(pc)); 906 + 907 + /* Bind requested freq to mert_freq_cap before unload */ 908 + pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq)); 909 + 1038 910 xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); 1039 911 } 1040 912
+5
drivers/gpu/drm/xe/xe_guc_pc.h
··· 9 9 #include <linux/types.h> 10 10 11 11 struct xe_guc_pc; 12 + enum slpc_gucrc_mode; 12 13 13 14 int xe_guc_pc_init(struct xe_guc_pc *pc); 14 15 int xe_guc_pc_start(struct xe_guc_pc *pc); 15 16 int xe_guc_pc_stop(struct xe_guc_pc *pc); 16 17 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); 18 + int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mode); 19 + int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc); 17 20 18 21 u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc); 19 22 int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq); ··· 32 29 u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc); 33 30 u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); 34 31 void xe_guc_pc_init_early(struct xe_guc_pc *pc); 32 + int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); 33 + void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); 35 34 36 35 #endif /* _XE_GUC_PC_H_ */
+4
drivers/gpu/drm/xe/xe_guc_pc_types.h
··· 25 25 u32 user_requested_min; 26 26 /** @user_requested_max: Stash the maximum requested freq by user */ 27 27 u32 user_requested_max; 28 + /** @stashed_min_freq: Stash the current minimum freq */ 29 + u32 stashed_min_freq; 30 + /** @stashed_max_freq: Stash the current maximum freq */ 31 + u32 stashed_max_freq; 28 32 /** @freq_lock: Let's protect the frequencies */ 29 33 struct mutex freq_lock; 30 34 /** @freq_ready: Only handle freq changes, if they are really ready */
+275 -75
drivers/gpu/drm/xe/xe_guc_submit.c
··· 10 10 #include <linux/circ_buf.h> 11 11 #include <linux/delay.h> 12 12 #include <linux/dma-fence-array.h> 13 + #include <linux/math64.h> 13 14 14 15 #include <drm/drm_managed.h> 15 16 ··· 24 23 #include "xe_force_wake.h" 25 24 #include "xe_gpu_scheduler.h" 26 25 #include "xe_gt.h" 26 + #include "xe_gt_clock.h" 27 27 #include "xe_gt_printk.h" 28 28 #include "xe_guc.h" 29 29 #include "xe_guc_ct.h" ··· 63 61 #define EXEC_QUEUE_STATE_RESET (1 << 6) 64 62 #define EXEC_QUEUE_STATE_KILLED (1 << 7) 65 63 #define EXEC_QUEUE_STATE_WEDGED (1 << 8) 64 + #define EXEC_QUEUE_STATE_BANNED (1 << 9) 65 + #define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) 66 + #define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) 66 67 67 68 static bool exec_queue_registered(struct xe_exec_queue *q) 68 69 { ··· 139 134 140 135 static bool exec_queue_banned(struct xe_exec_queue *q) 141 136 { 142 - return (q->flags & EXEC_QUEUE_FLAG_BANNED); 137 + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; 143 138 } 144 139 145 140 static void set_exec_queue_banned(struct xe_exec_queue *q) 146 141 { 147 - q->flags |= EXEC_QUEUE_FLAG_BANNED; 142 + atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); 148 143 } 149 144 150 145 static bool exec_queue_suspended(struct xe_exec_queue *q) ··· 192 187 atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); 193 188 } 194 189 190 + static bool exec_queue_check_timeout(struct xe_exec_queue *q) 191 + { 192 + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; 193 + } 194 + 195 + static void set_exec_queue_check_timeout(struct xe_exec_queue *q) 196 + { 197 + atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 198 + } 199 + 200 + static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) 201 + { 202 + atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); 203 + } 204 + 205 + static bool exec_queue_extra_ref(struct xe_exec_queue *q) 206 + { 207 + return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; 208 + } 209 + 210 + static 
void set_exec_queue_extra_ref(struct xe_exec_queue *q) 211 + { 212 + atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); 213 + } 214 + 195 215 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) 196 216 { 197 - return exec_queue_banned(q) || (atomic_read(&q->guc->state) & 198 - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED)); 217 + return (atomic_read(&q->guc->state) & 218 + (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | 219 + EXEC_QUEUE_STATE_BANNED)); 199 220 } 200 221 201 222 #ifdef CONFIG_PROVE_LOCKING ··· 949 918 xe_sched_submission_start(sched); 950 919 } 951 920 921 + #define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) 922 + 923 + static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) 924 + { 925 + struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); 926 + u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); 927 + u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); 928 + u32 timeout_ms = q->sched_props.job_timeout_ms; 929 + u32 diff; 930 + u64 running_time_ms; 931 + 932 + /* 933 + * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch 934 + * possible overflows with a high timeout. 
935 + */ 936 + xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); 937 + 938 + if (ctx_timestamp < ctx_job_timestamp) 939 + diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; 940 + else 941 + diff = ctx_timestamp - ctx_job_timestamp; 942 + 943 + /* 944 + * Ensure timeout is within 5% to account for an GuC scheduling latency 945 + */ 946 + running_time_ms = 947 + ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); 948 + 949 + xe_gt_dbg(gt, 950 + "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", 951 + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 952 + q->guc->id, running_time_ms, timeout_ms, diff); 953 + 954 + return running_time_ms >= timeout_ms; 955 + } 956 + 957 + static void enable_scheduling(struct xe_exec_queue *q) 958 + { 959 + MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 960 + struct xe_guc *guc = exec_queue_to_guc(q); 961 + int ret; 962 + 963 + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 964 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 965 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 966 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 967 + 968 + set_exec_queue_pending_enable(q); 969 + set_exec_queue_enabled(q); 970 + trace_xe_exec_queue_scheduling_enable(q); 971 + 972 + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 973 + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 974 + 975 + ret = wait_event_timeout(guc->ct.wq, 976 + !exec_queue_pending_enable(q) || 977 + guc_read_stopped(guc), HZ * 5); 978 + if (!ret || guc_read_stopped(guc)) { 979 + xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); 980 + set_exec_queue_banned(q); 981 + xe_gt_reset_async(q->gt); 982 + xe_sched_tdr_queue_imm(&q->guc->sched); 983 + } 984 + } 985 + 986 + static void disable_scheduling(struct xe_exec_queue *q, bool immediate) 987 + { 988 + MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 989 + struct xe_guc *guc = exec_queue_to_guc(q); 990 + 991 + 
xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 992 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 993 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 994 + 995 + if (immediate) 996 + set_min_preemption_timeout(guc, q); 997 + clear_exec_queue_enabled(q); 998 + set_exec_queue_pending_disable(q); 999 + trace_xe_exec_queue_scheduling_disable(q); 1000 + 1001 + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1002 + G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1003 + } 1004 + 1005 + static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) 1006 + { 1007 + u32 action[] = { 1008 + XE_GUC_ACTION_DEREGISTER_CONTEXT, 1009 + q->guc->id, 1010 + }; 1011 + 1012 + xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); 1013 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1014 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1015 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1016 + 1017 + set_exec_queue_destroyed(q); 1018 + trace_xe_exec_queue_deregister(q); 1019 + 1020 + xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1021 + G2H_LEN_DW_DEREGISTER_CONTEXT, 1); 1022 + } 1023 + 952 1024 static enum drm_gpu_sched_stat 953 1025 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) 954 1026 { ··· 1059 925 struct xe_sched_job *tmp_job; 1060 926 struct xe_exec_queue *q = job->q; 1061 927 struct xe_gpu_scheduler *sched = &q->guc->sched; 1062 - struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q)); 928 + struct xe_guc *guc = exec_queue_to_guc(q); 1063 929 int err = -ETIME; 1064 930 int i = 0; 1065 - bool wedged; 931 + bool wedged, skip_timeout_check; 1066 932 1067 933 /* 1068 934 * TDR has fired before free job worker. 
Common if exec queue ··· 1074 940 return DRM_GPU_SCHED_STAT_NOMINAL; 1075 941 } 1076 942 1077 - drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1078 - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1079 - q->guc->id, q->flags); 1080 - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, 1081 - "Kernel-submitted job timed out\n"); 1082 - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), 1083 - "VM job timed out on non-killed execqueue\n"); 1084 - 1085 - if (!exec_queue_killed(q)) 1086 - xe_devcoredump(job); 1087 - 1088 - trace_xe_sched_job_timedout(job); 1089 - 1090 - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 1091 - 1092 943 /* Kill the run_job entry point */ 1093 944 xe_sched_submission_stop(sched); 1094 945 1095 - /* 1096 - * Kernel jobs should never fail, nor should VM jobs if they do 1097 - * somethings has gone wrong and the GT needs a reset 1098 - */ 1099 - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || 1100 - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { 1101 - if (!xe_sched_invalidate_job(job, 2)) { 1102 - xe_sched_add_pending_job(sched, job); 1103 - xe_sched_submission_start(sched); 1104 - xe_gt_reset_async(q->gt); 1105 - goto out; 1106 - } 1107 - } 946 + /* Must check all state after stopping scheduler */ 947 + skip_timeout_check = exec_queue_reset(q) || 948 + exec_queue_killed_or_banned_or_wedged(q) || 949 + exec_queue_destroyed(q); 1108 950 1109 - /* Engine state now stable, disable scheduling if needed */ 951 + /* Job hasn't started, can't be timed out */ 952 + if (!skip_timeout_check && !xe_sched_job_started(job)) 953 + goto rearm; 954 + 955 + /* 956 + * XXX: Sampling timeout doesn't work in wedged mode as we have to 957 + * modify scheduling state to read timestamp. We could read the 958 + * timestamp from a register to accumulate current running time but this 959 + * doesn't work for SRIOV. 
For now assuming timeouts in wedged mode are 960 + * genuine timeouts. 961 + */ 962 + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); 963 + 964 + /* Engine state now stable, disable scheduling to check timestamp */ 1110 965 if (!wedged && exec_queue_registered(q)) { 1111 - struct xe_guc *guc = exec_queue_to_guc(q); 1112 966 int ret; 1113 967 1114 968 if (exec_queue_reset(q)) 1115 969 err = -EIO; 1116 - set_exec_queue_banned(q); 970 + 1117 971 if (!exec_queue_destroyed(q)) { 1118 - xe_exec_queue_get(q); 1119 - disable_scheduling_deregister(guc, q); 972 + /* 973 + * Wait for any pending G2H to flush out before 974 + * modifying state 975 + */ 976 + ret = wait_event_timeout(guc->ct.wq, 977 + !exec_queue_pending_enable(q) || 978 + guc_read_stopped(guc), HZ * 5); 979 + if (!ret || guc_read_stopped(guc)) 980 + goto trigger_reset; 981 + 982 + /* 983 + * Flag communicates to G2H handler that schedule 984 + * disable originated from a timeout check. The G2H then 985 + * avoid triggering cleanup or deregistering the exec 986 + * queue. 
987 + */ 988 + set_exec_queue_check_timeout(q); 989 + disable_scheduling(q, skip_timeout_check); 1120 990 } 1121 991 1122 992 /* ··· 1136 998 !exec_queue_pending_disable(q) || 1137 999 guc_read_stopped(guc), HZ * 5); 1138 1000 if (!ret || guc_read_stopped(guc)) { 1139 - drm_warn(&xe->drm, "Schedule disable failed to respond"); 1140 - xe_sched_add_pending_job(sched, job); 1141 - xe_sched_submission_start(sched); 1001 + trigger_reset: 1002 + if (!ret) 1003 + xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); 1004 + set_exec_queue_extra_ref(q); 1005 + xe_exec_queue_get(q); /* GT reset owns this */ 1006 + set_exec_queue_banned(q); 1142 1007 xe_gt_reset_async(q->gt); 1143 1008 xe_sched_tdr_queue_imm(sched); 1144 - goto out; 1009 + goto rearm; 1145 1010 } 1011 + } 1012 + 1013 + /* 1014 + * Check if job is actually timed out, if so restart job execution and TDR 1015 + */ 1016 + if (!wedged && !skip_timeout_check && !check_timeout(q, job) && 1017 + !exec_queue_reset(q) && exec_queue_registered(q)) { 1018 + clear_exec_queue_check_timeout(q); 1019 + goto sched_enable; 1020 + } 1021 + 1022 + xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", 1023 + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), 1024 + q->guc->id, q->flags); 1025 + trace_xe_sched_job_timedout(job); 1026 + 1027 + if (!exec_queue_killed(q)) 1028 + xe_devcoredump(job); 1029 + 1030 + /* 1031 + * Kernel jobs should never fail, nor should VM jobs if they do 1032 + * somethings has gone wrong and the GT needs a reset 1033 + */ 1034 + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, 1035 + "Kernel-submitted job timed out\n"); 1036 + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), 1037 + "VM job timed out on non-killed execqueue\n"); 1038 + if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || 1039 + (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { 1040 + if (!xe_sched_invalidate_job(job, 2)) { 1041 + 
clear_exec_queue_check_timeout(q); 1042 + xe_gt_reset_async(q->gt); 1043 + goto rearm; 1044 + } 1045 + } 1046 + 1047 + /* Finish cleaning up exec queue via deregister */ 1048 + set_exec_queue_banned(q); 1049 + if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { 1050 + set_exec_queue_extra_ref(q); 1051 + xe_exec_queue_get(q); 1052 + __deregister_exec_queue(guc, q); 1146 1053 } 1147 1054 1148 1055 /* Stop fence signaling */ ··· 1211 1028 /* Start fence signaling */ 1212 1029 xe_hw_fence_irq_start(q->fence_irq); 1213 1030 1214 - out: 1031 + return DRM_GPU_SCHED_STAT_NOMINAL; 1032 + 1033 + sched_enable: 1034 + enable_scheduling(q); 1035 + rearm: 1036 + /* 1037 + * XXX: Ideally want to adjust timeout based on current exection time 1038 + * but there is not currently an easy way to do in DRM scheduler. With 1039 + * some thought, do this in a follow up. 1040 + */ 1041 + xe_sched_add_pending_job(sched, job); 1042 + xe_sched_submission_start(sched); 1043 + 1215 1044 return DRM_GPU_SCHED_STAT_NOMINAL; 1216 1045 } 1217 1046 ··· 1326 1131 guc_read_stopped(guc)); 1327 1132 1328 1133 if (!guc_read_stopped(guc)) { 1329 - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); 1330 1134 s64 since_resume_ms = 1331 1135 ktime_ms_delta(ktime_get(), 1332 1136 q->guc->resume_time); ··· 1336 1142 msleep(wait_ms); 1337 1143 1338 1144 set_exec_queue_suspended(q); 1339 - clear_exec_queue_enabled(q); 1340 - set_exec_queue_pending_disable(q); 1341 - trace_xe_exec_queue_scheduling_disable(q); 1342 - 1343 - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1344 - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1145 + disable_scheduling(q, false); 1345 1146 } 1346 1147 } else if (q->guc->suspend_pending) { 1347 1148 set_exec_queue_suspended(q); ··· 1347 1158 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) 1348 1159 { 1349 1160 struct xe_exec_queue *q = msg->private_data; 1350 - struct xe_guc *guc = exec_queue_to_guc(q); 1351 1161 1352 1162 if 
(guc_exec_queue_allowed_to_change_state(q)) { 1353 - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); 1354 - 1355 1163 q->guc->resume_time = RESUME_PENDING; 1356 1164 clear_exec_queue_suspended(q); 1357 - set_exec_queue_pending_enable(q); 1358 - set_exec_queue_enabled(q); 1359 - trace_xe_exec_queue_scheduling_enable(q); 1360 - 1361 - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 1362 - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); 1165 + enable_scheduling(q); 1363 1166 } else { 1364 1167 clear_exec_queue_suspended(q); 1365 1168 } ··· 1613 1432 1614 1433 /* Clean up lost G2H + reset engine state */ 1615 1434 if (exec_queue_registered(q)) { 1616 - if ((exec_queue_banned(q) && exec_queue_destroyed(q)) || 1617 - xe_exec_queue_is_lr(q)) 1435 + if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) 1618 1436 xe_exec_queue_put(q); 1619 1437 else if (exec_queue_destroyed(q)) 1620 1438 __guc_exec_queue_fini(guc, q); ··· 1622 1442 set_exec_queue_suspended(q); 1623 1443 suspend_fence_signal(q); 1624 1444 } 1625 - atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED, 1445 + atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | 1446 + EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | 1447 + EXEC_QUEUE_STATE_SUSPENDED, 1626 1448 &q->guc->state); 1627 1449 q->guc->resume_time = 0; 1628 1450 trace_xe_exec_queue_stop(q); ··· 1767 1585 q->guc->id, 1768 1586 }; 1769 1587 1588 + xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); 1589 + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); 1590 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); 1591 + xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); 1592 + 1770 1593 trace_xe_exec_queue_deregister(q); 1771 1594 1772 1595 xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); 1773 1596 } 1774 1597 1775 - static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q) 1598 + static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, 1599 + u32 
runnable_state) 1776 1600 { 1777 1601 trace_xe_exec_queue_scheduling_done(q); 1778 1602 1779 - if (exec_queue_pending_enable(q)) { 1603 + if (runnable_state == 1) { 1604 + xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); 1605 + 1780 1606 q->guc->resume_time = ktime_get(); 1781 1607 clear_exec_queue_pending_enable(q); 1782 1608 smp_wmb(); 1783 1609 wake_up_all(&guc->ct.wq); 1784 1610 } else { 1611 + bool check_timeout = exec_queue_check_timeout(q); 1612 + 1613 + xe_gt_assert(guc_to_gt(guc), runnable_state == 0); 1614 + xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); 1615 + 1785 1616 clear_exec_queue_pending_disable(q); 1786 1617 if (q->guc->suspend_pending) { 1787 1618 suspend_fence_signal(q); 1788 1619 } else { 1789 - if (exec_queue_banned(q)) { 1620 + if (exec_queue_banned(q) || check_timeout) { 1790 1621 smp_wmb(); 1791 1622 wake_up_all(&guc->ct.wq); 1792 1623 } 1793 - deregister_exec_queue(guc, q); 1624 + if (!check_timeout) 1625 + deregister_exec_queue(guc, q); 1794 1626 } 1795 1627 } 1796 1628 } ··· 1814 1618 struct xe_device *xe = guc_to_xe(guc); 1815 1619 struct xe_exec_queue *q; 1816 1620 u32 guc_id = msg[0]; 1621 + u32 runnable_state = msg[1]; 1817 1622 1818 1623 if (unlikely(len < 2)) { 1819 1624 drm_err(&xe->drm, "Invalid length %u", len); ··· 1827 1630 1828 1631 if (unlikely(!exec_queue_pending_enable(q) && 1829 1632 !exec_queue_pending_disable(q))) { 1830 - drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1831 - atomic_read(&q->guc->state)); 1633 + xe_gt_err(guc_to_gt(guc), 1634 + "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", 1635 + atomic_read(&q->guc->state), q->guc->id, 1636 + runnable_state); 1832 1637 return -EPROTO; 1833 1638 } 1834 1639 1835 - handle_sched_done(guc, q); 1640 + handle_sched_done(guc, q, runnable_state); 1836 1641 1837 1642 return 0; 1838 1643 } ··· 1845 1646 1846 1647 clear_exec_queue_registered(q); 1847 1648 1848 - if (exec_queue_banned(q) || xe_exec_queue_is_lr(q)) 1649 + 
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) 1849 1650 xe_exec_queue_put(q); 1850 1651 else 1851 1652 __guc_exec_queue_fini(guc, q); ··· 1868 1669 1869 1670 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || 1870 1671 exec_queue_pending_enable(q) || exec_queue_enabled(q)) { 1871 - drm_err(&xe->drm, "Unexpected engine state 0x%04x", 1872 - atomic_read(&q->guc->state)); 1672 + xe_gt_err(guc_to_gt(guc), 1673 + "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", 1674 + atomic_read(&q->guc->state), q->guc->id); 1873 1675 return -EPROTO; 1874 1676 } 1875 1677 ··· 1909 1709 * guc_exec_queue_timedout_job. 1910 1710 */ 1911 1711 set_exec_queue_reset(q); 1912 - if (!exec_queue_banned(q)) 1712 + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) 1913 1713 xe_guc_exec_queue_trigger_cleanup(q); 1914 1714 1915 1715 return 0; ··· 1939 1739 1940 1740 /* Treat the same as engine reset */ 1941 1741 set_exec_queue_reset(q); 1942 - if (!exec_queue_banned(q)) 1742 + if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) 1943 1743 xe_guc_exec_queue_trigger_cleanup(q); 1944 1744 1945 1745 return 0;
+16 -14
drivers/gpu/drm/xe/xe_huc.c
··· 18 18 #include "xe_force_wake.h" 19 19 #include "xe_gsc_submit.h" 20 20 #include "xe_gt.h" 21 + #include "xe_gt_printk.h" 21 22 #include "xe_guc.h" 22 23 #include "xe_map.h" 23 24 #include "xe_mmio.h" 25 + #include "xe_sriov.h" 24 26 #include "xe_uc_fw.h" 25 27 26 28 static struct xe_gt * ··· 94 92 if (!xe_uc_fw_is_enabled(&huc->fw)) 95 93 return 0; 96 94 95 + if (IS_SRIOV_VF(xe)) 96 + return 0; 97 + 97 98 if (huc->fw.has_gsc_headers) { 98 99 ret = huc_alloc_gsc_pkt(huc); 99 100 if (ret) ··· 108 103 return 0; 109 104 110 105 out: 111 - drm_err(&xe->drm, "HuC init failed with %d", ret); 106 + xe_gt_err(gt, "HuC: initialization failed: %pe\n", ERR_PTR(ret)); 112 107 return ret; 113 108 } 114 109 ··· 196 191 } while (--retry && err == -EBUSY); 197 192 198 193 if (err) { 199 - drm_err(&xe->drm, "failed to submit GSC request to auth: %d\n", err); 194 + xe_gt_err(gt, "HuC: failed to submit GSC request to auth: %pe\n", ERR_PTR(err)); 200 195 return err; 201 196 } 202 197 203 198 err = xe_gsc_read_out_header(xe, &pkt->vmap, PXP43_HUC_AUTH_INOUT_SIZE, 204 199 sizeof(struct pxp43_huc_auth_out), &rd_offset); 205 200 if (err) { 206 - drm_err(&xe->drm, "HuC: invalid GSC reply for auth (err=%d)\n", err); 201 + xe_gt_err(gt, "HuC: invalid GSC reply for auth: %pe\n", ERR_PTR(err)); 207 202 return err; 208 203 } 209 204 ··· 214 209 */ 215 210 out_status = huc_auth_msg_rd(xe, &pkt->vmap, rd_offset, header.status); 216 211 if (out_status != PXP_STATUS_SUCCESS && out_status != PXP_STATUS_OP_NOT_PERMITTED) { 217 - drm_err(&xe->drm, "auth failed with GSC error = 0x%x\n", out_status); 212 + xe_gt_err(gt, "HuC: authentication failed with GSC error = %#x\n", out_status); 218 213 return -EIO; 219 214 } 220 215 ··· 243 238 244 239 int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type) 245 240 { 246 - struct xe_device *xe = huc_to_xe(huc); 247 241 struct xe_gt *gt = huc_to_gt(huc); 248 242 struct xe_guc *guc = huc_to_guc(huc); 249 243 int ret; ··· 272 268 return -EINVAL; 273 269 
} 274 270 if (ret) { 275 - drm_err(&xe->drm, "Failed to trigger HuC auth via %s: %d\n", 276 - huc_auth_modes[type].name, ret); 271 + xe_gt_err(gt, "HuC: failed to trigger auth via %s: %pe\n", 272 + huc_auth_modes[type].name, ERR_PTR(ret)); 277 273 goto fail; 278 274 } 279 275 280 276 ret = xe_mmio_wait32(gt, huc_auth_modes[type].reg, huc_auth_modes[type].val, 281 277 huc_auth_modes[type].val, 100000, NULL, false); 282 278 if (ret) { 283 - drm_err(&xe->drm, "HuC: Firmware not verified %d\n", ret); 279 + xe_gt_err(gt, "HuC: firmware not verified: %pe\n", ERR_PTR(ret)); 284 280 goto fail; 285 281 } 286 282 287 283 xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_RUNNING); 288 - drm_dbg(&xe->drm, "HuC authenticated via %s\n", huc_auth_modes[type].name); 284 + xe_gt_dbg(gt, "HuC: authenticated via %s\n", huc_auth_modes[type].name); 289 285 290 286 return 0; 291 287 292 288 fail: 293 - drm_err(&xe->drm, "HuC: Auth via %s failed: %d\n", 294 - huc_auth_modes[type].name, ret); 289 + xe_gt_err(gt, "HuC: authentication via %s failed: %pe\n", 290 + huc_auth_modes[type].name, ERR_PTR(ret)); 295 291 xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOAD_FAIL); 296 292 297 293 return ret; ··· 299 295 300 296 void xe_huc_sanitize(struct xe_huc *huc) 301 297 { 302 - if (!xe_uc_fw_is_loadable(&huc->fw)) 303 - return; 304 - xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_LOADABLE); 298 + xe_uc_fw_sanitize(&huc->fw); 305 299 } 306 300 307 301 void xe_huc_print_info(struct xe_huc *huc, struct drm_printer *p)
+3 -1
drivers/gpu/drm/xe/xe_hw_engine.c
··· 26 26 #include "xe_macros.h" 27 27 #include "xe_mmio.h" 28 28 #include "xe_reg_sr.h" 29 + #include "xe_reg_whitelist.h" 29 30 #include "xe_rtp.h" 30 31 #include "xe_sched_job.h" 31 32 #include "xe_sriov.h" ··· 547 546 if (hwe->class == XE_ENGINE_CLASS_OTHER) 548 547 hwe->irq_handler = xe_gsc_hwe_irq_handler; 549 548 550 - xe_hw_engine_enable_ring(hwe); 549 + if (!IS_SRIOV_VF(xe)) 550 + xe_hw_engine_enable_ring(hwe); 551 551 } 552 552 553 553 /* We reserve the highest BCS instance for USM */
+2
drivers/gpu/drm/xe/xe_hw_engine_types.h
··· 148 148 enum xe_hw_engine_id engine_id; 149 149 /** @eclass: pointer to per hw engine class interface */ 150 150 struct xe_hw_engine_class_intf *eclass; 151 + /** @oa_unit: oa unit for this hw engine */ 152 + struct xe_oa_unit *oa_unit; 151 153 }; 152 154 153 155 /**
+3 -5
drivers/gpu/drm/xe/xe_irq.c
··· 134 134 u32 gsc_mask = 0; 135 135 u32 heci_mask = 0; 136 136 137 + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) 138 + return; 139 + 137 140 if (xe_device_uc_enabled(xe)) { 138 141 irqs = GT_RENDER_USER_INTERRUPT | 139 142 GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; ··· 736 733 free_irq(irq, xe); 737 734 738 735 return err; 739 - } 740 - 741 - void xe_irq_shutdown(struct xe_device *xe) 742 - { 743 - irq_uninstall(xe); 744 736 } 745 737 746 738 void xe_irq_suspend(struct xe_device *xe)
-1
drivers/gpu/drm/xe/xe_irq.h
··· 11 11 struct xe_gt; 12 12 13 13 int xe_irq_install(struct xe_device *xe); 14 - void xe_irq_shutdown(struct xe_device *xe); 15 14 void xe_irq_suspend(struct xe_device *xe); 16 15 void xe_irq_resume(struct xe_device *xe); 17 16 void xe_irq_enable_hwe(struct xe_gt *gt);
+86 -3
drivers/gpu/drm/xe/xe_lrc.c
··· 49 49 } tail; 50 50 u32 start_seqno; 51 51 u32 seqno; 52 + u32 ctx_timestamp; 53 + u32 ctx_job_timestamp; 52 54 }; 53 55 54 56 static struct xe_device * ··· 651 649 652 650 /* Make the magic macros work */ 653 651 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset 652 + #define __xe_lrc_regs_offset xe_lrc_regs_offset 654 653 655 654 #define LRC_SEQNO_PPHWSP_OFFSET 512 656 655 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) 656 + #define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) 657 657 #define LRC_PARALLEL_PPHWSP_OFFSET 2048 658 658 #define LRC_PPHWSP_SIZE SZ_4K 659 + 660 + u32 xe_lrc_regs_offset(struct xe_lrc *lrc) 661 + { 662 + return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 663 + } 659 664 660 665 static size_t lrc_reg_size(struct xe_device *xe) 661 666 { ··· 689 680 return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; 690 681 } 691 682 683 + static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) 684 + { 685 + /* The start seqno is stored in the driver-defined portion of PPHWSP */ 686 + return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; 687 + } 688 + 692 689 static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) 693 690 { 694 691 /* The parallel is stored in the driver-defined portion of PPHWSP */ 695 692 return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; 696 693 } 697 694 698 - static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc) 695 + static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) 699 696 { 700 - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; 697 + return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); 701 698 } 702 699 703 700 static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) ··· 731 716 DECL_MAP_ADDR_HELPERS(seqno) 732 717 DECL_MAP_ADDR_HELPERS(regs) 733 718 DECL_MAP_ADDR_HELPERS(start_seqno) 719 + DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) 720 + DECL_MAP_ADDR_HELPERS(ctx_timestamp) 734 721 
DECL_MAP_ADDR_HELPERS(parallel) 735 722 DECL_MAP_ADDR_HELPERS(indirect_ring) 736 723 737 724 #undef DECL_MAP_ADDR_HELPERS 725 + 726 + /** 727 + * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address 728 + * @lrc: Pointer to the lrc. 729 + * 730 + * Returns: ctx timestamp GGTT address 731 + */ 732 + u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) 733 + { 734 + return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); 735 + } 736 + 737 + /** 738 + * xe_lrc_ctx_timestamp() - Read ctx timestamp value 739 + * @lrc: Pointer to the lrc. 740 + * 741 + * Returns: ctx timestamp value 742 + */ 743 + u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) 744 + { 745 + struct xe_device *xe = lrc_to_xe(lrc); 746 + struct iosys_map map; 747 + 748 + map = __xe_lrc_ctx_timestamp_map(lrc); 749 + return xe_map_read32(xe, &map); 750 + } 751 + 752 + /** 753 + * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address 754 + * @lrc: Pointer to the lrc. 755 + * 756 + * Returns: ctx timestamp job GGTT address 757 + */ 758 + u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) 759 + { 760 + return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); 761 + } 762 + 763 + /** 764 + * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value 765 + * @lrc: Pointer to the lrc. 
766 + * 767 + * Returns: ctx timestamp job value 768 + */ 769 + u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) 770 + { 771 + struct xe_device *xe = lrc_to_xe(lrc); 772 + struct iosys_map map; 773 + 774 + map = __xe_lrc_ctx_job_timestamp_map(lrc); 775 + return xe_map_read32(xe, &map); 776 + } 738 777 739 778 u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) 740 779 { ··· 1645 1576 snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); 1646 1577 snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; 1647 1578 snapshot->lrc_snapshot = NULL; 1579 + snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1580 + snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); 1648 1581 return snapshot; 1649 1582 } 1650 1583 ··· 1695 1624 snapshot->tail.internal, snapshot->tail.memory); 1696 1625 drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); 1697 1626 drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); 1627 + drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); 1628 + drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); 1698 1629 1699 1630 if (!snapshot->lrc_snapshot) 1700 1631 return; ··· 1732 1659 kfree(snapshot); 1733 1660 } 1734 1661 1662 + /** 1663 + * xe_lrc_update_timestamp() - Update ctx timestamp 1664 + * @lrc: Pointer to the lrc. 1665 + * @old_ts: Old timestamp value 1666 + * 1667 + * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and 1668 + * update saved value. 1669 + * 1670 + * Returns: New ctx timestamp value 1671 + */ 1735 1672 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) 1736 1673 { 1737 1674 *old_ts = lrc->ctx_timestamp; 1738 1675 1739 - lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP); 1676 + lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); 1740 1677 1741 1678 return lrc->ctx_timestamp; 1742 1679 }
+6
drivers/gpu/drm/xe/xe_lrc.h
··· 52 52 53 53 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class); 54 54 u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc); 55 + u32 xe_lrc_regs_offset(struct xe_lrc *lrc); 55 56 56 57 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail); 57 58 u32 xe_lrc_ring_tail(struct xe_lrc *lrc); ··· 94 93 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); 95 94 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); 96 95 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); 96 + 97 + u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc); 98 + u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc); 99 + u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc); 100 + u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc); 97 101 98 102 /** 99 103 * xe_lrc_update_timestamp - readout LRC timestamp and update cached value
+11 -17
drivers/gpu/drm/xe/xe_migrate.c
··· 32 32 #include "xe_res_cursor.h" 33 33 #include "xe_sched_job.h" 34 34 #include "xe_sync.h" 35 - #include "xe_trace.h" 35 + #include "xe_trace_bo.h" 36 36 #include "xe_vm.h" 37 37 38 38 /** ··· 647 647 bb->cs[bb->len++] = upper_32_bits(src_ofs); 648 648 } 649 649 650 - static int job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, 651 - enum dma_resv_usage usage) 652 - { 653 - return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); 654 - } 655 - 656 650 static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm) 657 651 { 658 652 return usm ? m->usm_batch_base_ofs : m->batch_base_ofs; ··· 843 849 844 850 xe_sched_job_add_migrate_flush(job, flush_flags); 845 851 if (!fence) { 846 - err = job_add_deps(job, src_bo->ttm.base.resv, 847 - DMA_RESV_USAGE_BOOKKEEP); 852 + err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv, 853 + DMA_RESV_USAGE_BOOKKEEP); 848 854 if (!err && src_bo != dst_bo) 849 - err = job_add_deps(job, dst_bo->ttm.base.resv, 850 - DMA_RESV_USAGE_BOOKKEEP); 855 + err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv, 856 + DMA_RESV_USAGE_BOOKKEEP); 851 857 if (err) 852 858 goto err_job; 853 859 } ··· 1085 1091 * fences, which are always tracked as 1086 1092 * DMA_RESV_USAGE_KERNEL. 
1087 1093 */ 1088 - err = job_add_deps(job, bo->ttm.base.resv, 1089 - DMA_RESV_USAGE_KERNEL); 1094 + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, 1095 + DMA_RESV_USAGE_KERNEL); 1090 1096 if (err) 1091 1097 goto err_job; 1092 1098 } ··· 1411 1417 1412 1418 /* Wait on BO move */ 1413 1419 if (bo) { 1414 - err = job_add_deps(job, bo->ttm.base.resv, 1415 - DMA_RESV_USAGE_KERNEL); 1420 + err = xe_sched_job_add_deps(job, bo->ttm.base.resv, 1421 + DMA_RESV_USAGE_KERNEL); 1416 1422 if (err) 1417 1423 goto err_job; 1418 1424 } ··· 1422 1428 * trigger preempts before moving forward 1423 1429 */ 1424 1430 if (first_munmap_rebind) { 1425 - err = job_add_deps(job, xe_vm_resv(vm), 1426 - DMA_RESV_USAGE_BOOKKEEP); 1431 + err = xe_sched_job_add_deps(job, xe_vm_resv(vm), 1432 + DMA_RESV_USAGE_BOOKKEEP); 1427 1433 if (err) 1428 1434 goto err_job; 1429 1435 }
+19 -4
drivers/gpu/drm/xe/xe_mmio.c
··· 21 21 #include "xe_gt_sriov_vf.h" 22 22 #include "xe_macros.h" 23 23 #include "xe_sriov.h" 24 + #include "xe_trace.h" 24 25 25 26 static void tiles_fini(void *arg) 26 27 { ··· 125 124 { 126 125 struct xe_tile *tile = gt_to_tile(gt); 127 126 u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); 127 + u8 val; 128 128 129 - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 129 + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 130 + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); 131 + 132 + return val; 130 133 } 131 134 132 135 u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) 133 136 { 134 137 struct xe_tile *tile = gt_to_tile(gt); 135 138 u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); 139 + u16 val; 136 140 137 - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 141 + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 142 + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); 143 + 144 + return val; 138 145 } 139 146 140 147 void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) ··· 150 141 struct xe_tile *tile = gt_to_tile(gt); 151 142 u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); 152 143 144 + trace_xe_reg_rw(gt, true, addr, val, sizeof(val)); 153 145 writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 154 146 } 155 147 ··· 158 148 { 159 149 struct xe_tile *tile = gt_to_tile(gt); 160 150 u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); 151 + u32 val; 161 152 162 153 if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) 163 - return xe_gt_sriov_vf_read32(gt, reg); 154 + val = xe_gt_sriov_vf_read32(gt, reg); 155 + else 156 + val = readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); 164 157 165 - return readl((reg.ext ? 
tile->mmio_ext.regs : tile->mmio.regs) + addr); 158 + trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); 159 + 160 + return val; 166 161 } 167 162 168 163 u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+5
drivers/gpu/drm/xe/xe_module.c
··· 11 11 #include "xe_drv.h" 12 12 #include "xe_hw_fence.h" 13 13 #include "xe_pci.h" 14 + #include "xe_perf.h" 14 15 #include "xe_sched_job.h" 15 16 16 17 struct xe_modparam xe_modparam = { ··· 78 77 { 79 78 .init = xe_register_pci_driver, 80 79 .exit = xe_unregister_pci_driver, 80 + }, 81 + { 82 + .init = xe_perf_sysctl_register, 83 + .exit = xe_perf_sysctl_unregister, 81 84 }, 82 85 }; 83 86
+2510
drivers/gpu/drm/xe/xe_oa.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #include <linux/anon_inodes.h> 7 + #include <linux/delay.h> 8 + #include <linux/nospec.h> 9 + #include <linux/poll.h> 10 + 11 + #include <drm/drm_drv.h> 12 + #include <drm/drm_managed.h> 13 + #include <drm/xe_drm.h> 14 + 15 + #include "abi/guc_actions_slpc_abi.h" 16 + #include "instructions/xe_mi_commands.h" 17 + #include "regs/xe_engine_regs.h" 18 + #include "regs/xe_gt_regs.h" 19 + #include "regs/xe_lrc_layout.h" 20 + #include "regs/xe_oa_regs.h" 21 + #include "xe_assert.h" 22 + #include "xe_bb.h" 23 + #include "xe_bo.h" 24 + #include "xe_device.h" 25 + #include "xe_exec_queue.h" 26 + #include "xe_force_wake.h" 27 + #include "xe_gt.h" 28 + #include "xe_gt_mcr.h" 29 + #include "xe_gt_printk.h" 30 + #include "xe_guc_pc.h" 31 + #include "xe_lrc.h" 32 + #include "xe_macros.h" 33 + #include "xe_mmio.h" 34 + #include "xe_oa.h" 35 + #include "xe_perf.h" 36 + #include "xe_pm.h" 37 + #include "xe_sched_job.h" 38 + #include "xe_sriov.h" 39 + 40 + #define DEFAULT_POLL_FREQUENCY_HZ 200 41 + #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) 42 + #define XE_OA_UNIT_INVALID U32_MAX 43 + 44 + struct xe_oa_reg { 45 + struct xe_reg addr; 46 + u32 value; 47 + }; 48 + 49 + struct xe_oa_config { 50 + struct xe_oa *oa; 51 + 52 + char uuid[UUID_STRING_LEN + 1]; 53 + int id; 54 + 55 + const struct xe_oa_reg *regs; 56 + u32 regs_len; 57 + 58 + struct attribute_group sysfs_metric; 59 + struct attribute *attrs[2]; 60 + struct kobj_attribute sysfs_metric_id; 61 + 62 + struct kref ref; 63 + struct rcu_head rcu; 64 + }; 65 + 66 + struct flex { 67 + struct xe_reg reg; 68 + u32 offset; 69 + u32 value; 70 + }; 71 + 72 + struct xe_oa_open_param { 73 + u32 oa_unit_id; 74 + bool sample; 75 + u32 metric_set; 76 + enum xe_oa_format_name oa_format; 77 + int period_exponent; 78 + bool disabled; 79 + int exec_queue_id; 80 + int engine_instance; 81 + struct xe_exec_queue 
*exec_q; 82 + struct xe_hw_engine *hwe; 83 + bool no_preempt; 84 + }; 85 + 86 + struct xe_oa_config_bo { 87 + struct llist_node node; 88 + 89 + struct xe_oa_config *oa_config; 90 + struct xe_bb *bb; 91 + }; 92 + 93 + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x 94 + 95 + static const struct xe_oa_format oa_formats[] = { 96 + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, 97 + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, 98 + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, 99 + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, 100 + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, 101 + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, 102 + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, 103 + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, 104 + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, 105 + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, 106 + [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, 107 + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, 108 + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, 109 + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, 110 + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, 111 + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, 112 + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, 113 + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, 114 + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, 115 + }; 116 + 117 + static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) 118 + { 119 + return tail >= head ? 
tail - head : 120 + tail + stream->oa_buffer.circ_size - head; 121 + } 122 + 123 + static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) 124 + { 125 + return ptr + n >= stream->oa_buffer.circ_size ? 126 + ptr + n - stream->oa_buffer.circ_size : ptr + n; 127 + } 128 + 129 + static void xe_oa_config_release(struct kref *ref) 130 + { 131 + struct xe_oa_config *oa_config = 132 + container_of(ref, typeof(*oa_config), ref); 133 + 134 + kfree(oa_config->regs); 135 + 136 + kfree_rcu(oa_config, rcu); 137 + } 138 + 139 + static void xe_oa_config_put(struct xe_oa_config *oa_config) 140 + { 141 + if (!oa_config) 142 + return; 143 + 144 + kref_put(&oa_config->ref, xe_oa_config_release); 145 + } 146 + 147 + static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) 148 + { 149 + return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL; 150 + } 151 + 152 + static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) 153 + { 154 + struct xe_oa_config *oa_config; 155 + 156 + rcu_read_lock(); 157 + oa_config = idr_find(&oa->metrics_idr, metrics_set); 158 + if (oa_config) 159 + oa_config = xe_oa_config_get(oa_config); 160 + rcu_read_unlock(); 161 + 162 + return oa_config; 163 + } 164 + 165 + static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) 166 + { 167 + xe_oa_config_put(oa_bo->oa_config); 168 + xe_bb_free(oa_bo->bb, NULL); 169 + kfree(oa_bo); 170 + } 171 + 172 + static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) 173 + { 174 + return &stream->hwe->oa_unit->regs; 175 + } 176 + 177 + static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) 178 + { 179 + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & 180 + OAG_OATAILPTR_MASK; 181 + } 182 + 183 + #define oa_report_header_64bit(__s) \ 184 + ((__s)->oa_buffer.format->header == HDR_64_BIT) 185 + 186 + static u64 oa_report_id(struct xe_oa_stream *stream, void *report) 187 + { 188 + return oa_report_header_64bit(stream) ? 
*(u64 *)report : *(u32 *)report; 189 + } 190 + 191 + static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) 192 + { 193 + if (oa_report_header_64bit(stream)) 194 + *(u64 *)report = 0; 195 + else 196 + *report = 0; 197 + } 198 + 199 + static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) 200 + { 201 + return oa_report_header_64bit(stream) ? 202 + *((u64 *)report + 1) : 203 + *((u32 *)report + 1); 204 + } 205 + 206 + static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) 207 + { 208 + if (oa_report_header_64bit(stream)) 209 + *(u64 *)&report[2] = 0; 210 + else 211 + report[1] = 0; 212 + } 213 + 214 + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) 215 + { 216 + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 217 + int report_size = stream->oa_buffer.format->size; 218 + u32 tail, hw_tail; 219 + unsigned long flags; 220 + bool pollin; 221 + u32 partial_report_size; 222 + 223 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 224 + 225 + hw_tail = xe_oa_hw_tail_read(stream); 226 + hw_tail -= gtt_offset; 227 + 228 + /* 229 + * The tail pointer increases in 64 byte (cacheline size), not in report_size 230 + * increments. Also report size may not be a power of 2. Compute potential 231 + * partially landed report in OA buffer. 232 + */ 233 + partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail); 234 + partial_report_size %= report_size; 235 + 236 + /* Subtract partial amount off the tail */ 237 + hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size); 238 + 239 + tail = hw_tail; 240 + 241 + /* 242 + * Walk the stream backward until we find a report with report id and timestamp 243 + * not 0. We can't tell whether a report has fully landed in memory before the 244 + * report id and timestamp of the following report have landed. 245 + * 246 + * This is assuming that the writes of the OA unit land in memory in the order 247 + * they were written. 
If not : (╯°□°)╯︵ ┻━┻ 248 + */ 249 + while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { 250 + void *report = stream->oa_buffer.vaddr + tail; 251 + 252 + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) 253 + break; 254 + 255 + tail = xe_oa_circ_diff(stream, tail, report_size); 256 + } 257 + 258 + if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size) 259 + drm_dbg(&stream->oa->xe->drm, 260 + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", 261 + stream->oa_buffer.head, tail, hw_tail); 262 + 263 + stream->oa_buffer.tail = tail; 264 + 265 + pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, 266 + stream->oa_buffer.head) >= report_size; 267 + 268 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 269 + 270 + return pollin; 271 + } 272 + 273 + static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) 274 + { 275 + struct xe_oa_stream *stream = 276 + container_of(hrtimer, typeof(*stream), poll_check_timer); 277 + 278 + if (xe_oa_buffer_check_unlocked(stream)) { 279 + stream->pollin = true; 280 + wake_up(&stream->poll_wq); 281 + } 282 + 283 + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); 284 + 285 + return HRTIMER_RESTART; 286 + } 287 + 288 + static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, 289 + size_t count, size_t *offset, const u8 *report) 290 + { 291 + int report_size = stream->oa_buffer.format->size; 292 + int report_size_partial; 293 + u8 *oa_buf_end; 294 + 295 + if ((count - *offset) < report_size) 296 + return -ENOSPC; 297 + 298 + buf += *offset; 299 + 300 + oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; 301 + report_size_partial = oa_buf_end - report; 302 + 303 + if (report_size_partial < report_size) { 304 + if (copy_to_user(buf, report, report_size_partial)) 305 + return -EFAULT; 306 + buf += report_size_partial; 307 + 308 + if (copy_to_user(buf, stream->oa_buffer.vaddr, 309 + report_size - 
report_size_partial)) 310 + return -EFAULT; 311 + } else if (copy_to_user(buf, report, report_size)) { 312 + return -EFAULT; 313 + } 314 + 315 + *offset += report_size; 316 + 317 + return 0; 318 + } 319 + 320 + static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, 321 + size_t count, size_t *offset) 322 + { 323 + int report_size = stream->oa_buffer.format->size; 324 + u8 *oa_buf_base = stream->oa_buffer.vaddr; 325 + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 326 + size_t start_offset = *offset; 327 + unsigned long flags; 328 + u32 head, tail; 329 + int ret = 0; 330 + 331 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 332 + head = stream->oa_buffer.head; 333 + tail = stream->oa_buffer.tail; 334 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 335 + 336 + xe_assert(stream->oa->xe, 337 + head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); 338 + 339 + for (; xe_oa_circ_diff(stream, tail, head); 340 + head = xe_oa_circ_incr(stream, head, report_size)) { 341 + u8 *report = oa_buf_base + head; 342 + 343 + ret = xe_oa_append_report(stream, buf, count, offset, report); 344 + if (ret) 345 + break; 346 + 347 + if (!(stream->oa_buffer.circ_size % report_size)) { 348 + /* Clear out report id and timestamp to detect unlanded reports */ 349 + oa_report_id_clear(stream, (void *)report); 350 + oa_timestamp_clear(stream, (void *)report); 351 + } else { 352 + u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; 353 + u32 part = oa_buf_end - report; 354 + 355 + /* Zero out the entire report */ 356 + if (report_size <= part) { 357 + memset(report, 0, report_size); 358 + } else { 359 + memset(report, 0, part); 360 + memset(oa_buf_base, 0, report_size - part); 361 + } 362 + } 363 + } 364 + 365 + if (start_offset != *offset) { 366 + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; 367 + 368 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 369 + 
xe_mmio_write32(stream->gt, oaheadptr, 370 + (head + gtt_offset) & OAG_OAHEADPTR_MASK); 371 + stream->oa_buffer.head = head; 372 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 373 + } 374 + 375 + return ret; 376 + } 377 + 378 + static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) 379 + { 380 + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 381 + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; 382 + unsigned long flags; 383 + 384 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 385 + 386 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); 387 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, 388 + gtt_offset & OAG_OAHEADPTR_MASK); 389 + stream->oa_buffer.head = 0; 390 + /* 391 + * PRM says: "This MMIO must be set before the OATAILPTR register and after the 392 + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". 393 + */ 394 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); 395 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, 396 + gtt_offset & OAG_OATAILPTR_MASK); 397 + 398 + /* Mark that we need updated tail pointer to read from */ 399 + stream->oa_buffer.tail = 0; 400 + 401 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 402 + 403 + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ 404 + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); 405 + } 406 + 407 + static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) 408 + { 409 + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | 410 + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | 411 + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); 412 + } 413 + 414 + static u32 __oa_ccs_select(struct xe_oa_stream *stream) 415 + { 416 + u32 val; 417 + 418 + if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE) 419 + 
return 0; 420 + 421 + val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance); 422 + xe_assert(stream->oa->xe, 423 + REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance); 424 + return val; 425 + } 426 + 427 + static void xe_oa_enable(struct xe_oa_stream *stream) 428 + { 429 + const struct xe_oa_format *format = stream->oa_buffer.format; 430 + const struct xe_oa_regs *regs; 431 + u32 val; 432 + 433 + /* 434 + * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA 435 + * buffer must be correctly initialized 436 + */ 437 + xe_oa_init_oa_buffer(stream); 438 + 439 + regs = __oa_regs(stream); 440 + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | 441 + __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; 442 + 443 + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); 444 + } 445 + 446 + static void xe_oa_disable(struct xe_oa_stream *stream) 447 + { 448 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); 449 + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, 450 + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) 451 + drm_err(&stream->oa->xe->drm, 452 + "wait for OA to be disabled timed out\n"); 453 + 454 + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { 455 + /* <= XE_METEORLAKE except XE_PVC */ 456 + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); 457 + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) 458 + drm_err(&stream->oa->xe->drm, 459 + "wait for OA tlb invalidate timed out\n"); 460 + } 461 + } 462 + 463 + static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) 464 + { 465 + /* We might wait indefinitely if periodic sampling is not enabled */ 466 + if (!stream->periodic) 467 + return -EINVAL; 468 + 469 + return wait_event_interruptible(stream->poll_wq, 470 + xe_oa_buffer_check_unlocked(stream)); 471 + } 472 + 473 + #define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL 
| OASTATUS_COUNTER_OVERFLOW | \ 474 + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) 475 + 476 + static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, 477 + size_t count, size_t *offset) 478 + { 479 + /* Only clear our bits to avoid side-effects */ 480 + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, 481 + OASTATUS_RELEVANT_BITS, 0); 482 + /* 483 + * Signal to userspace that there is non-zero OA status to read via 484 + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl 485 + */ 486 + if (stream->oa_status & OASTATUS_RELEVANT_BITS) 487 + return -EIO; 488 + 489 + return xe_oa_append_reports(stream, buf, count, offset); 490 + } 491 + 492 + static ssize_t xe_oa_read(struct file *file, char __user *buf, 493 + size_t count, loff_t *ppos) 494 + { 495 + struct xe_oa_stream *stream = file->private_data; 496 + size_t offset = 0; 497 + int ret; 498 + 499 + /* Can't read from disabled streams */ 500 + if (!stream->enabled || !stream->sample) 501 + return -EINVAL; 502 + 503 + if (!(file->f_flags & O_NONBLOCK)) { 504 + do { 505 + ret = xe_oa_wait_unlocked(stream); 506 + if (ret) 507 + return ret; 508 + 509 + mutex_lock(&stream->stream_lock); 510 + ret = __xe_oa_read(stream, buf, count, &offset); 511 + mutex_unlock(&stream->stream_lock); 512 + } while (!offset && !ret); 513 + } else { 514 + mutex_lock(&stream->stream_lock); 515 + ret = __xe_oa_read(stream, buf, count, &offset); 516 + mutex_unlock(&stream->stream_lock); 517 + } 518 + 519 + /* 520 + * Typically we clear pollin here in order to wait for the new hrtimer callback 521 + * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, 522 + * which means that more OA data is available than could fit in the user provided 523 + * buffer. In this case we want the next poll() call to not block. 
524 + * 525 + * Also in case of -EIO, we have already waited for data before returning 526 + * -EIO, so need to wait again 527 + */ 528 + if (ret != -ENOSPC && ret != -EIO) 529 + stream->pollin = false; 530 + 531 + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ 532 + return offset ?: (ret ?: -EAGAIN); 533 + } 534 + 535 + static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, 536 + struct file *file, poll_table *wait) 537 + { 538 + __poll_t events = 0; 539 + 540 + poll_wait(file, &stream->poll_wq, wait); 541 + 542 + /* 543 + * We don't explicitly check whether there's something to read here since this 544 + * path may be hot depending on what else userspace is polling, or on the timeout 545 + * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there 546 + * are samples to read 547 + */ 548 + if (stream->pollin) 549 + events |= EPOLLIN; 550 + 551 + return events; 552 + } 553 + 554 + static __poll_t xe_oa_poll(struct file *file, poll_table *wait) 555 + { 556 + struct xe_oa_stream *stream = file->private_data; 557 + __poll_t ret; 558 + 559 + mutex_lock(&stream->stream_lock); 560 + ret = xe_oa_poll_locked(stream, file, wait); 561 + mutex_unlock(&stream->stream_lock); 562 + 563 + return ret; 564 + } 565 + 566 + static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) 567 + { 568 + struct xe_sched_job *job; 569 + struct dma_fence *fence; 570 + long timeout; 571 + int err = 0; 572 + 573 + /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ 574 + job = xe_bb_create_job(stream->k_exec_q, bb); 575 + if (IS_ERR(job)) { 576 + err = PTR_ERR(job); 577 + goto exit; 578 + } 579 + 580 + xe_sched_job_arm(job); 581 + fence = dma_fence_get(&job->drm.s_fence->finished); 582 + xe_sched_job_push(job); 583 + 584 + timeout = dma_fence_wait_timeout(fence, false, HZ); 585 + dma_fence_put(fence); 586 + if (timeout < 0) 587 + err = timeout; 588 + else if (!timeout) 589 + err = -ETIME; 590 + exit: 
591 + return err; 592 + } 593 + 594 + static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) 595 + { 596 + u32 i; 597 + 598 + #define MI_LOAD_REGISTER_IMM_MAX_REGS (126) 599 + 600 + for (i = 0; i < n_regs; i++) { 601 + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { 602 + u32 n_lri = min_t(u32, n_regs - i, 603 + MI_LOAD_REGISTER_IMM_MAX_REGS); 604 + 605 + bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); 606 + } 607 + bb->cs[bb->len++] = reg_data[i].addr.addr; 608 + bb->cs[bb->len++] = reg_data[i].value; 609 + } 610 + } 611 + 612 + static int num_lri_dwords(int num_regs) 613 + { 614 + int count = 0; 615 + 616 + if (num_regs > 0) { 617 + count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); 618 + count += num_regs * 2; 619 + } 620 + 621 + return count; 622 + } 623 + 624 + static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) 625 + { 626 + xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); 627 + } 628 + 629 + static void xe_oa_free_configs(struct xe_oa_stream *stream) 630 + { 631 + struct xe_oa_config_bo *oa_bo, *tmp; 632 + 633 + xe_oa_config_put(stream->oa_config); 634 + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) 635 + free_oa_config_bo(oa_bo); 636 + } 637 + 638 + static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, 639 + struct xe_bb *bb, const struct flex *flex, u32 count) 640 + { 641 + u32 offset = xe_bo_ggtt_addr(lrc->bo); 642 + 643 + do { 644 + bb->cs[bb->len++] = MI_STORE_DATA_IMM | BIT(22) /* GGTT */ | 2; 645 + bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); 646 + bb->cs[bb->len++] = 0; 647 + bb->cs[bb->len++] = flex->value; 648 + 649 + } while (flex++, --count); 650 + } 651 + 652 + static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, 653 + const struct flex *flex, u32 count) 654 + { 655 + struct xe_bb *bb; 656 + int err; 657 + 658 + bb = xe_bb_new(stream->gt, 4 * count, false); 659 + if (IS_ERR(bb)) { 
660 + err = PTR_ERR(bb); 661 + goto exit; 662 + } 663 + 664 + xe_oa_store_flex(stream, lrc, bb, flex, count); 665 + 666 + err = xe_oa_submit_bb(stream, bb); 667 + xe_bb_free(bb, NULL); 668 + exit: 669 + return err; 670 + } 671 + 672 + static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) 673 + { 674 + struct xe_bb *bb; 675 + int err; 676 + 677 + bb = xe_bb_new(stream->gt, 3, false); 678 + if (IS_ERR(bb)) { 679 + err = PTR_ERR(bb); 680 + goto exit; 681 + } 682 + 683 + write_cs_mi_lri(bb, reg_lri, 1); 684 + 685 + err = xe_oa_submit_bb(stream, bb); 686 + xe_bb_free(bb, NULL); 687 + exit: 688 + return err; 689 + } 690 + 691 + static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) 692 + { 693 + const struct xe_oa_format *format = stream->oa_buffer.format; 694 + struct xe_lrc *lrc = stream->exec_q->lrc[0]; 695 + u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); 696 + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | 697 + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); 698 + 699 + struct flex regs_context[] = { 700 + { 701 + OACTXCONTROL(stream->hwe->mmio_base), 702 + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, 703 + enable ? OA_COUNTER_RESUME : 0, 704 + }, 705 + { 706 + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), 707 + regs_offset + CTX_CONTEXT_CONTROL, 708 + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, 709 + enable ? 
CTX_CTRL_OAC_CONTEXT_ENABLE : 0) 710 + }, 711 + }; 712 + struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; 713 + int err; 714 + 715 + /* Modify stream hwe context image with regs_context */ 716 + err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], 717 + regs_context, ARRAY_SIZE(regs_context)); 718 + if (err) 719 + return err; 720 + 721 + /* Apply reg_lri using LRI */ 722 + return xe_oa_load_with_lri(stream, &reg_lri); 723 + } 724 + 725 + static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) 726 + { 727 + const struct xe_oa_format *format = stream->oa_buffer.format; 728 + struct xe_lrc *lrc = stream->exec_q->lrc[0]; 729 + u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); 730 + u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | 731 + (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); 732 + struct flex regs_context[] = { 733 + { 734 + OACTXCONTROL(stream->hwe->mmio_base), 735 + stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, 736 + enable ? OA_COUNTER_RESUME : 0, 737 + }, 738 + { 739 + RING_CONTEXT_CONTROL(stream->hwe->mmio_base), 740 + regs_offset + CTX_CONTEXT_CONTROL, 741 + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, 742 + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | 743 + _MASKED_FIELD(CTX_CTRL_RUN_ALONE, 744 + enable ? 
CTX_CTRL_RUN_ALONE : 0), 745 + }, 746 + }; 747 + struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; 748 + int err; 749 + 750 + /* Set ccs select to enable programming of OAC_OACONTROL */ 751 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); 752 + 753 + /* Modify stream hwe context image with regs_context */ 754 + err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], 755 + regs_context, ARRAY_SIZE(regs_context)); 756 + if (err) 757 + return err; 758 + 759 + /* Apply reg_lri using LRI */ 760 + return xe_oa_load_with_lri(stream, &reg_lri); 761 + } 762 + 763 + static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) 764 + { 765 + switch (stream->hwe->class) { 766 + case XE_ENGINE_CLASS_RENDER: 767 + return xe_oa_configure_oar_context(stream, enable); 768 + case XE_ENGINE_CLASS_COMPUTE: 769 + return xe_oa_configure_oac_context(stream, enable); 770 + default: 771 + /* Video engines do not support MI_REPORT_PERF_COUNT */ 772 + return 0; 773 + } 774 + } 775 + 776 + #define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) 777 + 778 + static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) 779 + { 780 + return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, 781 + enable && stream && stream->sample ? 782 + 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); 783 + } 784 + 785 + static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) 786 + { 787 + u32 sqcnt1; 788 + 789 + /* 790 + * Wa_1508761755:xehpsdv, dg2 791 + * Enable thread stall DOP gating and EU DOP gating. 
792 + */ 793 + if (stream->oa->xe->info.platform == XE_DG2) { 794 + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, 795 + _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); 796 + xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, 797 + _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); 798 + } 799 + 800 + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, 801 + oag_configure_mmio_trigger(stream, false)); 802 + 803 + /* disable the context save/restore or OAR counters */ 804 + if (stream->exec_q) 805 + xe_oa_configure_oa_context(stream, false); 806 + 807 + /* Make sure we disable noa to save power. */ 808 + xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); 809 + 810 + sqcnt1 = SQCNT1_PMON_ENABLE | 811 + (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); 812 + 813 + /* Reset PMON Enable to save power. */ 814 + xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); 815 + } 816 + 817 + static void xe_oa_stream_destroy(struct xe_oa_stream *stream) 818 + { 819 + struct xe_oa_unit *u = stream->hwe->oa_unit; 820 + struct xe_gt *gt = stream->hwe->gt; 821 + 822 + if (WARN_ON(stream != u->exclusive_stream)) 823 + return; 824 + 825 + WRITE_ONCE(u->exclusive_stream, NULL); 826 + 827 + xe_oa_disable_metric_set(stream); 828 + xe_exec_queue_put(stream->k_exec_q); 829 + 830 + xe_oa_free_oa_buffer(stream); 831 + 832 + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); 833 + xe_pm_runtime_put(stream->oa->xe); 834 + 835 + /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ 836 + if (stream->override_gucrc) 837 + xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); 838 + 839 + xe_oa_free_configs(stream); 840 + } 841 + 842 + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) 843 + { 844 + struct xe_bo *bo; 845 + 846 + BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); 847 + BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); 848 + 849 + bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, 850 
+ XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, 851 + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); 852 + if (IS_ERR(bo)) 853 + return PTR_ERR(bo); 854 + 855 + stream->oa_buffer.bo = bo; 856 + /* mmap implementation requires OA buffer to be in system memory */ 857 + xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); 858 + stream->oa_buffer.vaddr = bo->vmap.vaddr; 859 + return 0; 860 + } 861 + 862 + static struct xe_oa_config_bo * 863 + __xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) 864 + { 865 + struct xe_oa_config_bo *oa_bo; 866 + size_t config_length; 867 + struct xe_bb *bb; 868 + 869 + oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); 870 + if (!oa_bo) 871 + return ERR_PTR(-ENOMEM); 872 + 873 + config_length = num_lri_dwords(oa_config->regs_len); 874 + config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32); 875 + 876 + bb = xe_bb_new(stream->gt, config_length, false); 877 + if (IS_ERR(bb)) 878 + goto err_free; 879 + 880 + write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len); 881 + 882 + oa_bo->bb = bb; 883 + oa_bo->oa_config = xe_oa_config_get(oa_config); 884 + llist_add(&oa_bo->node, &stream->oa_config_bos); 885 + 886 + return oa_bo; 887 + err_free: 888 + kfree(oa_bo); 889 + return ERR_CAST(bb); 890 + } 891 + 892 + static struct xe_oa_config_bo * 893 + xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) 894 + { 895 + struct xe_oa_config_bo *oa_bo; 896 + 897 + /* Look for the buffer in the already allocated BOs attached to the stream */ 898 + llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { 899 + if (oa_bo->oa_config == oa_config && 900 + memcmp(oa_bo->oa_config->uuid, oa_config->uuid, 901 + sizeof(oa_config->uuid)) == 0) 902 + goto out; 903 + } 904 + 905 + oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); 906 + out: 907 + return oa_bo; 908 + } 909 + 910 + static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) 911 + { 912 
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
	struct xe_oa_config_bo *oa_bo;
	int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;

	oa_bo = xe_oa_alloc_config_buffer(stream, config);
	if (IS_ERR(oa_bo)) {
		err = PTR_ERR(oa_bo);
		goto exit;
	}

	err = xe_oa_submit_bb(stream, oa_bo->bb);

	/* Additional empirical delay needed for NOA programming after registers are written */
	/* NOTE(review): delay is applied even when xe_oa_submit_bb() returned an error — confirm intended */
	usleep_range(us, 2 * us);
exit:
	return err;
}

/* Build the OAG_OA_DEBUG ctx-switch-report field for the stream's sampling mode */
static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
{
	/* If user didn't require OA reports, ask HW not to emit ctx switch reports */
	return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
			     stream->sample ?
			     0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}

/*
 * Program OA unit control registers (debug, ctx ctrl, SQCNT1), apply required
 * workarounds, optionally configure OAR/OAC for the exec queue, then emit the
 * selected OA metric configuration.
 */
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
	u32 oa_debug, sqcnt1;
	int ret;

	/*
	 * Wa_1508761755:xehpsdv, dg2
	 * EU NOA signals behave incorrectly if EU clock gating is enabled.
	 * Disable thread stall DOP gating and EU DOP gating.
	 */
	if (stream->oa->xe->info.platform == XE_DG2) {
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
					  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
	}

	/* Disable clk ratio reports */
	oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
		   OAG_OA_DEBUG_INCLUDE_CLK_RATIO;

	if (GRAPHICS_VER(stream->oa->xe) >= 20)
		oa_debug |=
			/* The three bits below are needed to get PEC counters running */
			OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL |
			OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
			OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;

	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
			_MASKED_BIT_ENABLE(oa_debug) |
			oag_report_ctx_switches(stream) |
			oag_configure_mmio_trigger(stream, true));

	/* Periodic sampling: resume counters and enable the timer with the chosen exponent */
	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
			(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
			 OAG_OAGLBCTXCTRL_TIMER_ENABLE |
			 REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
					stream->period_exponent)) : 0);

	/*
	 * Initialize Super Queue Internal Cnt Register
	 * Set PMON Enable in order to collect valid metrics
	 * Enable bytes per clock reporting
	 */
	sqcnt1 = SQCNT1_PMON_ENABLE |
		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);

	xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);

	/* Configure OAR/OAC */
	if (stream->exec_q) {
		ret = xe_oa_configure_oa_context(stream, true);
		if (ret)
			return ret;
	}

	return xe_oa_emit_oa_config(stream, stream->oa_config);
}

/* Enable OA; for sampling streams also start the poll-check hrtimer */
static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
	stream->pollin = false;

	xe_oa_enable(stream);

	if (stream->sample)
		hrtimer_start(&stream->poll_check_timer,
			      ns_to_ktime(stream->poll_period_ns),
			      HRTIMER_MODE_REL_PINNED);
}

/* Disable OA; for sampling streams also cancel the poll-check hrtimer */
static void xe_oa_stream_disable(struct xe_oa_stream *stream)
{
	xe_oa_disable(stream);

	if (stream->sample)
		hrtimer_cancel(&stream->poll_check_timer);
}

/* Restore the engine-class default timeslice and preempt timeout on the exec queue */
static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret1, ret2;

	/* Best effort recovery: try to revert both to original, irrespective of error */
	ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us);
	ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us);
	if (ret1 || ret2)
		goto err;
	return 0;
err:
	drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2);
	return ret1 ?: ret2;
}

/* Disable timeslicing/preempt-timeout on the exec queue; roll back on failure */
static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret;

	/* Setting values to 0 will disable timeslice and preempt_timeout */
	ret = q->ops->set_timeslice(q, 0);
	if (ret)
		goto err;

	ret = q->ops->set_preempt_timeout(q, 0);
	if (ret)
		goto err;

	return 0;
err:
	xe_oa_enable_preempt_timeslice(stream);
	drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret);
	return ret;
}

/* DRM_XE_PERF_IOCTL_ENABLE handler; caller holds stream_lock */
static int xe_oa_enable_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		return 0;

	/* no_preempt streams turn off preemption for the duration of sampling */
	if (stream->no_preempt) {
		int ret = xe_oa_disable_preempt_timeslice(stream);

		if (ret)
			return ret;
	}

	xe_oa_stream_enable(stream);

	stream->enabled = true;
	return 0;
}

/* DRM_XE_PERF_IOCTL_DISABLE handler; caller holds stream_lock */
static int xe_oa_disable_locked(struct xe_oa_stream *stream)
{
	int ret = 0;

	if (!stream->enabled)
		return 0;

	xe_oa_stream_disable(stream);

	/* Undo the preemption disable done at enable time */
	if (stream->no_preempt)
		ret = xe_oa_enable_preempt_timeslice(stream);

	stream->enabled = false;
	return ret;
}

/*
 * DRM_XE_PERF_IOCTL_CONFIG handler: switch the stream to a different metric
 * set. Returns the (new) config id on success, negative errno on failure.
 * Caller holds stream_lock.
 */
static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
	struct drm_xe_ext_set_property ext;
	long ret = stream->oa_config->id;
	struct xe_oa_config *config;
	int err;

	err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
	if (XE_IOCTL_DBG(stream->oa->xe, err))
		return -EFAULT;

	/* Exactly one SET_PROPERTY extension carrying OA_METRIC_SET is accepted */
	if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
	    XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
		return -EINVAL;

	config = xe_oa_get_oa_config(stream->oa, ext.value);
	if (!config)
		return -ENODEV;

	if (config != stream->oa_config) {
		err = xe_oa_emit_oa_config(stream, config);
		if (!err)
			/* Swap in new config; 'config' now holds the old one for the put below */
			config = xchg(&stream->oa_config, config);
		else
			ret = err;
	}

	xe_oa_config_put(config);

	return ret;
}

/* DRM_XE_PERF_IOCTL_STATUS handler: translate HW OASTATUS bits to uapi flags */
static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_status status = {};
	void __user *uaddr = (void __user *)arg;

	/* Map from register to uapi bits */
	if (stream->oa_status & OASTATUS_REPORT_LOST)
		status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST;
	if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW;
	if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW;
	if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
		status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL;

	if (copy_to_user(uaddr, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

/* DRM_XE_PERF_IOCTL_INFO handler: report OA buffer size to userspace */
static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, };
	void __user *uaddr = (void __user *)arg;

	if (copy_to_user(uaddr, &info, sizeof(info)))
		return -EFAULT;

	return 0;
}

/* Dispatch perf stream ioctls; caller holds stream_lock */
static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
			       unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_PERF_IOCTL_ENABLE:
		return xe_oa_enable_locked(stream);
	case DRM_XE_PERF_IOCTL_DISABLE:
		return xe_oa_disable_locked(stream);
	case DRM_XE_PERF_IOCTL_CONFIG:
		return xe_oa_config_locked(stream, arg);
	case DRM_XE_PERF_IOCTL_STATUS:
		return xe_oa_status_locked(stream, arg);
	case DRM_XE_PERF_IOCTL_INFO:
		return xe_oa_info_locked(stream, arg);
	}

	return -EINVAL;
}

/* unlocked_ioctl entry point for the stream fd: take stream_lock and dispatch */
static long xe_oa_ioctl(struct file *file,
			unsigned int cmd,
			unsigned long arg)
{
	struct xe_oa_stream *stream = file->private_data;
	long ret;

	mutex_lock(&stream->stream_lock);
	ret = xe_oa_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&stream->stream_lock);

	return ret;
}

/* Tear down a stream: disable if needed, destroy, drop exec_q ref, free. Caller holds gt_lock. */
static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		xe_oa_disable_locked(stream);

	xe_oa_stream_destroy(stream);

	if (stream->exec_q)
		xe_exec_queue_put(stream->exec_q);

	kfree(stream);
}

/* release() for the stream fd: destroy the stream and drop the drm device ref */
static int xe_oa_release(struct inode *inode, struct file *file)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	mutex_lock(&gt->oa.gt_lock);
	xe_oa_destroy_locked(stream);
	mutex_unlock(&gt->oa.gt_lock);

	/* Release the reference the perf stream kept on the driver */
	drm_dev_put(&gt_to_xe(gt)->drm);

	return 0;
}

/*
 * mmap() for the stream fd: map the whole OA buffer read-only into userspace.
 * Requires perfmon capability when paranoid mode is set.
 */
static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_bo *bo = stream->oa_buffer.bo;
	unsigned long start = vma->vm_start;
	int i, ret;

	if (xe_perf_stream_paranoid && !perfmon_capable()) {
		drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
		return -EACCES;
	}

	/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
	if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) {
		drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
		return -EINVAL;
	}

	/*
	 * Only support VM_READ, enforce MAP_PRIVATE by checking for
	 * VM_MAYSHARE, don't copy the vma on fork
	 */
	if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) {
		drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n");
		return -EINVAL;
	}
	vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
		     VM_MAYWRITE | VM_MAYEXEC);

	/* Map the BO's pages one at a time into the vma */
	xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages ==
		  (vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
	for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
		ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
				      PAGE_SIZE, vma->vm_page_prot);
		if (ret)
			break;

		start += PAGE_SIZE;
	}

	return ret;
}

/* File operations for the anon-inode stream fd returned by the open ioctl */
static const struct file_operations xe_oa_fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= xe_oa_release,
	.poll		= xe_oa_poll,
	.read		= xe_oa_read,
	.unlocked_ioctl	= xe_oa_ioctl,
	.mmap		= xe_oa_mmap,
};

/* Only render/compute engines can be programmed via MI commands here */
static bool engine_supports_mi_query(struct xe_hw_engine *hwe)
{
	return hwe->class == XE_ENGINE_CLASS_RENDER ||
		hwe->class == XE_ENGINE_CLASS_COMPUTE;
}

/*
 * Scan one MI_LRI command starting at *offset for register 'reg'.
 * On return *offset points at the matching reg entry (found) or past the
 * scanned range (not found).
 */
static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
	u32 idx = *offset;
	u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
	bool found = false;

	idx++;
	for (; idx < len; idx += 2) {
		if (state[idx] == reg) {
			found = true;
			break;
		}
	}

	*offset = idx;
	return found;
}

#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \
			  REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM))

/* Find the dword offset of 'reg' in the context image by walking its MI_LRI commands */
static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
{
	struct xe_lrc *lrc = stream->exec_q->lrc[0];
	u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
		   lrc->ring.size) / sizeof(u32);
	u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
	u32 *state = (u32 *)lrc->bo->vmap.vaddr;

	if (drm_WARN_ON(&stream->oa->xe->drm, !state))
		return U32_MAX;

	for (; offset < len; ) {
		if (IS_MI_LRI_CMD(state[offset])) {
			/*
			 * We expect reg-value pairs in MI_LRI command, so
			 * MI_LRI_LEN() should be even
			 */
			drm_WARN_ON(&stream->oa->xe->drm,
				    MI_LRI_LEN(state[offset]) & 0x1);

			if (xe_oa_find_reg_in_lri(state, reg, &offset, len))
				break;
		} else {
			offset++;
		}
	}

	return offset < len ? offset : U32_MAX;
}

/* Cache (per engine class) the context-image offset of OACTXCONTROL */
static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream)
{
	struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base);
	u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class];

	/* Do this only once. Failure is stored as offset of U32_MAX */
	if (offset)
		goto exit;

	offset = xe_oa_context_image_offset(stream, reg.addr);
	stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset;

	drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n",
		stream->hwe->name, offset);
exit:
	return offset && offset != U32_MAX ? 0 : -ENODEV;
}

/*
 * Initialize a newly-allocated OA stream from the validated open parameters:
 * copy parameters, size the circular buffer, resolve context offsets, apply
 * platform workarounds, take runtime-pm/forcewake refs, allocate the OA
 * buffer and kernel exec queue, and program the metric set. On failure the
 * acquired resources are released in reverse order via the goto chain.
 */
static int xe_oa_stream_init(struct xe_oa_stream *stream,
			     struct xe_oa_open_param *param)
{
	struct xe_oa_unit *u = param->hwe->oa_unit;
	struct xe_gt *gt = param->hwe->gt;
	int ret;

	stream->exec_q = param->exec_q;
	stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS;
	stream->hwe = param->hwe;
	stream->gt = stream->hwe->gt;
	stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];

	stream->sample = param->sample;
	stream->periodic = param->period_exponent > 0;
	stream->period_exponent = param->period_exponent;
	stream->no_preempt = param->no_preempt;

	/*
	 * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
	 * of buffer, making the OA buffer effectively a non-power-of-2 size circular
	 * buffer whose size, circ_size, is a multiple of the report size
	 */
	if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
	    stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
		stream->oa_buffer.circ_size =
			XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
	else
		stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;

	if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
		/* If we don't find the context offset, just return error */
		ret = xe_oa_set_ctx_ctrl_offset(stream);
		if (ret) {
			drm_err(&stream->oa->xe->drm,
				"xe_oa_set_ctx_ctrl_offset failed for %s\n",
				stream->hwe->name);
			goto exit;
		}
	}

	stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
	if (!stream->oa_config) {
		drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
		ret = -EINVAL;
		goto exit;
	}

	/*
	 * Wa_1509372804:pvc
	 *
	 * GuC reset of engines causes OA to lose configuration
	 * state. Prevent this by overriding GUCRC mode.
	 */
	if (stream->oa->xe->info.platform == XE_PVC) {
		ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
						    SLPC_GUCRC_MODE_GUCRC_NO_RC6);
		if (ret)
			goto err_free_configs;

		stream->override_gucrc = true;
	}

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(stream->oa->xe);
	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));

	ret = xe_oa_alloc_oa_buffer(stream);
	if (ret)
		goto err_fw_put;

	stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL,
						BIT(stream->hwe->logical_instance), 1,
						stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
	if (IS_ERR(stream->k_exec_q)) {
		ret = PTR_ERR(stream->k_exec_q);
		drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d",
			stream->gt->info.id, stream->hwe->name, ret);
		goto err_free_oa_buf;
	}

	ret = xe_oa_enable_metric_set(stream);
	if (ret) {
		drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
		goto err_put_k_exec_q;
	}

	drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n",
		stream->oa_config->uuid);

	WRITE_ONCE(u->exclusive_stream, stream);

	hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	stream->poll_check_timer.function = xe_oa_poll_check_timer_cb;
	init_waitqueue_head(&stream->poll_wq);

	spin_lock_init(&stream->oa_buffer.ptr_lock);
	mutex_init(&stream->stream_lock);

	return 0;

err_put_k_exec_q:
	xe_oa_disable_metric_set(stream);
	xe_exec_queue_put(stream->k_exec_q);
err_free_oa_buf:
	xe_oa_free_oa_buffer(stream);
err_fw_put:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_pm_runtime_put(stream->oa->xe);
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
	xe_oa_free_configs(stream);
exit:
	return ret;
}

/*
 * Allocate and initialize a stream, then wrap it in an anon-inode fd.
 * Returns the new fd (>= 0) or negative errno. Caller holds gt_lock.
 */
static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
					  struct xe_oa_open_param *param)
{
	struct xe_oa_stream *stream;
	int stream_fd;
	int ret;

	/* We currently only allow exclusive access */
	if (param->hwe->oa_unit->exclusive_stream) {
		drm_dbg(&oa->xe->drm, "OA unit already in use\n");
		ret = -EBUSY;
		goto exit;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto exit;
	}

	stream->oa = oa;
	ret = xe_oa_stream_init(stream, param);
	if (ret)
		goto err_free;

	if (!param->disabled) {
		ret = xe_oa_enable_locked(stream);
		if (ret)
			goto err_destroy;
	}

	stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_disable;
	}

	/* Hold a reference on the drm device till stream_fd is released */
	drm_dev_get(&stream->oa->xe->drm);

	return stream_fd;
err_disable:
	if (!param->disabled)
		xe_oa_disable_locked(stream);
err_destroy:
	xe_oa_stream_destroy(stream);
err_free:
	kfree(stream);
exit:
	return ret;
}

/**
 * xe_oa_timestamp_frequency - Return OA timestamp frequency
 * @gt: @xe_gt
 *
 * OA timestamp frequency = CS timestamp frequency in most platforms. On some
 * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
 * cases, return the adjusted CS timestamp frequency to the user.
 */
u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
	u32 reg, shift;

	/*
	 * Wa_18013179988:dg2
	 * Wa_14015568240:pvc
	 * Wa_14015846243:mtl
	 */
	switch (gt_to_xe(gt)->info.platform) {
	case XE_DG2:
	case XE_PVC:
	case XE_METEORLAKE:
		xe_pm_runtime_get(gt_to_xe(gt));
		reg = xe_mmio_read32(gt, RPM_CONFIG0);
		xe_pm_runtime_put(gt_to_xe(gt));

		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
		return gt->info.reference_clock << (3 - shift);

	default:
		return gt->info.reference_clock;
	}
}

/* Convert an OA timer exponent to a sampling period in ns (rounded up) */
static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
{
	u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
	u32 den = xe_oa_timestamp_frequency(gt);

	return div_u64(nom + den - 1, den);
}

/* Check whether a report format type is valid for the engine's OA unit type */
static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
{
	switch (hwe->oa_unit->type) {
	case DRM_XE_OA_UNIT_TYPE_OAG:
		return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
			type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
	case DRM_XE_OA_UNIT_TYPE_OAM:
		return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
	default:
		return false;
	}
}

/* Decode the packed uapi format value into a supported xe_oa_format_name index */
static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
{
	u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
	u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
	u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
	u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
	int idx;

	for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
		const struct xe_oa_format *f = &oa->oa_formats[idx];

		if (counter_size == f->counter_size && bc_report == f->bc_report &&
		    type == f->type && counter_sel == f->counter_select) {
			*name = idx;
			return 0;
		}
	}

	return -EINVAL;
}

/**
 * xe_oa_unit_id - Return OA unit ID for a hardware engine
 * @hwe: @xe_hw_engine
 *
 * Return OA unit ID for a hardware engine when available
 */
u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
{
	return hwe->oa_unit && hwe->oa_unit->num_engines ?
		hwe->oa_unit->oa_unit_id : U16_MAX;
}

/* Resolve param->hwe from either the supplied exec_q or the requested OA unit ID */
static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
{
	struct xe_gt *gt;
	int i, ret = 0;

	if (param->exec_q) {
		/* When we have an exec_q, get hwe from the exec_q */
		param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
					     param->engine_instance, true);
	} else {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;

		/* Else just get the first hwe attached to the oa unit */
		for_each_gt(gt, oa->xe, i) {
			for_each_hw_engine(hwe, gt, id) {
				if (xe_oa_unit_id(hwe) == param->oa_unit_id) {
					param->hwe = hwe;
					goto out;
				}
			}
		}
	}
out:
	if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) {
		drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
			param->exec_q ? param->exec_q->class : -1,
			param->engine_instance, param->oa_unit_id);
		ret = -EINVAL;
	}

	return ret;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_UNIT_ID (range-checked) */
static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	if (value >= oa->oa_unit_ids) {
		drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
		return -EINVAL;
	}
	param->oa_unit_id = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_SAMPLE_OA */
static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	param->sample = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_METRIC_SET */
static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	param->metric_set = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_FORMAT (validated via decode_oa_format) */
static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	int ret = decode_oa_format(oa, value, &param->oa_format);

	if (ret) {
		drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
		return ret;
	}
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT (bounded) */
static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
				      struct xe_oa_open_param *param)
{
#define OA_EXPONENT_MAX 31

	if (value > OA_EXPONENT_MAX) {
		drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
		return -EINVAL;
	}
	param->period_exponent = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_DISABLED */
static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
				   struct xe_oa_open_param *param)
{
	param->disabled = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID */
static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
					struct xe_oa_open_param *param)
{
	param->exec_queue_id = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE */
static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
					  struct xe_oa_open_param *param)
{
	param->engine_instance = value;
	return 0;
}

/* Property setter: DRM_XE_OA_PROPERTY_NO_PREEMPT */
static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
				struct xe_oa_open_param *param)
{
	param->no_preempt = value;
	return 0;
}

typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param);
/* Dispatch table indexed by DRM_XE_OA_PROPERTY_* */
static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
};

/* Handle one SET_PROPERTY extension: validate and route to the property setter */
static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
				       struct xe_oa_open_param *param)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(oa->xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
	    XE_IOCTL_DBG(oa->xe, ext.pad))
		return -EINVAL;

	/* array_index_nospec: clamp under speculation (user-controlled index) */
	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
	return xe_oa_set_property_funcs[idx](oa, ext.value, param);
}

typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
				       struct xe_oa_open_param *param);
static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
	[DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	16
/* Walk the user extension chain recursively, capped at MAX_USER_EXTENSIONS entries */
static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
				 struct xe_oa_open_param *param)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(oa->xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
	    XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
	err = xe_oa_user_extension_funcs[idx](oa, extension, param);
	if (XE_IOCTL_DBG(oa->xe, err))
		return err;

	if (ext.next_extension)
		return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);

	return 0;
}

/**
 * xe_oa_stream_open_ioctl - Opens an OA stream
 * @dev: @drm_device
 * @data: pointer to struct @drm_xe_oa_config
 * @file: @drm_file
 *
 * The functions opens an OA stream. An OA stream, opened with specified
 * properties, enables perf counter samples to be collected, either
 * periodically (time based sampling), or on request (using perf queries)
 */
int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_oa *oa = &xe->oa;
	struct xe_file *xef = to_xe_file(file);
	struct xe_oa_open_param param = {};
	const struct xe_oa_format *f;
	bool privileged_op = true;
	int ret;

	if (!oa->xe) {
		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
		return -ENODEV;
	}

	ret = xe_oa_user_extensions(oa, data, 0, &param);
	if (ret)
		return ret;

	if (param.exec_queue_id > 0) {
		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
			return -ENOENT;

		if (param.exec_q->width > 1)
			drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n");
	}

	/*
	 * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC,
	 * without global stream access, can be an unprivileged operation
	 */
	if (param.exec_q && !param.sample)
		privileged_op = false;

	if (param.no_preempt) {
		if (!param.exec_q) {
			drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		privileged_op = true;
	}

	if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) {
		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n");
		ret = -EACCES;
		goto err_exec_q;
	}

	if (!param.exec_q && !param.sample) {
		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
		ret = -EINVAL;
		goto err_exec_q;
	}

	ret = xe_oa_assign_hwe(oa, &param);
	if (ret)
		goto err_exec_q;

	f = &oa->oa_formats[param.oa_format];
	if (!param.oa_format || !f->size ||
	    !engine_supports_oa_format(param.hwe, f->type)) {
		drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
			param.oa_format, f->type, f->size, param.hwe->class);
		ret = -EINVAL;
		goto err_exec_q;
	}

	if (param.period_exponent > 0) {
		u64 oa_period, oa_freq_hz;

		/* Requesting samples from OAG buffer is a privileged operation */
		if (!param.sample) {
			drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
		oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period);
		drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
	}

	mutex_lock(&param.hwe->gt->oa.gt_lock);
	ret = xe_oa_stream_open_ioctl_locked(oa, &param);
	mutex_unlock(&param.hwe->gt->oa.gt_lock);
err_exec_q:
	if (ret < 0 && param.exec_q)
		xe_exec_queue_put(param.exec_q);
	return ret;
}

/* Whitelist check: flex EU perf counter control registers */
static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr)
{
	static const struct xe_reg flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (flex_eu_regs[i].addr == addr)
			return true;
	}
	return false;
}

/* Check addr against a zero-terminated table of inclusive mmio ranges */
static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table)
{
	while (table->start && table->end) {
		if (addr >= table->start && addr <= table->end)
			return true;

		table++;
	}

	return false;
}

static const struct xe_mmio_range xehp_oa_b_counters[] = {
	{ .start = 0xdc48, .end = 0xdc48 },	/* OAA_ENABLE_REG */
	{ .start = 0xdd00, .end = 0xdd48 },	/* OAG_LCE0_0 - OAA_LENABLE_REG */
	{}
};

static const struct xe_mmio_range gen12_oa_b_counters[] = {
	{ .start = 0x2b2c, .end = 0x2b2c },	/* OAG_OA_PESS */
	{ .start = 0xd900, .end = 0xd91c },	/* OAG_OASTARTTRIG[1-8] */
	{ .start = 0xd920, .end = 0xd93c },	/* OAG_OAREPORTTRIG1[1-8] */
	{ .start = 0xd940, .end = 0xd97c },	/* OAG_CEC[0-7][0-1] */
	{ .start = 0xdc00, .end = 0xdc3c },	/* OAG_SCEC[0-7][0-1] */
	{ .start = 0xdc40, .end = 0xdc40 },	/* OAG_SPCTR_CNF */
	{ .start = 0xdc44, .end = 0xdc44 },	/* OAA_DBG_REG */
	{}
};

static const struct xe_mmio_range mtl_oam_b_counters[] = {
	{ .start = 0x393000, .end = 0x39301c },	/* OAM_STARTTRIG1[1-8] */
	{ .start = 0x393020, .end = 0x39303c },	/* OAM_REPORTTRIG1[1-8] */
	{ .start = 0x393040, .end = 0x39307c },	/* OAM_CEC[0-7][0-1] */
	{ .start = 0x393200, .end = 0x39323C },	/* MPES[0-7] */
	{}
};

static const struct xe_mmio_range xe2_oa_b_counters[] = {
	{ .start = 0x393200, .end = 0x39323C },	/* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */
	{ .start = 0x394200, .end = 0x39423C },	/* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */
	{ .start = 0x394A00, .end = 0x394A3C },	/* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */
	{},
};

/* Whitelist check: boolean (B) counter registers across supported platforms */
static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr)
{
	return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) ||
		xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) ||
		xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) ||
		(GRAPHICS_VER(oa->xe) >= 20 &&
		 xe_oa_reg_in_range_table(addr, xe2_oa_b_counters));
}

static const struct xe_mmio_range mtl_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
	{ .start = 0x38d100, .end = 0x38d114},	/* VISACTL */
	{}
};

static const struct xe_mmio_range gen12_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },     /* RPM_CONFIG[0-1] */
	{ .start = 0x0d0c, .end = 0x0d2c },     /* NOA_CONFIG[0-8] */
	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
	{ .start = 0x20cc, .end = 0x20cc },	/* WAIT_FOR_RC6_EXIT */
	{}
};

static const struct xe_mmio_range xe2_oa_mux_regs[] = {
	{ .start = 0x5194, .end = 0x5194 },	/* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
	{ .start = 0x8704, .end = 0x8704 },	/* LMEM_LAT_MEASURE_MCFG_GRP */
	{ .start = 0xB1BC, .end = 0xB1BC },	/* L3_BANK_LAT_MEASURE_LBCF_GFX */
	{ .start = 0xE18C, .end = 0xE18C },	/* SAMPLER_MODE */
	{ .start = 0xE590, .end = 0xE590 },	/* TDL_LSC_LAT_MEASURE_TDL_GFX */
	{ .start = 0x13000, .end = 0x137FC },	/* PES_0_PESL0 - PES_63_UPPER_PESL3 */
	{},
};

/* Whitelist check: NOA mux registers, selected by graphics version */
static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr)
{
	if (GRAPHICS_VER(oa->xe) >= 20)
		return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs);
	else if (GRAPHICS_VERx100(oa->xe) >= 1270)
		return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs);
	else
		return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs);
}

/* A config register is valid if it is a flex EU, B counter, or mux register */
static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr)
{
	return xe_oa_is_valid_flex_addr(oa, addr) ||
		xe_oa_is_valid_b_counter_addr(oa, addr) ||
		xe_oa_is_valid_mux_addr(oa, addr);
}

static struct xe_oa_reg * 2004 + xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), 2005 + u32 __user *regs, u32 n_regs) 2006 + { 2007 + struct xe_oa_reg *oa_regs; 2008 + int err; 2009 + u32 i; 2010 + 2011 + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); 2012 + if (!oa_regs) 2013 + return ERR_PTR(-ENOMEM); 2014 + 2015 + for (i = 0; i < n_regs; i++) { 2016 + u32 addr, value; 2017 + 2018 + err = get_user(addr, regs); 2019 + if (err) 2020 + goto addr_err; 2021 + 2022 + if (!is_valid(oa, addr)) { 2023 + drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); 2024 + err = -EINVAL; 2025 + goto addr_err; 2026 + } 2027 + 2028 + err = get_user(value, regs + 1); 2029 + if (err) 2030 + goto addr_err; 2031 + 2032 + oa_regs[i].addr = XE_REG(addr); 2033 + oa_regs[i].value = value; 2034 + 2035 + regs += 2; 2036 + } 2037 + 2038 + return oa_regs; 2039 + 2040 + addr_err: 2041 + kfree(oa_regs); 2042 + return ERR_PTR(err); 2043 + } 2044 + 2045 + static ssize_t show_dynamic_id(struct kobject *kobj, 2046 + struct kobj_attribute *attr, 2047 + char *buf) 2048 + { 2049 + struct xe_oa_config *oa_config = 2050 + container_of(attr, typeof(*oa_config), sysfs_metric_id); 2051 + 2052 + return sysfs_emit(buf, "%d\n", oa_config->id); 2053 + } 2054 + 2055 + static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, 2056 + struct xe_oa_config *oa_config) 2057 + { 2058 + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); 2059 + oa_config->sysfs_metric_id.attr.name = "id"; 2060 + oa_config->sysfs_metric_id.attr.mode = 0444; 2061 + oa_config->sysfs_metric_id.show = show_dynamic_id; 2062 + oa_config->sysfs_metric_id.store = NULL; 2063 + 2064 + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; 2065 + oa_config->attrs[1] = NULL; 2066 + 2067 + oa_config->sysfs_metric.name = oa_config->uuid; 2068 + oa_config->sysfs_metric.attrs = oa_config->attrs; 2069 + 2070 + return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); 2071 + } 2072 + 2073 
+ /** 2074 + * xe_oa_add_config_ioctl - Adds one OA config 2075 + * @dev: @drm_device 2076 + * @data: pointer to struct @drm_xe_oa_config 2077 + * @file: @drm_file 2078 + * 2079 + * The functions adds an OA config to the set of OA configs maintained in 2080 + * the kernel. The config determines which OA metrics are collected for an 2081 + * OA stream. 2082 + */ 2083 + int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) 2084 + { 2085 + struct xe_device *xe = to_xe_device(dev); 2086 + struct xe_oa *oa = &xe->oa; 2087 + struct drm_xe_oa_config param; 2088 + struct drm_xe_oa_config *arg = &param; 2089 + struct xe_oa_config *oa_config, *tmp; 2090 + struct xe_oa_reg *regs; 2091 + int err, id; 2092 + 2093 + if (!oa->xe) { 2094 + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); 2095 + return -ENODEV; 2096 + } 2097 + 2098 + if (xe_perf_stream_paranoid && !perfmon_capable()) { 2099 + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); 2100 + return -EACCES; 2101 + } 2102 + 2103 + err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); 2104 + if (XE_IOCTL_DBG(oa->xe, err)) 2105 + return -EFAULT; 2106 + 2107 + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || 2108 + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || 2109 + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) 2110 + return -EINVAL; 2111 + 2112 + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 2113 + if (!oa_config) 2114 + return -ENOMEM; 2115 + 2116 + oa_config->oa = oa; 2117 + kref_init(&oa_config->ref); 2118 + 2119 + if (!uuid_is_valid(arg->uuid)) { 2120 + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); 2121 + err = -EINVAL; 2122 + goto reg_err; 2123 + } 2124 + 2125 + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ 2126 + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); 2127 + 2128 + oa_config->regs_len = arg->n_regs; 2129 + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, 2130 + 
u64_to_user_ptr(arg->regs_ptr), 2131 + arg->n_regs); 2132 + if (IS_ERR(regs)) { 2133 + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); 2134 + err = PTR_ERR(regs); 2135 + goto reg_err; 2136 + } 2137 + oa_config->regs = regs; 2138 + 2139 + err = mutex_lock_interruptible(&oa->metrics_lock); 2140 + if (err) 2141 + goto reg_err; 2142 + 2143 + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ 2144 + idr_for_each_entry(&oa->metrics_idr, tmp, id) { 2145 + if (!strcmp(tmp->uuid, oa_config->uuid)) { 2146 + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); 2147 + err = -EADDRINUSE; 2148 + goto sysfs_err; 2149 + } 2150 + } 2151 + 2152 + err = create_dynamic_oa_sysfs_entry(oa, oa_config); 2153 + if (err) { 2154 + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); 2155 + goto sysfs_err; 2156 + } 2157 + 2158 + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); 2159 + if (oa_config->id < 0) { 2160 + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); 2161 + err = oa_config->id; 2162 + goto sysfs_err; 2163 + } 2164 + 2165 + mutex_unlock(&oa->metrics_lock); 2166 + 2167 + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); 2168 + 2169 + return oa_config->id; 2170 + 2171 + sysfs_err: 2172 + mutex_unlock(&oa->metrics_lock); 2173 + reg_err: 2174 + xe_oa_config_put(oa_config); 2175 + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); 2176 + return err; 2177 + } 2178 + 2179 + /** 2180 + * xe_oa_remove_config_ioctl - Removes one OA config 2181 + * @dev: @drm_device 2182 + * @data: pointer to struct @drm_xe_perf_param 2183 + * @file: @drm_file 2184 + */ 2185 + int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) 2186 + { 2187 + struct xe_device *xe = to_xe_device(dev); 2188 + struct xe_oa *oa = &xe->oa; 2189 + struct xe_oa_config *oa_config; 2190 + u64 arg, *ptr = u64_to_user_ptr(data); 2191 
+ int ret; 2192 + 2193 + if (!oa->xe) { 2194 + drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); 2195 + return -ENODEV; 2196 + } 2197 + 2198 + if (xe_perf_stream_paranoid && !perfmon_capable()) { 2199 + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); 2200 + return -EACCES; 2201 + } 2202 + 2203 + ret = get_user(arg, ptr); 2204 + if (XE_IOCTL_DBG(oa->xe, ret)) 2205 + return ret; 2206 + 2207 + ret = mutex_lock_interruptible(&oa->metrics_lock); 2208 + if (ret) 2209 + return ret; 2210 + 2211 + oa_config = idr_find(&oa->metrics_idr, arg); 2212 + if (!oa_config) { 2213 + drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); 2214 + ret = -ENOENT; 2215 + goto err_unlock; 2216 + } 2217 + 2218 + WARN_ON(arg != oa_config->id); 2219 + 2220 + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); 2221 + idr_remove(&oa->metrics_idr, arg); 2222 + 2223 + mutex_unlock(&oa->metrics_lock); 2224 + 2225 + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); 2226 + 2227 + xe_oa_config_put(oa_config); 2228 + 2229 + return 0; 2230 + 2231 + err_unlock: 2232 + mutex_unlock(&oa->metrics_lock); 2233 + return ret; 2234 + } 2235 + 2236 + /** 2237 + * xe_oa_register - Xe OA registration 2238 + * @xe: @xe_device 2239 + * 2240 + * Exposes the metrics sysfs directory upon completion of module initialization 2241 + */ 2242 + void xe_oa_register(struct xe_device *xe) 2243 + { 2244 + struct xe_oa *oa = &xe->oa; 2245 + 2246 + if (!oa->xe) 2247 + return; 2248 + 2249 + oa->metrics_kobj = kobject_create_and_add("metrics", 2250 + &xe->drm.primary->kdev->kobj); 2251 + } 2252 + 2253 + /** 2254 + * xe_oa_unregister - Xe OA de-registration 2255 + * @xe: @xe_device 2256 + */ 2257 + void xe_oa_unregister(struct xe_device *xe) 2258 + { 2259 + struct xe_oa *oa = &xe->oa; 2260 + 2261 + if (!oa->metrics_kobj) 2262 + return; 2263 + 2264 + kobject_put(oa->metrics_kobj); 2265 + oa->metrics_kobj = NULL; 2266 + } 2267 + 2268 + 
static u32 num_oa_units_per_gt(struct xe_gt *gt) 2269 + { 2270 + return 1; 2271 + } 2272 + 2273 + static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) 2274 + { 2275 + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { 2276 + /* 2277 + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices 2278 + * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA 2279 + */ 2280 + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); 2281 + 2282 + return 0; 2283 + } 2284 + 2285 + return XE_OA_UNIT_INVALID; 2286 + } 2287 + 2288 + static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) 2289 + { 2290 + switch (hwe->class) { 2291 + case XE_ENGINE_CLASS_RENDER: 2292 + case XE_ENGINE_CLASS_COMPUTE: 2293 + return 0; 2294 + 2295 + case XE_ENGINE_CLASS_VIDEO_DECODE: 2296 + case XE_ENGINE_CLASS_VIDEO_ENHANCE: 2297 + return __hwe_oam_unit(hwe); 2298 + 2299 + default: 2300 + return XE_OA_UNIT_INVALID; 2301 + } 2302 + } 2303 + 2304 + static struct xe_oa_regs __oam_regs(u32 base) 2305 + { 2306 + return (struct xe_oa_regs) { 2307 + base, 2308 + OAM_HEAD_POINTER(base), 2309 + OAM_TAIL_POINTER(base), 2310 + OAM_BUFFER(base), 2311 + OAM_CONTEXT_CONTROL(base), 2312 + OAM_CONTROL(base), 2313 + OAM_DEBUG(base), 2314 + OAM_STATUS(base), 2315 + OAM_CONTROL_COUNTER_SEL_MASK, 2316 + }; 2317 + } 2318 + 2319 + static struct xe_oa_regs __oag_regs(void) 2320 + { 2321 + return (struct xe_oa_regs) { 2322 + 0, 2323 + OAG_OAHEADPTR, 2324 + OAG_OATAILPTR, 2325 + OAG_OABUFFER, 2326 + OAG_OAGLBCTXCTRL, 2327 + OAG_OACONTROL, 2328 + OAG_OA_DEBUG, 2329 + OAG_OASTATUS, 2330 + OAG_OACONTROL_OA_COUNTER_SEL_MASK, 2331 + }; 2332 + } 2333 + 2334 + static void __xe_oa_init_oa_units(struct xe_gt *gt) 2335 + { 2336 + const u32 mtl_oa_base[] = { 0x13000 }; 2337 + int i, num_units = gt->oa.num_oa_units; 2338 + 2339 + for (i = 0; i < num_units; i++) { 2340 + struct xe_oa_unit *u = &gt->oa.oa_unit[i]; 2341 + 2342 + if (gt->info.type != XE_GT_TYPE_MEDIA) { 2343 + u->regs = __oag_regs(); 2344 + u->type = 
DRM_XE_OA_UNIT_TYPE_OAG; 2345 + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { 2346 + u->regs = __oam_regs(mtl_oa_base[i]); 2347 + u->type = DRM_XE_OA_UNIT_TYPE_OAM; 2348 + } 2349 + 2350 + /* Ensure MMIO trigger remains disabled till there is a stream */ 2351 + xe_mmio_write32(gt, u->regs.oa_debug, 2352 + oag_configure_mmio_trigger(NULL, false)); 2353 + 2354 + /* Set oa_unit_ids now to ensure ids remain contiguous */ 2355 + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; 2356 + } 2357 + } 2358 + 2359 + static int xe_oa_init_gt(struct xe_gt *gt) 2360 + { 2361 + u32 num_oa_units = num_oa_units_per_gt(gt); 2362 + struct xe_hw_engine *hwe; 2363 + enum xe_hw_engine_id id; 2364 + struct xe_oa_unit *u; 2365 + 2366 + u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); 2367 + if (!u) 2368 + return -ENOMEM; 2369 + 2370 + for_each_hw_engine(hwe, gt, id) { 2371 + u32 index = __hwe_oa_unit(hwe); 2372 + 2373 + hwe->oa_unit = NULL; 2374 + if (index < num_oa_units) { 2375 + u[index].num_engines++; 2376 + hwe->oa_unit = &u[index]; 2377 + } 2378 + } 2379 + 2380 + /* 2381 + * Fused off engines can result in oa_unit's with num_engines == 0. These units 2382 + * will appear in OA unit query, but no perf streams can be opened on them. 
2383 + */ 2384 + gt->oa.num_oa_units = num_oa_units; 2385 + gt->oa.oa_unit = u; 2386 + 2387 + __xe_oa_init_oa_units(gt); 2388 + 2389 + drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock); 2390 + 2391 + return 0; 2392 + } 2393 + 2394 + static int xe_oa_init_oa_units(struct xe_oa *oa) 2395 + { 2396 + struct xe_gt *gt; 2397 + int i, ret; 2398 + 2399 + for_each_gt(gt, oa->xe, i) { 2400 + ret = xe_oa_init_gt(gt); 2401 + if (ret) 2402 + return ret; 2403 + } 2404 + 2405 + return 0; 2406 + } 2407 + 2408 + static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) 2409 + { 2410 + __set_bit(format, oa->format_mask); 2411 + } 2412 + 2413 + static void xe_oa_init_supported_formats(struct xe_oa *oa) 2414 + { 2415 + if (GRAPHICS_VER(oa->xe) >= 20) { 2416 + /* Xe2+ */ 2417 + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); 2418 + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); 2419 + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); 2420 + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); 2421 + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); 2422 + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); 2423 + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); 2424 + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); 2425 + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); 2426 + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); 2427 + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); 2428 + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { 2429 + /* XE_METEORLAKE */ 2430 + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); 2431 + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); 2432 + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); 2433 + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); 2434 + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); 2435 + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); 2436 + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { 2437 + /* XE_DG2, XE_PVC */ 2438 + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); 2439 + oa_format_add(oa, 
XE_OA_FORMAT_A24u40_A14u32_B8_C8); 2440 + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); 2441 + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); 2442 + } else { 2443 + /* Gen12+ */ 2444 + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); 2445 + oa_format_add(oa, XE_OA_FORMAT_A12); 2446 + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); 2447 + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); 2448 + oa_format_add(oa, XE_OA_FORMAT_C4_B8); 2449 + } 2450 + } 2451 + 2452 + /** 2453 + * xe_oa_init - OA initialization during device probe 2454 + * @xe: @xe_device 2455 + * 2456 + * Return: 0 on success or a negative error code on failure 2457 + */ 2458 + int xe_oa_init(struct xe_device *xe) 2459 + { 2460 + struct xe_oa *oa = &xe->oa; 2461 + int ret; 2462 + 2463 + /* Support OA only with GuC submission and Gen12+ */ 2464 + if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) 2465 + return 0; 2466 + 2467 + if (IS_SRIOV_VF(xe)) 2468 + return 0; 2469 + 2470 + oa->xe = xe; 2471 + oa->oa_formats = oa_formats; 2472 + 2473 + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); 2474 + idr_init_base(&oa->metrics_idr, 1); 2475 + 2476 + ret = xe_oa_init_oa_units(oa); 2477 + if (ret) { 2478 + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); 2479 + goto exit; 2480 + } 2481 + 2482 + xe_oa_init_supported_formats(oa); 2483 + return 0; 2484 + exit: 2485 + oa->xe = NULL; 2486 + return ret; 2487 + } 2488 + 2489 + static int destroy_config(int id, void *p, void *data) 2490 + { 2491 + xe_oa_config_put(p); 2492 + return 0; 2493 + } 2494 + 2495 + /** 2496 + * xe_oa_fini - OA de-initialization during device remove 2497 + * @xe: @xe_device 2498 + */ 2499 + void xe_oa_fini(struct xe_device *xe) 2500 + { 2501 + struct xe_oa *oa = &xe->oa; 2502 + 2503 + if (!oa->xe) 2504 + return; 2505 + 2506 + idr_for_each(&oa->metrics_idr, destroy_config, oa); 2507 + idr_destroy(&oa->metrics_idr); 2508 + 2509 + oa->xe = NULL; 2510 + }
+27
drivers/gpu/drm/xe/xe_oa.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_OA_H_ 7 + #define _XE_OA_H_ 8 + 9 + #include "xe_oa_types.h" 10 + 11 + struct drm_device; 12 + struct drm_file; 13 + struct xe_device; 14 + struct xe_gt; 15 + struct xe_hw_engine; 16 + 17 + int xe_oa_init(struct xe_device *xe); 18 + void xe_oa_fini(struct xe_device *xe); 19 + void xe_oa_register(struct xe_device *xe); 20 + void xe_oa_unregister(struct xe_device *xe); 21 + int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); 22 + int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); 23 + int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); 24 + u32 xe_oa_timestamp_frequency(struct xe_gt *gt); 25 + u16 xe_oa_unit_id(struct xe_hw_engine *hwe); 26 + 27 + #endif
+242
drivers/gpu/drm/xe/xe_oa_types.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_OA_TYPES_H_ 7 + #define _XE_OA_TYPES_H_ 8 + 9 + #include <linux/bitops.h> 10 + #include <linux/idr.h> 11 + #include <linux/mutex.h> 12 + #include <linux/types.h> 13 + 14 + #include <drm/xe_drm.h> 15 + #include "regs/xe_reg_defs.h" 16 + #include "xe_hw_engine_types.h" 17 + 18 + #define XE_OA_BUFFER_SIZE SZ_16M 19 + 20 + enum xe_oa_report_header { 21 + HDR_32_BIT = 0, 22 + HDR_64_BIT, 23 + }; 24 + 25 + enum xe_oa_format_name { 26 + XE_OA_FORMAT_C4_B8, 27 + 28 + /* Gen8+ */ 29 + XE_OA_FORMAT_A12, 30 + XE_OA_FORMAT_A12_B8_C8, 31 + XE_OA_FORMAT_A32u40_A4u32_B8_C8, 32 + 33 + /* DG2 */ 34 + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, 35 + XE_OA_FORMAT_A24u40_A14u32_B8_C8, 36 + 37 + /* DG2/MTL OAC */ 38 + XE_OAC_FORMAT_A24u64_B8_C8, 39 + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, 40 + 41 + /* MTL OAM */ 42 + XE_OAM_FORMAT_MPEC8u64_B8_C8, 43 + XE_OAM_FORMAT_MPEC8u32_B8_C8, 44 + 45 + /* Xe2+ */ 46 + XE_OA_FORMAT_PEC64u64, 47 + XE_OA_FORMAT_PEC64u64_B8_C8, 48 + XE_OA_FORMAT_PEC64u32, 49 + XE_OA_FORMAT_PEC32u64_G1, 50 + XE_OA_FORMAT_PEC32u32_G1, 51 + XE_OA_FORMAT_PEC32u64_G2, 52 + XE_OA_FORMAT_PEC32u32_G2, 53 + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, 54 + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, 55 + 56 + __XE_OA_FORMAT_MAX, 57 + }; 58 + 59 + /** 60 + * struct xe_oa_format - Format fields for supported OA formats. 
OA format 61 + * properties are specified in PRM/Bspec 52198 and 60942 62 + */ 63 + struct xe_oa_format { 64 + /** @counter_select: counter select value (see Bspec 52198/60942) */ 65 + u32 counter_select; 66 + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ 67 + int size; 68 + /** @type: of enum @drm_xe_oa_format_type */ 69 + int type; 70 + /** @header: 32 or 64 bit report headers */ 71 + enum xe_oa_report_header header; 72 + /** @counter_size: counter size value (see Bspec 60942) */ 73 + u16 counter_size; 74 + /** @bc_report: BC report value (see Bspec 60942) */ 75 + u16 bc_report; 76 + }; 77 + 78 + /** struct xe_oa_regs - Registers for each OA unit */ 79 + struct xe_oa_regs { 80 + u32 base; 81 + struct xe_reg oa_head_ptr; 82 + struct xe_reg oa_tail_ptr; 83 + struct xe_reg oa_buffer; 84 + struct xe_reg oa_ctx_ctrl; 85 + struct xe_reg oa_ctrl; 86 + struct xe_reg oa_debug; 87 + struct xe_reg oa_status; 88 + u32 oa_ctrl_counter_select_mask; 89 + }; 90 + 91 + /** 92 + * struct xe_oa_unit - Hardware OA unit 93 + */ 94 + struct xe_oa_unit { 95 + /** @oa_unit_id: identifier for the OA unit */ 96 + u16 oa_unit_id; 97 + 98 + /** @type: Type of OA unit - OAM, OAG etc. 
*/ 99 + enum drm_xe_oa_unit_type type; 100 + 101 + /** @regs: OA registers for programming the OA unit */ 102 + struct xe_oa_regs regs; 103 + 104 + /** @num_engines: number of engines attached to this OA unit */ 105 + u32 num_engines; 106 + 107 + /** @exclusive_stream: The stream currently using the OA unit */ 108 + struct xe_oa_stream *exclusive_stream; 109 + }; 110 + 111 + /** 112 + * struct xe_oa_gt - OA per-gt information 113 + */ 114 + struct xe_oa_gt { 115 + /** @gt_lock: lock protecting create/destroy OA streams */ 116 + struct mutex gt_lock; 117 + 118 + /** @num_oa_units: number of oa units for each gt */ 119 + u32 num_oa_units; 120 + 121 + /** @oa_unit: array of oa_units */ 122 + struct xe_oa_unit *oa_unit; 123 + }; 124 + 125 + /** 126 + * struct xe_oa - OA device level information 127 + */ 128 + struct xe_oa { 129 + /** @xe: back pointer to xe device */ 130 + struct xe_device *xe; 131 + 132 + /** @metrics_kobj: kobj for metrics sysfs */ 133 + struct kobject *metrics_kobj; 134 + 135 + /** @metrics_lock: lock protecting add/remove configs */ 136 + struct mutex metrics_lock; 137 + 138 + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ 139 + struct idr metrics_idr; 140 + 141 + /** @ctx_oactxctrl_offset: offset of OACTXCONTROL register in context image */ 142 + u32 ctx_oactxctrl_offset[XE_ENGINE_CLASS_MAX]; 143 + 144 + /** @oa_formats: tracks all OA formats across platforms */ 145 + const struct xe_oa_format *oa_formats; 146 + 147 + /** @format_mask: tracks valid OA formats for a platform */ 148 + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; 149 + 150 + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ 151 + u16 oa_unit_ids; 152 + }; 153 + 154 + /** @xe_oa_buffer: State of the stream OA buffer */ 155 + struct xe_oa_buffer { 156 + /** @format: data format */ 157 + const struct xe_oa_format *format; 158 + 159 + /** @format: xe_bo backing the OA buffer */ 160 + struct xe_bo *bo; 161 + 162 + /** @vaddr: mapped 
vaddr of the OA buffer */ 163 + u8 *vaddr; 164 + 165 + /** @ptr_lock: Lock protecting reads/writes to head/tail pointers */ 166 + spinlock_t ptr_lock; 167 + 168 + /** @head: Cached head to read from */ 169 + u32 head; 170 + 171 + /** @tail: The last verified cached tail where HW has completed writing */ 172 + u32 tail; 173 + 174 + /** @circ_size: The effective circular buffer size, for Xe2+ */ 175 + u32 circ_size; 176 + }; 177 + 178 + /** 179 + * struct xe_oa_stream - state for a single open stream FD 180 + */ 181 + struct xe_oa_stream { 182 + /** @oa: xe_oa backpointer */ 183 + struct xe_oa *oa; 184 + 185 + /** @gt: gt associated with the oa stream */ 186 + struct xe_gt *gt; 187 + 188 + /** @hwe: hardware engine associated with this oa stream */ 189 + struct xe_hw_engine *hwe; 190 + 191 + /** @stream_lock: Lock serializing stream operations */ 192 + struct mutex stream_lock; 193 + 194 + /** @sample: true if DRM_XE_OA_PROP_SAMPLE_OA is provided */ 195 + bool sample; 196 + 197 + /** @exec_q: Exec queue corresponding to DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID */ 198 + struct xe_exec_queue *exec_q; 199 + 200 + /** @k_exec_q: kernel exec_q used for OA programming batch submissions */ 201 + struct xe_exec_queue *k_exec_q; 202 + 203 + /** @enabled: Whether the stream is currently enabled */ 204 + bool enabled; 205 + 206 + /** @oa_config: OA configuration used by the stream */ 207 + struct xe_oa_config *oa_config; 208 + 209 + /** @oa_config_bos: List of struct @xe_oa_config_bo's */ 210 + struct llist_head oa_config_bos; 211 + 212 + /** @poll_check_timer: Timer to periodically check for data in the OA buffer */ 213 + struct hrtimer poll_check_timer; 214 + 215 + /** @poll_wq: Wait queue for waiting for OA data to be available */ 216 + wait_queue_head_t poll_wq; 217 + 218 + /** @pollin: Whether there is data available to read */ 219 + bool pollin; 220 + 221 + /** @periodic: Whether periodic sampling is currently enabled */ 222 + bool periodic; 223 + 224 + /** @period_exponent: OA 
unit sampling frequency is derived from this */ 225 + int period_exponent; 226 + 227 + /** @oa_buffer: OA buffer for the stream */ 228 + struct xe_oa_buffer oa_buffer; 229 + 230 + /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ 231 + u64 poll_period_ns; 232 + 233 + /** @override_gucrc: GuC RC has been overridden for the OA stream */ 234 + bool override_gucrc; 235 + 236 + /** @oa_status: temporary storage for oa_status register value */ 237 + u32 oa_status; 238 + 239 + /** @no_preempt: Whether preemption and timeslicing is disabled for stream exec_q */ 240 + u32 no_preempt; 241 + }; 242 + #endif
+7 -1
drivers/gpu/drm/xe/xe_pci.c
··· 340 340 .require_force_probe = true, 341 341 }; 342 342 343 - static const struct xe_device_desc bmg_desc __maybe_unused = { 343 + static const struct xe_device_desc bmg_desc = { 344 344 DGFX_FEATURES, 345 345 PLATFORM(BATTLEMAGE), 346 346 .has_display = true, ··· 390 390 XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), 391 391 XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), 392 392 XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), 393 + XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), 393 394 { } 394 395 }; 395 396 MODULE_DEVICE_TABLE(pci, pciidlist); ··· 747 746 xe = pci_get_drvdata(pdev); 748 747 if (!xe) /* driver load aborted, nothing to cleanup */ 749 748 return; 749 + 750 + #ifdef CONFIG_PCI_IOV 751 + if (IS_SRIOV_PF(xe)) 752 + xe_pci_sriov_configure(pdev, 0); 753 + #endif 750 754 751 755 xe_device_remove(xe); 752 756 xe_pm_runtime_fini(xe);
+14
drivers/gpu/drm/xe/xe_pci_sriov.c
··· 6 6 #include "xe_assert.h" 7 7 #include "xe_device.h" 8 8 #include "xe_gt_sriov_pf_config.h" 9 + #include "xe_gt_sriov_pf_control.h" 9 10 #include "xe_pci_sriov.h" 10 11 #include "xe_pm.h" 11 12 #include "xe_sriov.h" ··· 36 35 for_each_gt(gt, xe, id) 37 36 for (n = 1; n <= num_vfs; n++) 38 37 xe_gt_sriov_pf_config_release(gt, n, true); 38 + } 39 + 40 + static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs) 41 + { 42 + struct xe_gt *gt; 43 + unsigned int id; 44 + unsigned int n; 45 + 46 + for_each_gt(gt, xe, id) 47 + for (n = 1; n <= num_vfs; n++) 48 + xe_gt_sriov_pf_control_trigger_flr(gt, n); 39 49 } 40 50 41 51 static int pf_enable_vfs(struct xe_device *xe, int num_vfs) ··· 105 93 return 0; 106 94 107 95 pci_disable_sriov(pdev); 96 + 97 + pf_reset_vfs(xe, num_vfs); 108 98 109 99 pf_unprovision_vfs(xe, num_vfs); 110 100
+92
drivers/gpu/drm/xe/xe_perf.c
··· 1 + // SPDX-License-Identifier: MIT 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #include <linux/errno.h> 7 + #include <linux/sysctl.h> 8 + 9 + #include <drm/xe_drm.h> 10 + 11 + #include "xe_oa.h" 12 + #include "xe_perf.h" 13 + 14 + u32 xe_perf_stream_paranoid = true; 15 + static struct ctl_table_header *sysctl_header; 16 + 17 + static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, 18 + struct drm_file *file) 19 + { 20 + switch (arg->perf_op) { 21 + case DRM_XE_PERF_OP_STREAM_OPEN: 22 + return xe_oa_stream_open_ioctl(dev, arg->param, file); 23 + case DRM_XE_PERF_OP_ADD_CONFIG: 24 + return xe_oa_add_config_ioctl(dev, arg->param, file); 25 + case DRM_XE_PERF_OP_REMOVE_CONFIG: 26 + return xe_oa_remove_config_ioctl(dev, arg->param, file); 27 + default: 28 + return -EINVAL; 29 + } 30 + } 31 + 32 + /** 33 + * xe_perf_ioctl - The top level perf layer ioctl 34 + * @dev: @drm_device 35 + * @data: pointer to struct @drm_xe_perf_param 36 + * @file: @drm_file 37 + * 38 + * The function is called for different perf streams types and allows execution 39 + * of different operations supported by those perf stream types. 40 + * 41 + * Return: 0 on success or a negative error code on failure. 
42 + */ 43 + int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) 44 + { 45 + struct drm_xe_perf_param *arg = data; 46 + 47 + if (arg->extensions) 48 + return -EINVAL; 49 + 50 + switch (arg->perf_type) { 51 + case DRM_XE_PERF_TYPE_OA: 52 + return xe_oa_ioctl(dev, arg, file); 53 + default: 54 + return -EINVAL; 55 + } 56 + } 57 + 58 + static struct ctl_table perf_ctl_table[] = { 59 + { 60 + .procname = "perf_stream_paranoid", 61 + .data = &xe_perf_stream_paranoid, 62 + .maxlen = sizeof(xe_perf_stream_paranoid), 63 + .mode = 0644, 64 + .proc_handler = proc_dointvec_minmax, 65 + .extra1 = SYSCTL_ZERO, 66 + .extra2 = SYSCTL_ONE, 67 + }, 68 + {} 69 + }; 70 + 71 + /** 72 + * xe_perf_sysctl_register - Register "perf_stream_paranoid" sysctl 73 + * 74 + * Normally only superuser/root can access perf counter data. However, 75 + * superuser can set perf_stream_paranoid sysctl to 0 to allow non-privileged 76 + * users to also access perf data. 77 + * 78 + * Return: always returns 0 79 + */ 80 + int xe_perf_sysctl_register(void) 81 + { 82 + sysctl_header = register_sysctl("dev/xe", perf_ctl_table); 83 + return 0; 84 + } 85 + 86 + /** 87 + * xe_perf_sysctl_unregister - Unregister "perf_stream_paranoid" sysctl 88 + */ 89 + void xe_perf_sysctl_unregister(void) 90 + { 91 + unregister_sysctl_table(sysctl_header); 92 + }
+20
drivers/gpu/drm/xe/xe_perf.h
··· 1 + /* SPDX-License-Identifier: MIT */ 2 + /* 3 + * Copyright © 2023-2024 Intel Corporation 4 + */ 5 + 6 + #ifndef _XE_PERF_H_ 7 + #define _XE_PERF_H_ 8 + 9 + #include <linux/types.h> 10 + 11 + struct drm_device; 12 + struct drm_file; 13 + 14 + extern u32 xe_perf_stream_paranoid; 15 + 16 + int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); 17 + int xe_perf_sysctl_register(void); 18 + void xe_perf_sysctl_unregister(void); 19 + 20 + #endif
+1 -1
drivers/gpu/drm/xe/xe_preempt_fence.c
··· 129 129 list_del_init(&pfence->link); 130 130 pfence->q = xe_exec_queue_get(q); 131 131 dma_fence_init(&pfence->base, &preempt_fence_ops, 132 - &q->compute.lock, context, seqno); 132 + &q->lr.lock, context, seqno); 133 133 134 134 return &pfence->base; 135 135 }
+5 -3
drivers/gpu/drm/xe/xe_pt.c
··· 1137 1137 { 1138 1138 struct invalidation_fence *ifence = 1139 1139 container_of(cb, struct invalidation_fence, cb); 1140 + struct xe_device *xe = gt_to_xe(ifence->gt); 1140 1141 1141 - trace_xe_gt_tlb_invalidation_fence_cb(&ifence->base); 1142 + trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); 1142 1143 if (!ifence->fence->error) { 1143 1144 queue_work(system_wq, &ifence->work); 1144 1145 } else { ··· 1154 1153 { 1155 1154 struct invalidation_fence *ifence = 1156 1155 container_of(w, struct invalidation_fence, work); 1156 + struct xe_device *xe = gt_to_xe(ifence->gt); 1157 1157 1158 - trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base); 1158 + trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); 1159 1159 xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, 1160 1160 ifence->end, ifence->asid); 1161 1161 } ··· 1168 1166 { 1169 1167 int ret; 1170 1168 1171 - trace_xe_gt_tlb_invalidation_fence_create(&ifence->base); 1169 + trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); 1172 1170 1173 1171 spin_lock_irq(&gt->tlb_invalidation.lock); 1174 1172 dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
+77
drivers/gpu/drm/xe/xe_query.c
··· 602 602 return 0; 603 603 } 604 604 605 + static size_t calc_oa_unit_query_size(struct xe_device *xe) 606 + { 607 + size_t size = sizeof(struct drm_xe_query_oa_units); 608 + struct xe_gt *gt; 609 + int i, id; 610 + 611 + for_each_gt(gt, xe, id) { 612 + for (i = 0; i < gt->oa.num_oa_units; i++) { 613 + size += sizeof(struct drm_xe_oa_unit); 614 + size += gt->oa.oa_unit[i].num_engines * 615 + sizeof(struct drm_xe_engine_class_instance); 616 + } 617 + } 618 + 619 + return size; 620 + } 621 + 622 + static int query_oa_units(struct xe_device *xe, 623 + struct drm_xe_device_query *query) 624 + { 625 + void __user *query_ptr = u64_to_user_ptr(query->data); 626 + size_t size = calc_oa_unit_query_size(xe); 627 + struct drm_xe_query_oa_units *qoa; 628 + enum xe_hw_engine_id hwe_id; 629 + struct drm_xe_oa_unit *du; 630 + struct xe_hw_engine *hwe; 631 + struct xe_oa_unit *u; 632 + int gt_id, i, j, ret; 633 + struct xe_gt *gt; 634 + u8 *pdu; 635 + 636 + if (query->size == 0) { 637 + query->size = size; 638 + return 0; 639 + } else if (XE_IOCTL_DBG(xe, query->size != size)) { 640 + return -EINVAL; 641 + } 642 + 643 + qoa = kzalloc(size, GFP_KERNEL); 644 + if (!qoa) 645 + return -ENOMEM; 646 + 647 + pdu = (u8 *)&qoa->oa_units[0]; 648 + for_each_gt(gt, xe, gt_id) { 649 + for (i = 0; i < gt->oa.num_oa_units; i++) { 650 + u = &gt->oa.oa_unit[i]; 651 + du = (struct drm_xe_oa_unit *)pdu; 652 + 653 + du->oa_unit_id = u->oa_unit_id; 654 + du->oa_unit_type = u->type; 655 + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 656 + du->capabilities = DRM_XE_OA_CAPS_BASE; 657 + 658 + j = 0; 659 + for_each_hw_engine(hwe, gt, hwe_id) { 660 + if (!xe_hw_engine_is_reserved(hwe) && 661 + xe_oa_unit_id(hwe) == u->oa_unit_id) { 662 + du->eci[j].engine_class = 663 + xe_to_user_engine_class[hwe->class]; 664 + du->eci[j].engine_instance = hwe->logical_instance; 665 + du->eci[j].gt_id = gt->info.id; 666 + j++; 667 + } 668 + } 669 + du->num_engines = j; 670 + pdu += sizeof(*du) + j * 
sizeof(du->eci[0]); 671 + qoa->num_oa_units++; 672 + } 673 + } 674 + 675 + ret = copy_to_user(query_ptr, qoa, size); 676 + kfree(qoa); 677 + 678 + return ret ? -EFAULT : 0; 679 + } 680 + 605 681 static int (* const xe_query_funcs[])(struct xe_device *xe, 606 682 struct drm_xe_device_query *query) = { 607 683 query_engines, ··· 688 612 query_gt_topology, 689 613 query_engine_cycles, 690 614 query_uc_fw_version, 615 + query_oa_units, 691 616 }; 692 617 693 618 int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+23 -1
drivers/gpu/drm/xe/xe_reg_whitelist.c
··· 7 7 8 8 #include "regs/xe_engine_regs.h" 9 9 #include "regs/xe_gt_regs.h" 10 + #include "regs/xe_oa_regs.h" 10 11 #include "regs/xe_regs.h" 11 12 #include "xe_gt_types.h" 12 13 #include "xe_platform_types.h" ··· 64 63 ENGINE_CLASS(RENDER)), 65 64 XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) 66 65 }, 67 - 66 + { XE_RTP_NAME("oa_reg_render"), 67 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), 68 + ENGINE_CLASS(RENDER)), 69 + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, 70 + RING_FORCE_TO_NONPRIV_ACCESS_RW), 71 + WHITELIST(OAG_OASTATUS, 72 + RING_FORCE_TO_NONPRIV_ACCESS_RD), 73 + WHITELIST(OAG_OAHEADPTR, 74 + RING_FORCE_TO_NONPRIV_ACCESS_RD | 75 + RING_FORCE_TO_NONPRIV_RANGE_4)) 76 + }, 77 + { XE_RTP_NAME("oa_reg_compute"), 78 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED), 79 + ENGINE_CLASS(COMPUTE)), 80 + XE_RTP_ACTIONS(WHITELIST(OAG_MMIOTRIGGER, 81 + RING_FORCE_TO_NONPRIV_ACCESS_RW), 82 + WHITELIST(OAG_OASTATUS, 83 + RING_FORCE_TO_NONPRIV_ACCESS_RD), 84 + WHITELIST(OAG_OAHEADPTR, 85 + RING_FORCE_TO_NONPRIV_ACCESS_RD | 86 + RING_FORCE_TO_NONPRIV_RANGE_4)) 87 + }, 68 88 {} 69 89 }; 70 90
+21
drivers/gpu/drm/xe/xe_ring_ops.c
··· 224 224 return job->q->vm ? BIT(8) : 0; 225 225 } 226 226 227 + static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) 228 + { 229 + dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | 230 + MI_COPY_MEM_MEM_DST_GGTT; 231 + dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); 232 + dw[i++] = 0; 233 + dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); 234 + dw[i++] = 0; 235 + dw[i++] = MI_NOOP; 236 + 237 + return i; 238 + } 239 + 227 240 /* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */ 228 241 static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc, 229 242 u64 batch_addr, u32 seqno) ··· 244 231 u32 dw[MAX_JOB_SIZE_DW], i = 0; 245 232 u32 ppgtt_flag = get_ppgtt_flag(job); 246 233 struct xe_gt *gt = job->q->gt; 234 + 235 + i = emit_copy_timestamp(lrc, dw, i); 247 236 248 237 if (job->ring_ops_flush_tlb) { 249 238 dw[i++] = preparser_disable(true); ··· 298 283 struct xe_device *xe = gt_to_xe(gt); 299 284 bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE; 300 285 286 + i = emit_copy_timestamp(lrc, dw, i); 287 + 301 288 dw[i++] = preparser_disable(true); 302 289 303 290 /* hsdes: 1809175790 */ ··· 349 332 bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK); 350 333 u32 mask_flags = 0; 351 334 335 + i = emit_copy_timestamp(lrc, dw, i); 336 + 352 337 dw[i++] = preparser_disable(true); 353 338 if (lacks_render) 354 339 mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS; ··· 393 374 struct xe_lrc *lrc, u32 seqno) 394 375 { 395 376 u32 dw[MAX_JOB_SIZE_DW], i = 0; 377 + 378 + i = emit_copy_timestamp(lrc, dw, i); 396 379 397 380 i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), 398 381 seqno, dw, i);
+37 -8
drivers/gpu/drm/xe/xe_rtp.c
··· 13 13 #include "xe_gt_topology.h" 14 14 #include "xe_macros.h" 15 15 #include "xe_reg_sr.h" 16 + #include "xe_sriov.h" 16 17 17 18 /** 18 19 * DOC: Register Table Processing ··· 36 35 unsigned int n_rules) 37 36 { 38 37 const struct xe_rtp_rule *r; 39 - unsigned int i; 38 + unsigned int i, rcount = 0; 40 39 bool match; 41 40 42 41 for (r = rules, i = 0; i < n_rules; r = &rules[++i]) { 43 42 switch (r->match_type) { 43 + case XE_RTP_MATCH_OR: 44 + /* 45 + * This is only reached if a complete set of 46 + * rules passed or none were evaluated. For both cases, 47 + * shortcut the other rules and return the proper value. 48 + */ 49 + goto done; 44 50 case XE_RTP_MATCH_PLATFORM: 45 51 match = xe->info.platform == r->platform; 46 52 break; ··· 63 55 match = xe->info.graphics_verx100 >= r->ver_start && 64 56 xe->info.graphics_verx100 <= r->ver_end && 65 57 (!has_samedia(xe) || !xe_gt_is_media_type(gt)); 58 + break; 59 + case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: 60 + match = xe->info.graphics_verx100 == r->ver_start; 66 61 break; 67 62 case XE_RTP_MATCH_GRAPHICS_STEP: 68 63 match = xe->info.step.graphics >= r->step_start && ··· 85 74 match = xe->info.step.media >= r->step_start && 86 75 xe->info.step.media < r->step_end && 87 76 (!has_samedia(xe) || xe_gt_is_media_type(gt)); 77 + break; 78 + case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: 79 + match = xe->info.media_verx100 == r->ver_start; 88 80 break; 89 81 case XE_RTP_MATCH_INTEGRATED: 90 82 match = !xe->info.is_dgfx; ··· 116 102 match = false; 117 103 } 118 104 119 - if (!match) 120 - return false; 105 + if (!match) { 106 + /* 107 + * Advance rules until we find XE_RTP_MATCH_OR to check 108 + * if there's another set of conditions to check 109 + */ 110 + while (i < n_rules && rules[++i].match_type != XE_RTP_MATCH_OR) 111 + ; 112 + 113 + if (i >= n_rules) 114 + return false; 115 + 116 + rcount = 0; 117 + } else { 118 + rcount++; 119 + } 121 120 } 121 + 122 + done: 123 + if (drm_WARN_ON(&xe->drm, !rcount)) 124 + return 
false; 122 125 123 126 return true; 124 127 } ··· 258 227 259 228 rtp_get_context(ctx, &hwe, &gt, &xe); 260 229 230 + if (IS_SRIOV_VF(xe)) 231 + return; 232 + 261 233 for (entry = entries; entry && entry->name; entry++) { 262 234 bool match = false; 263 235 ··· 358 324 return dss >= dss_per_gslice; 359 325 } 360 326 361 - bool xe_rtp_match_when_media2000(const struct xe_gt *gt, 362 - const struct xe_hw_engine *hwe) 363 - { 364 - return (gt_to_xe(gt))->info.media_verx100 == 2000; 365 - }
+52 -17
drivers/gpu/drm/xe/xe_rtp.h
··· 140 140 .ver_start = ver_start__, .ver_end = ver_end__, } 141 141 142 142 /** 143 - * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version 143 + * XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT - Create rule matching graphics version on any GT 144 144 * @ver__: Graphics IP version to match 145 + * 146 + * Like XE_RTP_RULE_GRAPHICS_VERSION, but it matches even if the current GT 147 + * being checked is not of the graphics type. It allows to add RTP entries to 148 + * another GT when the device contains a Graphics IP with that version. 149 + * 150 + * Refer to XE_RTP_RULES() for expected usage. 151 + */ 152 + #define XE_RTP_RULE_GRAPHICS_VERSION_ANY_GT(ver__) \ 153 + { .match_type = XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, \ 154 + .ver_start = ver__, } 155 + 156 + /** 157 + * XE_RTP_RULE_MEDIA_VERSION - Create rule matching media version 158 + * @ver__: Media IP version to match 145 159 * 146 160 * Refer to XE_RTP_RULES() for expected usage. 147 161 */ ··· 178 164 .ver_start = ver_start__, .ver_end = ver_end__, } 179 165 180 166 /** 167 + * XE_RTP_RULE_MEDIA_VERSION_ANY_GT - Create rule matching media version on any GT 168 + * @ver__: Media IP version to match 169 + * 170 + * Like XE_RTP_RULE_MEDIA_VERSION, but it matches even if the current GT being 171 + * checked is not of the media type. It allows to add RTP entries to another 172 + * GT when the device contains a Media IP with that version. 173 + * 174 + * Refer to XE_RTP_RULES() for expected usage. 175 + */ 176 + #define XE_RTP_RULE_MEDIA_VERSION_ANY_GT(ver__) \ 177 + { .match_type = XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, \ 178 + .ver_start = ver__, } 179 + 180 + /** 181 181 * XE_RTP_RULE_IS_INTEGRATED - Create a rule matching integrated graphics devices 182 182 * 183 183 * Refer to XE_RTP_RULES() for expected usage. 
··· 206 178 */ 207 179 #define XE_RTP_RULE_IS_DISCRETE \ 208 180 { .match_type = XE_RTP_MATCH_DISCRETE } 181 + 182 + /** 183 + * XE_RTP_RULE_OR - Create an OR condition for rtp rules 184 + * 185 + * RTP rules are AND'ed when evaluated and all of them need to match. 186 + * XE_RTP_RULE_OR allows to create set of rules where any of them matching is 187 + * sufficient for the action to trigger. Example: 188 + * 189 + * .. code-block:: c 190 + * 191 + * const struct xe_rtp_entry_sr entries[] = { 192 + * ... 193 + * { XE_RTP_NAME("test-entry"), 194 + * XE_RTP_RULES(PLATFORM(DG2), OR, PLATFORM(TIGERLAKE)), 195 + * ... 196 + * }, 197 + * ... 198 + * }; 199 + */ 200 + #define XE_RTP_RULE_OR \ 201 + { .match_type = XE_RTP_MATCH_OR } 209 202 210 203 /** 211 204 * XE_RTP_ACTION_WR - Helper to write a value to the register, overriding all ··· 374 325 * XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry_sr entry 375 326 * @...: Rules 376 327 * 377 - * At least one rule is needed and up to 4 are supported. Multiple rules are 328 + * At least one rule is needed and up to 6 are supported. Multiple rules are 378 329 * AND'ed together, i.e. all the rules must evaluate to true for the entry to 379 330 * be processed. See XE_RTP_MATCH_* for the possible match rules. Example: 380 331 * ··· 399 350 * XE_RTP_ACTIONS - Helper to set multiple actions to a struct xe_rtp_entry_sr 400 351 * @...: Actions to be taken 401 352 * 402 - * At least one action is needed and up to 4 are supported. See XE_RTP_ACTION_* 353 + * At least one action is needed and up to 6 are supported. See XE_RTP_ACTION_* 403 354 * for the possible actions. Example: 404 355 * 405 356 * .. 
code-block:: c ··· 476 427 bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, 477 428 const struct xe_hw_engine *hwe); 478 429 479 - /* 480 - * xe_rtp_match_when_media2000 - Match when media GT version 2000 481 - * 482 - * @gt: GT structure 483 - * @hwe: Engine instance 484 - * 485 - * Its one of the case where we need to apply workaround on primary GT 486 - * based on if media GT version 2000 is present. Thus this API will help 487 - * us to match media version 2000. 488 - * 489 - * Returns: true if media GT version 2000, false otherwise. 490 - */ 491 - bool xe_rtp_match_when_media2000(const struct xe_gt *gt, 492 - const struct xe_hw_engine *hwe); 493 430 #endif
+2
drivers/gpu/drm/xe/xe_rtp_helpers.h
··· 58 58 #define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_) 59 59 #define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_) 60 60 #define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_) 61 + #define XE_RTP_PASTE_5(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_4(prefix_, sep_, _XE_TUPLE_TAIL args_) 62 + #define XE_RTP_PASTE_6(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, FIRST_ARG args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_5(prefix_, sep_, _XE_TUPLE_TAIL args_) 61 63 62 64 /* 63 65 * XE_RTP_DROP_CAST - Drop cast to convert a compound statement to a initializer
+3
drivers/gpu/drm/xe/xe_rtp_types.h
··· 42 42 XE_RTP_MATCH_SUBPLATFORM, 43 43 XE_RTP_MATCH_GRAPHICS_VERSION, 44 44 XE_RTP_MATCH_GRAPHICS_VERSION_RANGE, 45 + XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT, 45 46 XE_RTP_MATCH_GRAPHICS_STEP, 46 47 XE_RTP_MATCH_MEDIA_VERSION, 47 48 XE_RTP_MATCH_MEDIA_VERSION_RANGE, 49 + XE_RTP_MATCH_MEDIA_VERSION_ANY_GT, 48 50 XE_RTP_MATCH_MEDIA_STEP, 49 51 XE_RTP_MATCH_INTEGRATED, 50 52 XE_RTP_MATCH_DISCRETE, 51 53 XE_RTP_MATCH_ENGINE_CLASS, 52 54 XE_RTP_MATCH_NOT_ENGINE_CLASS, 53 55 XE_RTP_MATCH_FUNC, 56 + XE_RTP_MATCH_OR, 54 57 }; 55 58 56 59 /** struct xe_rtp_rule - match rule for processing entry */
+6
drivers/gpu/drm/xe/xe_sched_job.c
··· 363 363 for (i = 0; i < snapshot->batch_addr_len; i++) 364 364 drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]); 365 365 } 366 + 367 + int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, 368 + enum dma_resv_usage usage) 369 + { 370 + return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage); 371 + }
+3
drivers/gpu/drm/xe/xe_sched_job.h
··· 90 90 void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot); 91 91 void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p); 92 92 93 + int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv, 94 + enum dma_resv_usage usage); 95 + 93 96 #endif
+3 -3
drivers/gpu/drm/xe/xe_sriov.h
··· 19 19 void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); 20 20 int xe_sriov_init(struct xe_device *xe); 21 21 22 - static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) 22 + static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe) 23 23 { 24 24 xe_assert(xe, xe->sriov.__mode); 25 25 return xe->sriov.__mode; 26 26 } 27 27 28 - static inline bool xe_device_is_sriov_pf(struct xe_device *xe) 28 + static inline bool xe_device_is_sriov_pf(const struct xe_device *xe) 29 29 { 30 30 return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_PF; 31 31 } 32 32 33 - static inline bool xe_device_is_sriov_vf(struct xe_device *xe) 33 + static inline bool xe_device_is_sriov_vf(const struct xe_device *xe) 34 34 { 35 35 return xe_device_sriov_mode(xe) == XE_SRIOV_MODE_VF; 36 36 }
+61 -311
drivers/gpu/drm/xe/xe_trace.h
··· 12 12 #include <linux/tracepoint.h> 13 13 #include <linux/types.h> 14 14 15 - #include "xe_bo.h" 16 - #include "xe_bo_types.h" 17 15 #include "xe_exec_queue_types.h" 18 16 #include "xe_gpu_scheduler_types.h" 19 17 #include "xe_gt_tlb_invalidation_types.h" ··· 20 22 #include "xe_sched_job.h" 21 23 #include "xe_vm.h" 22 24 25 + #define __dev_name_xe(xe) dev_name((xe)->drm.dev) 26 + #define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt))) 27 + #define __dev_name_eq(q) __dev_name_gt((q)->gt) 28 + 23 29 DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence, 24 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 25 - TP_ARGS(fence), 30 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 31 + TP_ARGS(xe, fence), 26 32 27 33 TP_STRUCT__entry( 34 + __string(dev, __dev_name_xe(xe)) 28 35 __field(struct xe_gt_tlb_invalidation_fence *, fence) 29 36 __field(int, seqno) 30 37 ), 31 38 32 39 TP_fast_assign( 40 + __assign_str(dev); 33 41 __entry->fence = fence; 34 42 __entry->seqno = fence->seqno; 35 43 ), 36 44 37 - TP_printk("fence=%p, seqno=%d", 38 - __entry->fence, __entry->seqno) 45 + TP_printk("dev=%s, fence=%p, seqno=%d", 46 + __get_str(dev), __entry->fence, __entry->seqno) 39 47 ); 40 48 41 49 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create, 42 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 43 - TP_ARGS(fence) 50 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 51 + TP_ARGS(xe, fence) 44 52 ); 45 53 46 54 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, 47 55 xe_gt_tlb_invalidation_fence_work_func, 48 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 49 - TP_ARGS(fence) 56 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 57 + TP_ARGS(xe, fence) 50 58 ); 51 59 52 60 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb, 53 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 54 - TP_ARGS(fence) 61 + TP_PROTO(struct xe_device 
*xe, struct xe_gt_tlb_invalidation_fence *fence), 62 + TP_ARGS(xe, fence) 55 63 ); 56 64 57 65 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send, 58 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 59 - TP_ARGS(fence) 66 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 67 + TP_ARGS(xe, fence) 60 68 ); 61 69 62 70 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv, 63 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 64 - TP_ARGS(fence) 71 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 72 + TP_ARGS(xe, fence) 65 73 ); 66 74 67 75 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal, 68 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 69 - TP_ARGS(fence) 76 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 77 + TP_ARGS(xe, fence) 70 78 ); 71 79 72 80 DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout, 73 - TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence), 74 - TP_ARGS(fence) 75 - ); 76 - 77 - DECLARE_EVENT_CLASS(xe_bo, 78 - TP_PROTO(struct xe_bo *bo), 79 - TP_ARGS(bo), 80 - 81 - TP_STRUCT__entry( 82 - __field(size_t, size) 83 - __field(u32, flags) 84 - __field(struct xe_vm *, vm) 85 - ), 86 - 87 - TP_fast_assign( 88 - __entry->size = bo->size; 89 - __entry->flags = bo->flags; 90 - __entry->vm = bo->vm; 91 - ), 92 - 93 - TP_printk("size=%zu, flags=0x%02x, vm=%p", 94 - __entry->size, __entry->flags, __entry->vm) 95 - ); 96 - 97 - DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, 98 - TP_PROTO(struct xe_bo *bo), 99 - TP_ARGS(bo) 100 - ); 101 - 102 - TRACE_EVENT(xe_bo_move, 103 - TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, 104 - bool move_lacks_source), 105 - TP_ARGS(bo, new_placement, old_placement, move_lacks_source), 106 - TP_STRUCT__entry( 107 - __field(struct xe_bo *, bo) 108 - __field(size_t, size) 109 - __field(u32, new_placement) 
110 - __field(u32, old_placement) 111 - __array(char, device_id, 12) 112 - __field(bool, move_lacks_source) 113 - ), 114 - 115 - TP_fast_assign( 116 - __entry->bo = bo; 117 - __entry->size = bo->size; 118 - __entry->new_placement = new_placement; 119 - __entry->old_placement = old_placement; 120 - strscpy(__entry->device_id, dev_name(xe_bo_device(__entry->bo)->drm.dev), 12); 121 - __entry->move_lacks_source = move_lacks_source; 122 - ), 123 - TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", 124 - __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, 125 - xe_mem_type_to_name[__entry->old_placement], 126 - xe_mem_type_to_name[__entry->new_placement], __entry->device_id) 81 + TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence), 82 + TP_ARGS(xe, fence) 127 83 ); 128 84 129 85 DECLARE_EVENT_CLASS(xe_exec_queue, ··· 85 133 TP_ARGS(q), 86 134 87 135 TP_STRUCT__entry( 136 + __string(dev, __dev_name_eq(q)) 88 137 __field(enum xe_engine_class, class) 89 138 __field(u32, logical_mask) 90 139 __field(u8, gt_id) ··· 96 143 ), 97 144 98 145 TP_fast_assign( 146 + __assign_str(dev); 99 147 __entry->class = q->class; 100 148 __entry->logical_mask = q->logical_mask; 101 149 __entry->gt_id = q->gt->info.id; ··· 106 152 __entry->flags = q->flags; 107 153 ), 108 154 109 - TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", 110 - __entry->class, __entry->logical_mask, 155 + TP_printk("dev=%s, %d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x", 156 + __get_str(dev), __entry->class, __entry->logical_mask, 111 157 __entry->gt_id, __entry->width, __entry->guc_id, 112 158 __entry->guc_state, __entry->flags) 113 159 ); ··· 207 253 TP_ARGS(job), 208 254 209 255 TP_STRUCT__entry( 256 + __string(dev, __dev_name_eq(job->q)) 210 257 __field(u32, seqno) 211 258 __field(u32, lrc_seqno) 212 259 __field(u16, guc_id) ··· 219 264 ), 220 265 221 266 TP_fast_assign( 267 + 
__assign_str(dev); 222 268 __entry->seqno = xe_sched_job_seqno(job); 223 269 __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); 224 270 __entry->guc_id = job->q->guc->id; ··· 231 275 __entry->batch_addr = (u64)job->ptrs[0].batch_addr; 232 276 ), 233 277 234 - TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", 235 - __entry->fence, __entry->seqno, 278 + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", 279 + __get_str(dev), __entry->fence, __entry->seqno, 236 280 __entry->lrc_seqno, __entry->guc_id, 237 281 __entry->batch_addr, __entry->guc_state, 238 282 __entry->flags, __entry->error) ··· 278 322 TP_ARGS(msg), 279 323 280 324 TP_STRUCT__entry( 325 + __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) 281 326 __field(u32, opcode) 282 327 __field(u16, guc_id) 283 328 ), 284 329 285 330 TP_fast_assign( 331 + __assign_str(dev); 286 332 __entry->opcode = msg->opcode; 287 333 __entry->guc_id = 288 334 ((struct xe_exec_queue *)msg->private_data)->guc->id; 289 335 ), 290 336 291 - TP_printk("guc_id=%d, opcode=%u", __entry->guc_id, 337 + TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, 292 338 __entry->opcode) 293 339 ); 294 340 ··· 309 351 TP_ARGS(fence), 310 352 311 353 TP_STRUCT__entry( 354 + __string(dev, __dev_name_gt(fence->ctx->gt)) 312 355 __field(u64, ctx) 313 356 __field(u32, seqno) 314 357 __field(struct xe_hw_fence *, fence) 315 358 ), 316 359 317 360 TP_fast_assign( 361 + __assign_str(dev); 318 362 __entry->ctx = fence->dma.context; 319 363 __entry->seqno = fence->dma.seqno; 320 364 __entry->fence = fence; 321 365 ), 322 366 323 - TP_printk("ctx=0x%016llx, fence=%p, seqno=%u", 324 - __entry->ctx, __entry->fence, __entry->seqno) 367 + TP_printk("dev=%s, ctx=0x%016llx, fence=%p, seqno=%u", 368 + __get_str(dev), __entry->ctx, __entry->fence, __entry->seqno) 325 369 ); 326 370 
327 371 DEFINE_EVENT(xe_hw_fence, xe_hw_fence_create, ··· 346 386 TP_ARGS(fence) 347 387 ); 348 388 349 - DECLARE_EVENT_CLASS(xe_vma, 350 - TP_PROTO(struct xe_vma *vma), 351 - TP_ARGS(vma), 389 + TRACE_EVENT(xe_reg_rw, 390 + TP_PROTO(struct xe_gt *gt, bool write, u32 reg, u64 val, int len), 352 391 353 - TP_STRUCT__entry( 354 - __field(struct xe_vma *, vma) 355 - __field(u32, asid) 356 - __field(u64, start) 357 - __field(u64, end) 358 - __field(u64, ptr) 359 - ), 392 + TP_ARGS(gt, write, reg, val, len), 360 393 361 - TP_fast_assign( 362 - __entry->vma = vma; 363 - __entry->asid = xe_vma_vm(vma)->usm.asid; 364 - __entry->start = xe_vma_start(vma); 365 - __entry->end = xe_vma_end(vma) - 1; 366 - __entry->ptr = xe_vma_userptr(vma); 367 - ), 394 + TP_STRUCT__entry( 395 + __string(dev, __dev_name_gt(gt)) 396 + __field(u64, val) 397 + __field(u32, reg) 398 + __field(u16, write) 399 + __field(u16, len) 400 + ), 368 401 369 - TP_printk("vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", 370 - __entry->vma, __entry->asid, __entry->start, 371 - __entry->end, __entry->ptr) 372 - ) 402 + TP_fast_assign( 403 + __assign_str(dev); 404 + __entry->val = val; 405 + __entry->reg = reg; 406 + __entry->write = write; 407 + __entry->len = len; 408 + ), 373 409 374 - DEFINE_EVENT(xe_vma, xe_vma_flush, 375 - TP_PROTO(struct xe_vma *vma), 376 - TP_ARGS(vma) 377 - ); 378 - 379 - DEFINE_EVENT(xe_vma, xe_vma_pagefault, 380 - TP_PROTO(struct xe_vma *vma), 381 - TP_ARGS(vma) 382 - ); 383 - 384 - DEFINE_EVENT(xe_vma, xe_vma_acc, 385 - TP_PROTO(struct xe_vma *vma), 386 - TP_ARGS(vma) 387 - ); 388 - 389 - DEFINE_EVENT(xe_vma, xe_vma_fail, 390 - TP_PROTO(struct xe_vma *vma), 391 - TP_ARGS(vma) 392 - ); 393 - 394 - DEFINE_EVENT(xe_vma, xe_vma_bind, 395 - TP_PROTO(struct xe_vma *vma), 396 - TP_ARGS(vma) 397 - ); 398 - 399 - DEFINE_EVENT(xe_vma, xe_vma_pf_bind, 400 - TP_PROTO(struct xe_vma *vma), 401 - TP_ARGS(vma) 402 - ); 403 - 404 - DEFINE_EVENT(xe_vma, xe_vma_unbind, 405 - 
TP_PROTO(struct xe_vma *vma), 406 - TP_ARGS(vma) 407 - ); 408 - 409 - DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, 410 - TP_PROTO(struct xe_vma *vma), 411 - TP_ARGS(vma) 412 - ); 413 - 414 - DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, 415 - TP_PROTO(struct xe_vma *vma), 416 - TP_ARGS(vma) 417 - ); 418 - 419 - DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, 420 - TP_PROTO(struct xe_vma *vma), 421 - TP_ARGS(vma) 422 - ); 423 - 424 - DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, 425 - TP_PROTO(struct xe_vma *vma), 426 - TP_ARGS(vma) 427 - ); 428 - 429 - DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, 430 - TP_PROTO(struct xe_vma *vma), 431 - TP_ARGS(vma) 432 - ); 433 - 434 - DEFINE_EVENT(xe_vma, xe_vma_invalidate, 435 - TP_PROTO(struct xe_vma *vma), 436 - TP_ARGS(vma) 437 - ); 438 - 439 - DEFINE_EVENT(xe_vma, xe_vma_evict, 440 - TP_PROTO(struct xe_vma *vma), 441 - TP_ARGS(vma) 442 - ); 443 - 444 - DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, 445 - TP_PROTO(struct xe_vma *vma), 446 - TP_ARGS(vma) 447 - ); 448 - 449 - DECLARE_EVENT_CLASS(xe_vm, 450 - TP_PROTO(struct xe_vm *vm), 451 - TP_ARGS(vm), 452 - 453 - TP_STRUCT__entry( 454 - __field(struct xe_vm *, vm) 455 - __field(u32, asid) 456 - ), 457 - 458 - TP_fast_assign( 459 - __entry->vm = vm; 460 - __entry->asid = vm->usm.asid; 461 - ), 462 - 463 - TP_printk("vm=%p, asid=0x%05x", __entry->vm, 464 - __entry->asid) 465 - ); 466 - 467 - DEFINE_EVENT(xe_vm, xe_vm_kill, 468 - TP_PROTO(struct xe_vm *vm), 469 - TP_ARGS(vm) 470 - ); 471 - 472 - DEFINE_EVENT(xe_vm, xe_vm_create, 473 - TP_PROTO(struct xe_vm *vm), 474 - TP_ARGS(vm) 475 - ); 476 - 477 - DEFINE_EVENT(xe_vm, xe_vm_free, 478 - TP_PROTO(struct xe_vm *vm), 479 - TP_ARGS(vm) 480 - ); 481 - 482 - DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, 483 - TP_PROTO(struct xe_vm *vm), 484 - TP_ARGS(vm) 485 - ); 486 - 487 - DEFINE_EVENT(xe_vm, xe_vm_restart, 488 - TP_PROTO(struct xe_vm *vm), 489 - TP_ARGS(vm) 490 - ); 491 - 492 - DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, 
493 - TP_PROTO(struct xe_vm *vm), 494 - TP_ARGS(vm) 495 - ); 496 - 497 - DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_retry, 498 - TP_PROTO(struct xe_vm *vm), 499 - TP_ARGS(vm) 500 - ); 501 - 502 - DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, 503 - TP_PROTO(struct xe_vm *vm), 504 - TP_ARGS(vm) 505 - ); 506 - 507 - /* GuC */ 508 - DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, 509 - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), 510 - TP_ARGS(_head, _tail, size, space, len), 511 - 512 - TP_STRUCT__entry( 513 - __field(u32, _head) 514 - __field(u32, _tail) 515 - __field(u32, size) 516 - __field(u32, space) 517 - __field(u32, len) 518 - ), 519 - 520 - TP_fast_assign( 521 - __entry->_head = _head; 522 - __entry->_tail = _tail; 523 - __entry->size = size; 524 - __entry->space = space; 525 - __entry->len = len; 526 - ), 527 - 528 - TP_printk("h2g flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", 529 - __entry->_head, __entry->_tail, __entry->size, 530 - __entry->space, __entry->len) 531 - ); 532 - 533 - DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, 534 - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), 535 - TP_ARGS(_head, _tail, size, space, len) 536 - ); 537 - 538 - DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, 539 - TP_PROTO(u32 _head, u32 _tail, u32 size, u32 space, u32 len), 540 - TP_ARGS(_head, _tail, size, space, len), 541 - 542 - TP_printk("g2h flow control: head=%u, tail=%u, size=%u, space=%u, len=%u", 543 - __entry->_head, __entry->_tail, __entry->size, 544 - __entry->space, __entry->len) 545 - ); 546 - 547 - DECLARE_EVENT_CLASS(xe_guc_ctb, 548 - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), 549 - TP_ARGS(gt_id, action, len, _head, tail), 550 - 551 - TP_STRUCT__entry( 552 - __field(u8, gt_id) 553 - __field(u32, action) 554 - __field(u32, len) 555 - __field(u32, tail) 556 - __field(u32, _head) 557 - ), 558 - 559 - TP_fast_assign( 560 - __entry->gt_id = gt_id; 561 - 
__entry->action = action; 562 - __entry->len = len; 563 - __entry->tail = tail; 564 - __entry->_head = _head; 565 - ), 566 - 567 - TP_printk("gt%d: H2G CTB: action=0x%x, len=%d, tail=%d, head=%d\n", 568 - __entry->gt_id, __entry->action, __entry->len, 569 - __entry->tail, __entry->_head) 570 - ); 571 - 572 - DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, 573 - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), 574 - TP_ARGS(gt_id, action, len, _head, tail) 575 - ); 576 - 577 - DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, 578 - TP_PROTO(u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), 579 - TP_ARGS(gt_id, action, len, _head, tail), 580 - 581 - TP_printk("gt%d: G2H CTB: action=0x%x, len=%d, tail=%d, head=%d\n", 582 - __entry->gt_id, __entry->action, __entry->len, 583 - __entry->tail, __entry->_head) 584 - 410 + TP_printk("dev=%s, %s reg=0x%x, len=%d, val=(0x%x, 0x%x)", 411 + __get_str(dev), __entry->write ? "write" : "read", 412 + __entry->reg, __entry->len, 413 + (u32)(__entry->val & 0xffffffff), 414 + (u32)(__entry->val >> 32)) 585 415 ); 586 416 587 417 #endif
+9
drivers/gpu/drm/xe/xe_trace_bo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef __CHECKER__ 7 + #define CREATE_TRACE_POINTS 8 + #include "xe_trace_bo.h" 9 + #endif
+247
drivers/gpu/drm/xe/xe_trace_bo.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM xe 8 + 9 + #if !defined(_XE_TRACE_BO_H_) || defined(TRACE_HEADER_MULTI_READ) 10 + #define _XE_TRACE_BO_H_ 11 + 12 + #include <linux/tracepoint.h> 13 + #include <linux/types.h> 14 + 15 + #include "xe_bo.h" 16 + #include "xe_bo_types.h" 17 + #include "xe_vm.h" 18 + 19 + #define __dev_name_bo(bo) dev_name(xe_bo_device(bo)->drm.dev) 20 + #define __dev_name_vm(vm) dev_name((vm)->xe->drm.dev) 21 + #define __dev_name_vma(vma) __dev_name_vm(xe_vma_vm(vma)) 22 + 23 + DECLARE_EVENT_CLASS(xe_bo, 24 + TP_PROTO(struct xe_bo *bo), 25 + TP_ARGS(bo), 26 + 27 + TP_STRUCT__entry( 28 + __string(dev, __dev_name_bo(bo)) 29 + __field(size_t, size) 30 + __field(u32, flags) 31 + __field(struct xe_vm *, vm) 32 + ), 33 + 34 + TP_fast_assign( 35 + __assign_str(dev); 36 + __entry->size = bo->size; 37 + __entry->flags = bo->flags; 38 + __entry->vm = bo->vm; 39 + ), 40 + 41 + TP_printk("dev=%s, size=%zu, flags=0x%02x, vm=%p", 42 + __get_str(dev), __entry->size, 43 + __entry->flags, __entry->vm) 44 + ); 45 + 46 + DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, 47 + TP_PROTO(struct xe_bo *bo), 48 + TP_ARGS(bo) 49 + ); 50 + 51 + TRACE_EVENT(xe_bo_move, 52 + TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, 53 + bool move_lacks_source), 54 + TP_ARGS(bo, new_placement, old_placement, move_lacks_source), 55 + TP_STRUCT__entry( 56 + __field(struct xe_bo *, bo) 57 + __field(size_t, size) 58 + __field(u32, new_placement) 59 + __field(u32, old_placement) 60 + __string(device_id, __dev_name_bo(bo)) 61 + __field(bool, move_lacks_source) 62 + ), 63 + 64 + TP_fast_assign( 65 + __entry->bo = bo; 66 + __entry->size = bo->size; 67 + __entry->new_placement = new_placement; 68 + __entry->old_placement = old_placement; 69 + __assign_str(device_id); 70 + __entry->move_lacks_source = move_lacks_source; 71 + ), 72 + 
TP_printk("move_lacks_source:%s, migrate object %p [size %zu] from %s to %s device_id:%s", 73 + __entry->move_lacks_source ? "yes" : "no", __entry->bo, __entry->size, 74 + xe_mem_type_to_name[__entry->old_placement], 75 + xe_mem_type_to_name[__entry->new_placement], __get_str(device_id)) 76 + ); 77 + 78 + DECLARE_EVENT_CLASS(xe_vma, 79 + TP_PROTO(struct xe_vma *vma), 80 + TP_ARGS(vma), 81 + 82 + TP_STRUCT__entry( 83 + __string(dev, __dev_name_vma(vma)) 84 + __field(struct xe_vma *, vma) 85 + __field(u32, asid) 86 + __field(u64, start) 87 + __field(u64, end) 88 + __field(u64, ptr) 89 + ), 90 + 91 + TP_fast_assign( 92 + __assign_str(dev); 93 + __entry->vma = vma; 94 + __entry->asid = xe_vma_vm(vma)->usm.asid; 95 + __entry->start = xe_vma_start(vma); 96 + __entry->end = xe_vma_end(vma) - 1; 97 + __entry->ptr = xe_vma_userptr(vma); 98 + ), 99 + 100 + TP_printk("dev=%s, vma=%p, asid=0x%05x, start=0x%012llx, end=0x%012llx, userptr=0x%012llx,", 101 + __get_str(dev), __entry->vma, __entry->asid, __entry->start, 102 + __entry->end, __entry->ptr) 103 + ) 104 + 105 + DEFINE_EVENT(xe_vma, xe_vma_flush, 106 + TP_PROTO(struct xe_vma *vma), 107 + TP_ARGS(vma) 108 + ); 109 + 110 + DEFINE_EVENT(xe_vma, xe_vma_pagefault, 111 + TP_PROTO(struct xe_vma *vma), 112 + TP_ARGS(vma) 113 + ); 114 + 115 + DEFINE_EVENT(xe_vma, xe_vma_acc, 116 + TP_PROTO(struct xe_vma *vma), 117 + TP_ARGS(vma) 118 + ); 119 + 120 + DEFINE_EVENT(xe_vma, xe_vma_fail, 121 + TP_PROTO(struct xe_vma *vma), 122 + TP_ARGS(vma) 123 + ); 124 + 125 + DEFINE_EVENT(xe_vma, xe_vma_bind, 126 + TP_PROTO(struct xe_vma *vma), 127 + TP_ARGS(vma) 128 + ); 129 + 130 + DEFINE_EVENT(xe_vma, xe_vma_pf_bind, 131 + TP_PROTO(struct xe_vma *vma), 132 + TP_ARGS(vma) 133 + ); 134 + 135 + DEFINE_EVENT(xe_vma, xe_vma_unbind, 136 + TP_PROTO(struct xe_vma *vma), 137 + TP_ARGS(vma) 138 + ); 139 + 140 + DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_worker, 141 + TP_PROTO(struct xe_vma *vma), 142 + TP_ARGS(vma) 143 + ); 144 + 145 + 
DEFINE_EVENT(xe_vma, xe_vma_userptr_rebind_exec, 146 + TP_PROTO(struct xe_vma *vma), 147 + TP_ARGS(vma) 148 + ); 149 + 150 + DEFINE_EVENT(xe_vma, xe_vma_rebind_worker, 151 + TP_PROTO(struct xe_vma *vma), 152 + TP_ARGS(vma) 153 + ); 154 + 155 + DEFINE_EVENT(xe_vma, xe_vma_rebind_exec, 156 + TP_PROTO(struct xe_vma *vma), 157 + TP_ARGS(vma) 158 + ); 159 + 160 + DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate, 161 + TP_PROTO(struct xe_vma *vma), 162 + TP_ARGS(vma) 163 + ); 164 + 165 + DEFINE_EVENT(xe_vma, xe_vma_invalidate, 166 + TP_PROTO(struct xe_vma *vma), 167 + TP_ARGS(vma) 168 + ); 169 + 170 + DEFINE_EVENT(xe_vma, xe_vma_evict, 171 + TP_PROTO(struct xe_vma *vma), 172 + TP_ARGS(vma) 173 + ); 174 + 175 + DEFINE_EVENT(xe_vma, xe_vma_userptr_invalidate_complete, 176 + TP_PROTO(struct xe_vma *vma), 177 + TP_ARGS(vma) 178 + ); 179 + 180 + DECLARE_EVENT_CLASS(xe_vm, 181 + TP_PROTO(struct xe_vm *vm), 182 + TP_ARGS(vm), 183 + 184 + TP_STRUCT__entry( 185 + __string(dev, __dev_name_vm(vm)) 186 + __field(struct xe_vm *, vm) 187 + __field(u32, asid) 188 + ), 189 + 190 + TP_fast_assign( 191 + __assign_str(dev); 192 + __entry->vm = vm; 193 + __entry->asid = vm->usm.asid; 194 + ), 195 + 196 + TP_printk("dev=%s, vm=%p, asid=0x%05x", __get_str(dev), 197 + __entry->vm, __entry->asid) 198 + ); 199 + 200 + DEFINE_EVENT(xe_vm, xe_vm_kill, 201 + TP_PROTO(struct xe_vm *vm), 202 + TP_ARGS(vm) 203 + ); 204 + 205 + DEFINE_EVENT(xe_vm, xe_vm_create, 206 + TP_PROTO(struct xe_vm *vm), 207 + TP_ARGS(vm) 208 + ); 209 + 210 + DEFINE_EVENT(xe_vm, xe_vm_free, 211 + TP_PROTO(struct xe_vm *vm), 212 + TP_ARGS(vm) 213 + ); 214 + 215 + DEFINE_EVENT(xe_vm, xe_vm_cpu_bind, 216 + TP_PROTO(struct xe_vm *vm), 217 + TP_ARGS(vm) 218 + ); 219 + 220 + DEFINE_EVENT(xe_vm, xe_vm_restart, 221 + TP_PROTO(struct xe_vm *vm), 222 + TP_ARGS(vm) 223 + ); 224 + 225 + DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_enter, 226 + TP_PROTO(struct xe_vm *vm), 227 + TP_ARGS(vm) 228 + ); 229 + 230 + DEFINE_EVENT(xe_vm, 
xe_vm_rebind_worker_retry, 231 + TP_PROTO(struct xe_vm *vm), 232 + TP_ARGS(vm) 233 + ); 234 + 235 + DEFINE_EVENT(xe_vm, xe_vm_rebind_worker_exit, 236 + TP_PROTO(struct xe_vm *vm), 237 + TP_ARGS(vm) 238 + ); 239 + 240 + #endif 241 + 242 + /* This part must be outside protection */ 243 + #undef TRACE_INCLUDE_PATH 244 + #undef TRACE_INCLUDE_FILE 245 + #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe 246 + #define TRACE_INCLUDE_FILE xe_trace_bo 247 + #include <trace/define_trace.h>
+9
drivers/gpu/drm/xe/xe_trace_guc.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #ifndef __CHECKER__ 7 + #define CREATE_TRACE_POINTS 8 + #include "xe_trace_guc.h" 9 + #endif
+110
drivers/gpu/drm/xe/xe_trace_guc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright © 2024 Intel Corporation 4 + */ 5 + 6 + #undef TRACE_SYSTEM 7 + #define TRACE_SYSTEM xe 8 + 9 + #if !defined(_XE_TRACE_GUC_H_) || defined(TRACE_HEADER_MULTI_READ) 10 + #define _XE_TRACE_GUC_H_ 11 + 12 + #include <linux/tracepoint.h> 13 + #include <linux/types.h> 14 + 15 + #include "xe_device_types.h" 16 + #include "xe_guc_exec_queue_types.h" 17 + 18 + #define __dev_name_xe(xe) dev_name((xe)->drm.dev) 19 + 20 + DECLARE_EVENT_CLASS(xe_guc_ct_flow_control, 21 + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), 22 + TP_ARGS(xe, _head, _tail, size, space, len), 23 + 24 + TP_STRUCT__entry( 25 + __string(dev, __dev_name_xe(xe)) 26 + __field(u32, _head) 27 + __field(u32, _tail) 28 + __field(u32, size) 29 + __field(u32, space) 30 + __field(u32, len) 31 + ), 32 + 33 + TP_fast_assign( 34 + __assign_str(dev); 35 + __entry->_head = _head; 36 + __entry->_tail = _tail; 37 + __entry->size = size; 38 + __entry->space = space; 39 + __entry->len = len; 40 + ), 41 + 42 + TP_printk("h2g flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", 43 + __get_str(dev), __entry->_head, __entry->_tail, __entry->size, 44 + __entry->space, __entry->len) 45 + ); 46 + 47 + DEFINE_EVENT(xe_guc_ct_flow_control, xe_guc_ct_h2g_flow_control, 48 + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), 49 + TP_ARGS(xe, _head, _tail, size, space, len) 50 + ); 51 + 52 + DEFINE_EVENT_PRINT(xe_guc_ct_flow_control, xe_guc_ct_g2h_flow_control, 53 + TP_PROTO(struct xe_device *xe, u32 _head, u32 _tail, u32 size, u32 space, u32 len), 54 + TP_ARGS(xe, _head, _tail, size, space, len), 55 + 56 + TP_printk("g2h flow control: dev=%s, head=%u, tail=%u, size=%u, space=%u, len=%u", 57 + __get_str(dev), __entry->_head, __entry->_tail, __entry->size, 58 + __entry->space, __entry->len) 59 + ); 60 + 61 + DECLARE_EVENT_CLASS(xe_guc_ctb, 62 + TP_PROTO(struct xe_device *xe, u8 
gt_id, u32 action, u32 len, u32 _head, u32 tail), 63 + TP_ARGS(xe, gt_id, action, len, _head, tail), 64 + 65 + TP_STRUCT__entry( 66 + __string(dev, __dev_name_xe(xe)) 67 + __field(u8, gt_id) 68 + __field(u32, action) 69 + __field(u32, len) 70 + __field(u32, tail) 71 + __field(u32, _head) 72 + ), 73 + 74 + TP_fast_assign( 75 + __assign_str(dev); 76 + __entry->gt_id = gt_id; 77 + __entry->action = action; 78 + __entry->len = len; 79 + __entry->tail = tail; 80 + __entry->_head = _head; 81 + ), 82 + 83 + TP_printk("H2G CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", 84 + __get_str(dev), __entry->gt_id, __entry->action, __entry->len, 85 + __entry->tail, __entry->_head) 86 + ); 87 + 88 + DEFINE_EVENT(xe_guc_ctb, xe_guc_ctb_h2g, 89 + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), 90 + TP_ARGS(xe, gt_id, action, len, _head, tail) 91 + ); 92 + 93 + DEFINE_EVENT_PRINT(xe_guc_ctb, xe_guc_ctb_g2h, 94 + TP_PROTO(struct xe_device *xe, u8 gt_id, u32 action, u32 len, u32 _head, u32 tail), 95 + TP_ARGS(xe, gt_id, action, len, _head, tail), 96 + 97 + TP_printk("G2H CTB: dev=%s, gt%d: action=0x%x, len=%d, tail=%d, head=%d\n", 98 + __get_str(dev), __entry->gt_id, __entry->action, __entry->len, 99 + __entry->tail, __entry->_head) 100 + 101 + ); 102 + 103 + #endif 104 + 105 + /* This part must be outside protection */ 106 + #undef TRACE_INCLUDE_PATH 107 + #undef TRACE_INCLUDE_FILE 108 + #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe 109 + #define TRACE_INCLUDE_FILE xe_trace_guc 110 + #include <trace/define_trace.h>
+1 -1
drivers/gpu/drm/xe/xe_uc_fw.h
··· 158 158 159 159 static inline void xe_uc_fw_sanitize(struct xe_uc_fw *uc_fw) 160 160 { 161 - if (xe_uc_fw_is_loaded(uc_fw)) 161 + if (xe_uc_fw_is_loadable(uc_fw)) 162 162 xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_LOADABLE); 163 163 } 164 164
+48 -53
drivers/gpu/drm/xe/xe_vm.c
··· 36 36 #include "xe_pt.h" 37 37 #include "xe_res_cursor.h" 38 38 #include "xe_sync.h" 39 - #include "xe_trace.h" 39 + #include "xe_trace_bo.h" 40 40 #include "xe_wa.h" 41 41 #include "xe_hmm.h" 42 42 ··· 83 83 lockdep_assert_held(&vm->lock); 84 84 xe_vm_assert_held(vm); 85 85 86 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 87 - if (!q->compute.pfence || 86 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 87 + if (!q->lr.pfence || 88 88 test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, 89 - &q->compute.pfence->flags)) { 89 + &q->lr.pfence->flags)) { 90 90 return true; 91 91 } 92 92 } ··· 129 129 130 130 xe_vm_assert_held(vm); 131 131 132 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 133 - if (q->compute.pfence) { 134 - long timeout = dma_fence_wait(q->compute.pfence, false); 132 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 133 + if (q->lr.pfence) { 134 + long timeout = dma_fence_wait(q->lr.pfence, false); 135 135 136 136 if (timeout < 0) 137 137 return -ETIME; 138 - dma_fence_put(q->compute.pfence); 139 - q->compute.pfence = NULL; 138 + dma_fence_put(q->lr.pfence); 139 + q->lr.pfence = NULL; 140 140 } 141 141 } 142 142 ··· 148 148 struct xe_exec_queue *q; 149 149 150 150 xe_vm_assert_held(vm); 151 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 151 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 152 152 if (!xe_exec_queue_is_idle(q)) 153 153 return false; 154 154 } ··· 161 161 struct list_head *link; 162 162 struct xe_exec_queue *q; 163 163 164 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 164 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 165 165 struct dma_fence *fence; 166 166 167 167 link = list->next; 168 168 xe_assert(vm->xe, link != list); 169 169 170 170 fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), 171 - q, q->compute.context, 172 - ++q->compute.seqno); 173 - dma_fence_put(q->compute.pfence); 174 - 
q->compute.pfence = fence; 171 + q, q->lr.context, 172 + ++q->lr.seqno); 173 + dma_fence_put(q->lr.pfence); 174 + q->lr.pfence = fence; 175 175 } 176 176 } 177 177 ··· 180 180 struct xe_exec_queue *q; 181 181 int err; 182 182 183 + xe_bo_assert_held(bo); 184 + 183 185 if (!vm->preempt.num_exec_queues) 184 186 return 0; 185 187 186 - err = xe_bo_lock(bo, true); 188 + err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 187 189 if (err) 188 190 return err; 189 191 190 - err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); 191 - if (err) 192 - goto out_unlock; 193 - 194 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) 195 - if (q->compute.pfence) { 192 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 193 + if (q->lr.pfence) { 196 194 dma_resv_add_fence(bo->ttm.base.resv, 197 - q->compute.pfence, 195 + q->lr.pfence, 198 196 DMA_RESV_USAGE_BOOKKEEP); 199 197 } 200 198 201 - out_unlock: 202 - xe_bo_unlock(bo); 203 - return err; 199 + return 0; 204 200 } 205 201 206 202 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, ··· 207 211 lockdep_assert_held(&vm->lock); 208 212 xe_vm_assert_held(vm); 209 213 210 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) { 214 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { 211 215 q->ops->resume(q); 212 216 213 - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->compute.pfence, 217 + drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, 214 218 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); 215 219 } 216 220 } ··· 234 238 if (err) 235 239 goto out_up_write; 236 240 237 - pfence = xe_preempt_fence_create(q, q->compute.context, 238 - ++q->compute.seqno); 241 + pfence = xe_preempt_fence_create(q, q->lr.context, 242 + ++q->lr.seqno); 239 243 if (!pfence) { 240 244 err = -ENOMEM; 241 245 goto out_fini; 242 246 } 243 247 244 - list_add(&q->compute.link, &vm->preempt.exec_queues); 248 + list_add(&q->lr.link, 
&vm->preempt.exec_queues); 245 249 ++vm->preempt.num_exec_queues; 246 - q->compute.pfence = pfence; 250 + q->lr.pfence = pfence; 247 251 248 252 down_read(&vm->userptr.notifier_lock); 249 253 ··· 280 284 return; 281 285 282 286 down_write(&vm->lock); 283 - list_del(&q->compute.link); 287 + list_del(&q->lr.link); 284 288 --vm->preempt.num_exec_queues; 285 - if (q->compute.pfence) { 286 - dma_fence_enable_sw_signaling(q->compute.pfence); 287 - dma_fence_put(q->compute.pfence); 288 - q->compute.pfence = NULL; 289 + if (q->lr.pfence) { 290 + dma_fence_enable_sw_signaling(q->lr.pfence); 291 + dma_fence_put(q->lr.pfence); 292 + q->lr.pfence = NULL; 289 293 } 290 294 up_write(&vm->lock); 291 295 } ··· 323 327 vm->flags |= XE_VM_FLAG_BANNED; 324 328 trace_xe_vm_kill(vm); 325 329 326 - list_for_each_entry(q, &vm->preempt.exec_queues, compute.link) 330 + list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) 327 331 q->ops->kill(q); 328 332 329 333 if (unlocked) ··· 2136 2140 struct xe_bo *bo = op->gem.obj ? 
gem_to_xe_bo(op->gem.obj) : NULL; 2137 2141 struct drm_exec exec; 2138 2142 struct xe_vma *vma; 2139 - int err; 2143 + int err = 0; 2140 2144 2141 2145 lockdep_assert_held_write(&vm->lock); 2142 2146 ··· 2161 2165 vma = xe_vma_create(vm, bo, op->gem.offset, 2162 2166 op->va.addr, op->va.addr + 2163 2167 op->va.range - 1, pat_index, flags); 2168 + if (IS_ERR(vma)) 2169 + goto err_unlock; 2170 + 2171 + if (xe_vma_is_userptr(vma)) 2172 + err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2173 + else if (!xe_vma_has_no_bo(vma) && !bo->vm) 2174 + err = add_preempt_fences(vm, bo); 2175 + 2176 + err_unlock: 2164 2177 if (bo) 2165 2178 drm_exec_fini(&exec); 2166 2179 2167 - if (xe_vma_is_userptr(vma)) { 2168 - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); 2169 - if (err) { 2170 - prep_vma_destroy(vm, vma, false); 2171 - xe_vma_destroy_unlocked(vma); 2172 - return ERR_PTR(err); 2173 - } 2174 - } else if (!xe_vma_has_no_bo(vma) && !bo->vm) { 2175 - err = add_preempt_fences(vm, bo); 2176 - if (err) { 2177 - prep_vma_destroy(vm, vma, false); 2178 - xe_vma_destroy_unlocked(vma); 2179 - return ERR_PTR(err); 2180 - } 2180 + if (err) { 2181 + prep_vma_destroy(vm, vma, false); 2182 + xe_vma_destroy_unlocked(vma); 2183 + vma = ERR_PTR(err); 2181 2184 } 2182 2185 2183 2186 return vma;
+16 -9
drivers/gpu/drm/xe/xe_wa.c
··· 21 21 #include "xe_mmio.h" 22 22 #include "xe_platform_types.h" 23 23 #include "xe_rtp.h" 24 + #include "xe_sriov.h" 24 25 #include "xe_step.h" 25 26 26 27 /** ··· 630 629 XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) 631 630 }, 632 631 { XE_RTP_NAME("14019877138"), 633 - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), 632 + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), 634 633 XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) 635 634 }, 636 635 ··· 679 678 XE_RTP_ACTIONS(SET(CHICKEN_RASTER_2, TBIMR_FAST_CLIP)) 680 679 }, 681 680 { XE_RTP_NAME("14020756599"), 682 - XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), 681 + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER), OR, 682 + MEDIA_VERSION_ANY_GT(2000), ENGINE_CLASS(RENDER)), 683 683 XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) 684 + }, 685 + { XE_RTP_NAME("14021490052"), 686 + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), 687 + XE_RTP_ACTIONS(SET(FF_MODE, 688 + DIS_MESH_PARTIAL_AUTOSTRIP | 689 + DIS_MESH_AUTOSTRIP), 690 + SET(VFLSKPD, 691 + DIS_PARTIAL_AUTOSTRIP | 692 + DIS_AUTOSTRIP)) 684 693 }, 685 694 686 695 /* Xe2_HPG */ ··· 714 703 SET(VFLSKPD, 715 704 DIS_PARTIAL_AUTOSTRIP | 716 705 DIS_AUTOSTRIP)) 717 - }, 718 - 719 - /* Xe2_LPM */ 720 - 721 - { XE_RTP_NAME("14020756599"), 722 - XE_RTP_RULES(ENGINE_CLASS(RENDER), FUNC(xe_rtp_match_when_media2000)), 723 - XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) 724 706 }, 725 707 726 708 {} ··· 865 861 void xe_wa_apply_tile_workarounds(struct xe_tile *tile) 866 862 { 867 863 struct xe_gt *mmio = tile->primary_gt; 864 + 865 + if (IS_SRIOV_VF(tile->xe)) 866 + return; 868 867 869 868 if (XE_WA(mmio, 22010954014)) 870 869 xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
-2
drivers/gpu/drm/xe/xe_wa.h
··· 17 17 void xe_wa_process_engine(struct xe_hw_engine *hwe); 18 18 void xe_wa_process_lrc(struct xe_hw_engine *hwe); 19 19 void xe_wa_apply_tile_workarounds(struct xe_tile *tile); 20 - 21 - void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe); 22 20 void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); 23 21 24 22 /**
+1
drivers/gpu/drm/xe/xe_wa_oob.rules
··· 27 27 16022287689 GRAPHICS_VERSION(2001) 28 28 GRAPHICS_VERSION(2004) 29 29 13011645652 GRAPHICS_VERSION(2004) 30 + 22019338487 MEDIA_VERSION(2000)
+7
include/drm/intel/xe_pciids.h
··· 192 192 MACRO__(0x64A0, ## __VA_ARGS__), \ 193 193 MACRO__(0x64B0, ## __VA_ARGS__) 194 194 195 + #define XE_BMG_IDS(MACRO__, ...) \ 196 + MACRO__(0xE202, ## __VA_ARGS__), \ 197 + MACRO__(0xE20B, ## __VA_ARGS__), \ 198 + MACRO__(0xE20C, ## __VA_ARGS__), \ 199 + MACRO__(0xE20D, ## __VA_ARGS__), \ 200 + MACRO__(0xE212, ## __VA_ARGS__) 201 + 195 202 #endif
+42 -11
include/linux/devcoredump.h
··· 12 12 #include <linux/scatterlist.h> 13 13 #include <linux/slab.h> 14 14 15 + /* if data isn't read by userspace after 5 minutes then delete it */ 16 + #define DEVCD_TIMEOUT (HZ * 60 * 5) 17 + 15 18 /* 16 19 * _devcd_free_sgtable - free all the memory of the given scatterlist table 17 20 * (i.e. both pages and scatterlist instances) ··· 53 50 kfree(delete_iter); 54 51 } 55 52 56 - 57 53 #ifdef CONFIG_DEV_COREDUMP 58 54 void dev_coredumpv(struct device *dev, void *data, size_t datalen, 59 55 gfp_t gfp); 60 56 61 - void dev_coredumpm(struct device *dev, struct module *owner, 62 - void *data, size_t datalen, gfp_t gfp, 63 - ssize_t (*read)(char *buffer, loff_t offset, size_t count, 64 - void *data, size_t datalen), 65 - void (*free)(void *data)); 57 + void dev_coredumpm_timeout(struct device *dev, struct module *owner, 58 + void *data, size_t datalen, gfp_t gfp, 59 + ssize_t (*read)(char *buffer, loff_t offset, 60 + size_t count, void *data, 61 + size_t datalen), 62 + void (*free)(void *data), 63 + unsigned long timeout); 66 64 67 65 void dev_coredumpsg(struct device *dev, struct scatterlist *table, 68 66 size_t datalen, gfp_t gfp); ··· 77 73 } 78 74 79 75 static inline void 80 - dev_coredumpm(struct device *dev, struct module *owner, 81 - void *data, size_t datalen, gfp_t gfp, 82 - ssize_t (*read)(char *buffer, loff_t offset, size_t count, 83 - void *data, size_t datalen), 84 - void (*free)(void *data)) 76 + dev_coredumpm_timeout(struct device *dev, struct module *owner, 77 + void *data, size_t datalen, gfp_t gfp, 78 + ssize_t (*read)(char *buffer, loff_t offset, 79 + size_t count, void *data, 80 + size_t datalen), 81 + void (*free)(void *data), 82 + unsigned long timeout) 85 83 { 86 84 free(data); 87 85 } ··· 97 91 { 98 92 } 99 93 #endif /* CONFIG_DEV_COREDUMP */ 94 + 95 + /** 96 + * dev_coredumpm - create device coredump with read/free methods 97 + * @dev: the struct device for the crashed device 98 + * @owner: the module that contains the read/free functions, 
use %THIS_MODULE 99 + * @data: data cookie for the @read/@free functions 100 + * @datalen: length of the data 101 + * @gfp: allocation flags 102 + * @read: function to read from the given buffer 103 + * @free: function to free the given buffer 104 + * 105 + * Creates a new device coredump for the given device. If a previous one hasn't 106 + * been read yet, the new coredump is discarded. The data lifetime is determined 107 + * by the device coredump framework and when it is no longer needed the @free 108 + * function will be called to free the data. 109 + */ 110 + static inline void dev_coredumpm(struct device *dev, struct module *owner, 111 + void *data, size_t datalen, gfp_t gfp, 112 + ssize_t (*read)(char *buffer, loff_t offset, size_t count, 113 + void *data, size_t datalen), 114 + void (*free)(void *data)) 115 + { 116 + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, 117 + DEVCD_TIMEOUT); 118 + } 100 119 101 120 #endif /* __DEVCOREDUMP_H */
+308
include/uapi/drm/xe_drm.h
··· 80 80 * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY 81 81 * - &DRM_IOCTL_XE_EXEC 82 82 * - &DRM_IOCTL_XE_WAIT_USER_FENCE 83 + * - &DRM_IOCTL_XE_PERF 83 84 */ 84 85 85 86 /* ··· 101 100 #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 102 101 #define DRM_XE_EXEC 0x09 103 102 #define DRM_XE_WAIT_USER_FENCE 0x0a 103 + #define DRM_XE_PERF 0x0b 104 + 104 105 /* Must be kept compact -- no holes */ 105 106 106 107 #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) ··· 116 113 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) 117 114 #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) 118 115 #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) 116 + #define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) 119 117 120 118 /** 121 119 * DOC: Xe IOCTL Extensions ··· 689 685 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 690 686 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 691 687 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 688 + #define DRM_XE_DEVICE_QUERY_OA_UNITS 8 692 689 /** @query: The type of data to query */ 693 690 __u32 query; 694 691 ··· 1373 1368 1374 1369 /** @reserved: Reserved */ 1375 1370 __u64 reserved[2]; 1371 + }; 1372 + 1373 + /** 1374 + * enum drm_xe_perf_type - Perf stream types 1375 + */ 1376 + enum drm_xe_perf_type { 1377 + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ 1378 + DRM_XE_PERF_TYPE_OA, 1379 + }; 1380 + 1381 + /** 1382 + * enum drm_xe_perf_op - Perf stream ops 1383 + */ 1384 + enum drm_xe_perf_op { 1385 + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ 1386 + DRM_XE_PERF_OP_STREAM_OPEN, 1387 + 1388 + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ 1389 + DRM_XE_PERF_OP_ADD_CONFIG, 1390 + 1391 + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf 
stream config */ 1392 + DRM_XE_PERF_OP_REMOVE_CONFIG, 1393 + }; 1394 + 1395 + /** 1396 + * struct drm_xe_perf_param - Input of &DRM_XE_PERF 1397 + * 1398 + * The perf layer enables multiplexing perf counter streams of multiple 1399 + * types. The actual params for a particular stream operation are supplied 1400 + * via the @param pointer (use __copy_from_user to get these params). 1401 + */ 1402 + struct drm_xe_perf_param { 1403 + /** @extensions: Pointer to the first extension struct, if any */ 1404 + __u64 extensions; 1405 + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ 1406 + __u64 perf_type; 1407 + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ 1408 + __u64 perf_op; 1409 + /** @param: Pointer to actual stream params */ 1410 + __u64 param; 1411 + }; 1412 + 1413 + /** 1414 + * enum drm_xe_perf_ioctls - Perf fd ioctl's 1415 + * 1416 + * Information exchanged between userspace and kernel for perf fd ioctl's 1417 + * is stream type specific 1418 + */ 1419 + enum drm_xe_perf_ioctls { 1420 + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ 1421 + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), 1422 + 1423 + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ 1424 + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), 1425 + 1426 + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ 1427 + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), 1428 + 1429 + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ 1430 + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), 1431 + 1432 + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ 1433 + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), 1434 + }; 1435 + 1436 + /** 1437 + * enum drm_xe_oa_unit_type - OA unit types 1438 + */ 1439 + enum drm_xe_oa_unit_type { 1440 + /** 1441 + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered 1442 + * sub-types of OAG. For OAR/OAC, use OAG. 
1443 + */ 1444 + DRM_XE_OA_UNIT_TYPE_OAG, 1445 + 1446 + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ 1447 + DRM_XE_OA_UNIT_TYPE_OAM, 1448 + }; 1449 + 1450 + /** 1451 + * struct drm_xe_oa_unit - describe OA unit 1452 + */ 1453 + struct drm_xe_oa_unit { 1454 + /** @extensions: Pointer to the first extension struct, if any */ 1455 + __u64 extensions; 1456 + 1457 + /** @oa_unit_id: OA unit ID */ 1458 + __u32 oa_unit_id; 1459 + 1460 + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ 1461 + __u32 oa_unit_type; 1462 + 1463 + /** @capabilities: OA capabilities bit-mask */ 1464 + __u64 capabilities; 1465 + #define DRM_XE_OA_CAPS_BASE (1 << 0) 1466 + 1467 + /** @oa_timestamp_freq: OA timestamp freq */ 1468 + __u64 oa_timestamp_freq; 1469 + 1470 + /** @reserved: MBZ */ 1471 + __u64 reserved[4]; 1472 + 1473 + /** @num_engines: number of engines in @eci array */ 1474 + __u64 num_engines; 1475 + 1476 + /** @eci: engines attached to this OA unit */ 1477 + struct drm_xe_engine_class_instance eci[]; 1478 + }; 1479 + 1480 + /** 1481 + * struct drm_xe_query_oa_units - describe OA units 1482 + * 1483 + * If a query is made with a struct drm_xe_device_query where .query 1484 + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct 1485 + * drm_xe_query_oa_units in .data. 1486 + * 1487 + * OA unit properties for all OA units can be accessed using a code block 1488 + * such as the one below: 1489 + * 1490 + * .. code-block:: C 1491 + * 1492 + * struct drm_xe_query_oa_units *qoa; 1493 + * struct drm_xe_oa_unit *oau; 1494 + * u8 *poau; 1495 + * 1496 + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. 
Then: 1497 + * poau = (u8 *)&qoa->oa_units[0]; 1498 + * for (int i = 0; i < qoa->num_oa_units; i++) { 1499 + * oau = (struct drm_xe_oa_unit *)poau; 1500 + * // Access 'struct drm_xe_oa_unit' fields here 1501 + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); 1502 + * } 1503 + */ 1504 + struct drm_xe_query_oa_units { 1505 + /** @extensions: Pointer to the first extension struct, if any */ 1506 + __u64 extensions; 1507 + /** @num_oa_units: number of OA units returned in oau[] */ 1508 + __u32 num_oa_units; 1509 + /** @pad: MBZ */ 1510 + __u32 pad; 1511 + /** 1512 + * @oa_units: struct @drm_xe_oa_unit array returned for this device. 1513 + * Written below as a u64 array to avoid problems with nested flexible 1514 + * arrays with some compilers 1515 + */ 1516 + __u64 oa_units[]; 1517 + }; 1518 + 1519 + /** 1520 + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec 1521 + * 52198/60942 1522 + */ 1523 + enum drm_xe_oa_format_type { 1524 + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ 1525 + DRM_XE_OA_FMT_TYPE_OAG, 1526 + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ 1527 + DRM_XE_OA_FMT_TYPE_OAR, 1528 + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ 1529 + DRM_XE_OA_FMT_TYPE_OAM, 1530 + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ 1531 + DRM_XE_OA_FMT_TYPE_OAC, 1532 + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ 1533 + DRM_XE_OA_FMT_TYPE_OAM_MPEC, 1534 + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ 1535 + DRM_XE_OA_FMT_TYPE_PEC, 1536 + }; 1537 + 1538 + /** 1539 + * enum drm_xe_oa_property_id - OA stream property id's 1540 + * 1541 + * Stream params are specified as a chain of @drm_xe_ext_set_property 1542 + * struct's, with @property values from enum @drm_xe_oa_property_id and 1543 + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. 1544 + * @param field in struct @drm_xe_perf_param points to the first 1545 + * @drm_xe_ext_set_property struct. 
1546 + * 1547 + * Exactly the same mechanism is also used for stream reconfiguration using 1548 + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of 1549 + * properties below can be specified for stream reconfiguration. 1550 + */ 1551 + enum drm_xe_oa_property_id { 1552 + #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 1553 + /** 1554 + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open 1555 + * the OA stream, see @oa_unit_id in 'struct 1556 + * drm_xe_query_oa_units'. Defaults to 0 if not provided. 1557 + */ 1558 + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, 1559 + 1560 + /** 1561 + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw 1562 + * OA unit reports or stream samples in a global buffer attached to an 1563 + * OA unit. 1564 + */ 1565 + DRM_XE_OA_PROPERTY_SAMPLE_OA, 1566 + 1567 + /** 1568 + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA 1569 + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. 1570 + */ 1571 + DRM_XE_OA_PROPERTY_OA_METRIC_SET, 1572 + 1573 + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ 1574 + DRM_XE_OA_PROPERTY_OA_FORMAT, 1575 + /* 1576 + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, 1577 + * in terms of the following quantities: a. enum @drm_xe_oa_format_type 1578 + * b. Counter select c. Counter size and d. BC report. Also refer to the 1579 + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. 
1580 + */ 1581 + #define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) 1582 + #define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) 1583 + #define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) 1584 + #define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) 1585 + 1586 + /** 1587 + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit 1588 + * sampling with sampling frequency proportional to 2^(period_exponent + 1) 1589 + */ 1590 + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, 1591 + 1592 + /** 1593 + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA 1594 + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). 1595 + */ 1596 + DRM_XE_OA_PROPERTY_OA_DISABLED, 1597 + 1598 + /** 1599 + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific 1600 + * @exec_queue_id. Perf queries can be executed on this exec queue. 1601 + */ 1602 + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, 1603 + 1604 + /** 1605 + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to 1606 + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. 1607 + */ 1608 + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, 1609 + 1610 + /** 1611 + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing 1612 + * to be disabled for the stream exec queue. 1613 + */ 1614 + DRM_XE_OA_PROPERTY_NO_PREEMPT, 1615 + }; 1616 + 1617 + /** 1618 + * struct drm_xe_oa_config - OA metric configuration 1619 + * 1620 + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A 1621 + * particular config can be specified when opening an OA stream using 1622 + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. 
1623 + */ 1624 + struct drm_xe_oa_config { 1625 + /** @extensions: Pointer to the first extension struct, if any */ 1626 + __u64 extensions; 1627 + 1628 + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ 1629 + char uuid[36]; 1630 + 1631 + /** @n_regs: Number of regs in @regs_ptr */ 1632 + __u32 n_regs; 1633 + 1634 + /** 1635 + * @regs_ptr: Pointer to (register address, value) pairs for OA config 1636 + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). 1637 + */ 1638 + __u64 regs_ptr; 1639 + }; 1640 + 1641 + /** 1642 + * struct drm_xe_oa_stream_status - OA stream status returned from 1643 + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to 1644 + * query stream status in response to EIO errno from perf fd read(). 1645 + */ 1646 + struct drm_xe_oa_stream_status { 1647 + /** @extensions: Pointer to the first extension struct, if any */ 1648 + __u64 extensions; 1649 + 1650 + /** @oa_status: OA stream status (see Bspec 46717/61226) */ 1651 + __u64 oa_status; 1652 + #define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) 1653 + #define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) 1654 + #define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) 1655 + #define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) 1656 + 1657 + /** @reserved: reserved for future use */ 1658 + __u64 reserved[3]; 1659 + }; 1660 + 1661 + /** 1662 + * struct drm_xe_oa_stream_info - OA stream info returned from 1663 + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl 1664 + */ 1665 + struct drm_xe_oa_stream_info { 1666 + /** @extensions: Pointer to the first extension struct, if any */ 1667 + __u64 extensions; 1668 + 1669 + /** @oa_buf_size: OA buffer size */ 1670 + __u64 oa_buf_size; 1671 + 1672 + /** @reserved: reserved for future use */ 1673 + __u64 reserved[3]; 1376 1674 }; 1377 1675 1378 1676 #if defined(__cplusplus)